PostgreSQL Source Code git master
Loading...
Searching...
No Matches
spgtextproc.c File Reference
#include "postgres.h"
#include "access/spgist.h"
#include "catalog/pg_type.h"
#include "common/int.h"
#include "mb/pg_wchar.h"
#include "utils/datum.h"
#include "utils/fmgrprotos.h"
#include "utils/pg_locale.h"
#include "utils/varlena.h"
#include "varatt.h"
Include dependency graph for spgtextproc.c:

Go to the source code of this file.

Data Structures

struct  spgNodePtr
 

Macros

#define SPGIST_MAX_PREFIX_LENGTH   Max((int) (BLCKSZ - 258 * 16 - 100), 32)
 
#define SPG_STRATEGY_ADDITION   (10)
 
#define SPG_IS_COLLATION_AWARE_STRATEGY(s)
 

Typedefs

typedef struct spgNodePtr spgNodePtr
 

Functions

Datum spg_text_config (PG_FUNCTION_ARGS)
 
static Datum formTextDatum (const char *data, int datalen)
 
static int commonPrefix (const char *a, const char *b, int lena, int lenb)
 
static bool searchChar (const Datum *nodeLabels, int nNodes, int16 c, int *i)
 
Datum spg_text_choose (PG_FUNCTION_ARGS)
 
static int cmpNodePtr (const void *a, const void *b)
 
Datum spg_text_picksplit (PG_FUNCTION_ARGS)
 
Datum spg_text_inner_consistent (PG_FUNCTION_ARGS)
 
Datum spg_text_leaf_consistent (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ SPG_IS_COLLATION_AWARE_STRATEGY

#define SPG_IS_COLLATION_AWARE_STRATEGY (   s)
Value:
((s) > SPG_STRATEGY_ADDITION && (s) != RTPrefixStrategyNumber)
#define SPG_STRATEGY_ADDITION
Definition spgtextproc.c:82
#define RTPrefixStrategyNumber
Definition stratnum.h:78

Definition at line 83 of file spgtextproc.c.

87{
88 Datum d;
89 int i;
90 int16 c;
92
93
96{
97#ifdef NOT_USED
99#endif
101
102 cfg->prefixType = TEXTOID;
103 cfg->labelType = INT2OID;
104 cfg->canReturnData = true;
105 cfg->longValuesOK = true; /* suffixing will shorten long values */
107}
108
109/*
110 * Form a text datum from the given not-necessarily-null-terminated string,
111 * using short varlena header format if possible
112 */
113static Datum
114formTextDatum(const char *data, int datalen)
115{
116 char *p;
117
118 p = (char *) palloc(datalen + VARHDRSZ);
119
120 if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
121 {
122 SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
123 if (datalen)
124 memcpy(p + VARHDRSZ_SHORT, data, datalen);
125 }
126 else
127 {
128 SET_VARSIZE(p, datalen + VARHDRSZ);
129 memcpy(p + VARHDRSZ, data, datalen);
130 }
131
132 return PointerGetDatum(p);
133}
134
/*
 * Find the length of the common prefix of a and b.
 *
 * a, b:       byte strings (need not be null-terminated)
 * lena, lenb: number of valid bytes in a and b respectively
 *
 * Returns the number of leading bytes that are equal in both strings; the
 * result is never larger than the smaller of lena and lenb.
 */
static int
commonPrefix(const char *a, const char *b, int lena, int lenb)
{
	int			i = 0;

	/* Length checks come first, so neither string is read past its end */
	while (i < lena && i < lenb && *a == *b)
	{
		a++;
		b++;
		i++;
	}

	return i;
}
152
153/*
154 * Binary search an array of int16 datums for a match to c
155 *
156 * On success, *i gets the match location; on failure, it gets where to insert
157 */
158static bool
159searchChar(const Datum *nodeLabels, int nNodes, int16 c, int *i)
160{
161 int StopLow = 0,
162 StopHigh = nNodes;
163
164 while (StopLow < StopHigh)
165 {
166 int StopMiddle = (StopLow + StopHigh) >> 1;
167 int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
168
169 if (c < middle)
171 else if (c > middle)
172 StopLow = StopMiddle + 1;
173 else
174 {
175 *i = StopMiddle;
176 return true;
177 }
178 }
179
180 *i = StopHigh;
181 return false;
182}
183
184Datum
186{
190 char *inStr = VARDATA_ANY(inText);
192 char *prefixStr = NULL;
193 int prefixSize = 0;
194 int commonLen = 0;
195 int16 nodeChar = 0;
196 int i = 0;
197
198 /* Check for prefix match, set nodeChar to first byte after prefix */
199 if (in->hasPrefix)
200 {
202
204 prefixSize = VARSIZE_ANY_EXHDR(prefixText);
205
207 prefixStr,
208 inSize - in->level,
209 prefixSize);
210
211 if (commonLen == prefixSize)
212 {
213 if (inSize - in->level > commonLen)
214 nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
215 else
216 nodeChar = -1;
217 }
218 else
219 {
220 /* Must split tuple because incoming value doesn't match prefix */
222
223 if (commonLen == 0)
224 {
225 out->result.splitTuple.prefixHasPrefix = false;
226 }
227 else
228 {
232 }
236 Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
237
239
240 if (prefixSize - commonLen == 1)
241 {
243 }
244 else
245 {
249 prefixSize - commonLen - 1);
250 }
251
253 }
254 }
255 else if (inSize > in->level)
256 {
257 nodeChar = *(unsigned char *) (inStr + in->level);
258 }
259 else
260 {
261 nodeChar = -1;
262 }
263
264 /* Look up nodeChar in the node label array */
265 if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
266 {
267 /*
268 * Descend to existing node. (If in->allTheSame, the core code will
269 * ignore our nodeN specification here, but that's OK. We still have
270 * to provide the correct levelAdd and restDatum values, and those are
271 * the same regardless of which node gets chosen by core.)
272 */
273 int levelAdd;
274
276 out->result.matchNode.nodeN = i;
277 levelAdd = commonLen;
278 if (nodeChar >= 0)
279 levelAdd++;
280 out->result.matchNode.levelAdd = levelAdd;
281 if (inSize - in->level - levelAdd > 0)
283 formTextDatum(inStr + in->level + levelAdd,
284 inSize - in->level - levelAdd);
285 else
288 }
289 else if (in->allTheSame)
290 {
291 /*
292 * Can't use AddNode action, so split the tuple. The upper tuple has
293 * the same prefix as before and uses a dummy node label -2 for the
294 * lower tuple. The lower tuple has no prefix and the same node
295 * labels as the original tuple.
296 *
297 * Note: it might seem tempting to shorten the upper tuple's prefix,
298 * if it has one, then use its last byte as label for the lower tuple.
299 * But that doesn't win since we know the incoming value matches the
300 * whole prefix: we'd just end up splitting the lower tuple again.
301 */
310 }
311 else
312 {
313 /* Add a node for the not-previously-seen nodeChar value */
314 out->resultType = spgAddNode;
316 out->result.addNode.nodeN = i;
317 }
318
320}
321
322/* qsort comparator to sort spgNodePtr structs by "c" */
323static int
324cmpNodePtr(const void *a, const void *b)
325{
326 const spgNodePtr *aa = (const spgNodePtr *) a;
327 const spgNodePtr *bb = (const spgNodePtr *) b;
328
329 return pg_cmp_s16(aa->c, bb->c);
330}
331
332Datum
334{
337 text *text0 = DatumGetTextPP(in->datums[0]);
338 int i,
339 commonLen;
341
342 /* Identify longest common prefix, if any */
344 for (i = 1; i < in->nTuples && commonLen > 0; i++)
345 {
347 int tmp = commonPrefix(VARDATA_ANY(text0),
351
352 if (tmp < commonLen)
353 commonLen = tmp;
354 }
355
356 /*
357 * Limit the prefix length, if necessary, to ensure that the resulting
358 * inner tuple will fit on a page.
359 */
361
362 /* Set node prefix to be that string, if it's not empty */
363 if (commonLen == 0)
364 {
365 out->hasPrefix = false;
366 }
367 else
368 {
369 out->hasPrefix = true;
371 }
372
373 /* Extract the node label (first non-common byte) from each value */
375
376 for (i = 0; i < in->nTuples; i++)
377 {
379
381 nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
382 else
383 nodes[i].c = -1; /* use -1 if string is all common */
384 nodes[i].i = i;
385 nodes[i].d = in->datums[i];
386 }
387
388 /*
389 * Sort by label values so that we can group the values into nodes. This
390 * also ensures that the nodes are ordered by label value, allowing the
391 * use of binary search in searchChar.
392 */
393 qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
394
395 /* And emit results */
396 out->nNodes = 0;
398 out->mapTuplesToNodes = palloc_array(int, in->nTuples);
400
401 for (i = 0; i < in->nTuples; i++)
402 {
404 Datum leafD;
405
406 if (i == 0 || nodes[i].c != nodes[i - 1].c)
407 {
408 out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
409 out->nNodes++;
410 }
411
415 else
417
418 out->leafTupleDatums[nodes[i].i] = leafD;
419 out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
420 }
421
423}
424
425Datum
427{
431 text *reconstructedValue;
433 int maxReconstrLen;
435 int prefixSize = 0;
436 int i;
437
438 /*
439 * Reconstruct values represented at this tuple, including parent data,
440 * prefix of this tuple if any, and the node label if it's non-dummy.
441 * in->level should be the length of the previously reconstructed value,
442 * and the number of bytes added here is prefixSize or prefixSize + 1.
443 *
444 * Note: we assume that in->reconstructedValue isn't toasted and doesn't
445 * have a short varlena header. This is okay because it must have been
446 * created by a previous invocation of this routine, and we always emit
447 * long-format reconstructed values.
448 */
449 reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
450 Assert(reconstructedValue == NULL ? in->level == 0 :
451 VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
452
453 maxReconstrLen = in->level + 1;
454 if (in->hasPrefix)
455 {
457 prefixSize = VARSIZE_ANY_EXHDR(prefixText);
458 maxReconstrLen += prefixSize;
459 }
460
463
464 if (in->level)
466 VARDATA(reconstructedValue),
467 in->level);
468 if (prefixSize)
469 memcpy(((char *) VARDATA(reconstrText)) + in->level,
471 prefixSize);
472 /* last byte of reconstrText will be filled in below */
473
474 /*
475 * Scan the child nodes. For each one, complete the reconstructed value
476 * and see if it's consistent with the query. If so, emit an entry into
477 * the output arrays.
478 */
479 out->nodeNumbers = palloc_array(int, in->nNodes);
480 out->levelAdds = palloc_array(int, in->nNodes);
482 out->nNodes = 0;
483
484 for (i = 0; i < in->nNodes; i++)
485 {
487 int thisLen;
488 bool res = true;
489 int j;
490
491 /* If nodeChar is a dummy value, don't include it in data */
492 if (nodeChar <= 0)
494 else
495 {
496 ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
498 }
499
500 for (j = 0; j < in->nkeys; j++)
501 {
502 StrategyNumber strategy = in->scankeys[j].sk_strategy;
503 text *inText;
504 int inSize;
505 int r;
506
507 /*
508 * If it's a collation-aware operator, but the collation is C, we
509 * can treat it as non-collation-aware. With non-C collation we
510 * need to traverse whole tree :-( so there's no point in making
511 * any check here. (Note also that our reconstructed value may
512 * well end with a partial multibyte character, so that applying
513 * any encoding-sensitive test to it would be risky anyhow.)
514 */
516 {
517 if (collate_is_c)
518 strategy -= SPG_STRATEGY_ADDITION;
519 else
520 continue;
521 }
522
525
527 Min(inSize, thisLen));
528
529 switch (strategy)
530 {
533 if (r > 0)
534 res = false;
535 break;
537 if (r != 0 || inSize < thisLen)
538 res = false;
539 break;
542 if (r < 0)
543 res = false;
544 break;
546 if (r != 0)
547 res = false;
548 break;
549 default:
550 elog(ERROR, "unrecognized strategy number: %d",
551 in->scankeys[j].sk_strategy);
552 break;
553 }
554
555 if (!res)
556 break; /* no need to consider remaining conditions */
557 }
558
559 if (res)
560 {
561 out->nodeNumbers[out->nNodes] = i;
562 out->levelAdds[out->nNodes] = thisLen - in->level;
564 out->reconstructedValues[out->nNodes] =
566 out->nNodes++;
567 }
568 }
569
571}
572
573Datum
575{
578 int level = in->level;
579 text *leafValue,
581 char *fullValue;
582 int fullLen;
583 bool res;
584 int j;
585
586 /* all tests are exact */
587 out->recheck = false;
588
589 leafValue = DatumGetTextPP(in->leafDatum);
590
591 /* As above, in->reconstructedValue isn't toasted or short. */
594
595 Assert(reconstrValue == NULL ? level == 0 :
597
598 /* Reconstruct the full string represented by this leaf tuple */
599 fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
600 if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
601 {
604 }
605 else
606 {
608
611 if (level)
613 if (VARSIZE_ANY_EXHDR(leafValue) > 0)
614 memcpy(fullValue + level, VARDATA_ANY(leafValue),
615 VARSIZE_ANY_EXHDR(leafValue));
617 }
618
619 /* Perform the required comparison(s) */
620 res = true;
621 for (j = 0; j < in->nkeys; j++)
622 {
623 StrategyNumber strategy = in->scankeys[j].sk_strategy;
625 int queryLen = VARSIZE_ANY_EXHDR(query);
626 int r;
627
628 if (strategy == RTPrefixStrategyNumber)
629 {
630 /*
631 * if level >= length of query then reconstrValue must begin with
632 * query (prefix) string, so we don't need to check it again.
633 */
634 res = (level >= queryLen) ||
637 out->leafValue,
638 PointerGetDatum(query)));
639
640 if (!res) /* no need to consider remaining conditions */
641 break;
642
643 continue;
644 }
645
647 {
648 /* Collation-aware comparison */
649 strategy -= SPG_STRATEGY_ADDITION;
650
651 /* If asserts enabled, verify encoding of reconstructed string */
653
655 VARDATA_ANY(query), queryLen,
657 }
658 else
659 {
660 /* Non-collation-aware comparison */
662
663 if (r == 0)
664 {
665 if (queryLen > fullLen)
666 r = -1;
667 else if (queryLen < fullLen)
668 r = 1;
669 }
670 }
671
672 switch (strategy)
673 {
675 res = (r < 0);
676 break;
678 res = (r <= 0);
679 break;
681 res = (r == 0);
682 break;
684 res = (r >= 0);
685 break;
687 res = (r > 0);
688 break;
689 default:
690 elog(ERROR, "unrecognized strategy number: %d",
691 in->scankeys[j].sk_strategy);
692 res = false;
693 break;
694 }
695
696 if (!res)
697 break; /* no need to consider remaining conditions */
698 }
699
700 PG_RETURN_BOOL(res);
701}
#define Min(x, y)
Definition c.h:997
#define VARHDRSZ
Definition c.h:711
#define Assert(condition)
Definition c.h:873
int16_t int16
Definition c.h:541
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition datum.c:132
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
Datum DirectFunctionCall2Coll(PGFunction func, Oid collation, Datum arg1, Datum arg2)
Definition fmgr.c:813
#define PG_RETURN_VOID()
Definition fmgr.h:350
#define DatumGetTextPP(X)
Definition fmgr.h:293
#define PG_GETARG_POINTER(n)
Definition fmgr.h:277
#define PG_GET_COLLATION()
Definition fmgr.h:198
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
static int pg_cmp_s16(int16 a, int16 b)
Definition int.h:701
int b
Definition isn.c:74
int a
Definition isn.c:73
int j
Definition isn.c:78
int i
Definition isn.c:77
bool pg_verifymbstr(const char *mbstr, int len, bool noError)
Definition mbutils.c:1559
void * palloc(Size size)
Definition mcxt.c:1387
const void * data
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition pg_locale.c:1189
#define qsort(a, b, c, d)
Definition port.h:495
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Datum Int16GetDatum(int16 X)
Definition postgres.h:182
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
static int16 DatumGetInt16(Datum X)
Definition postgres.h:172
char * c
static int fb(int x)
@ spgMatchNode
Definition spgist.h:69
@ spgAddNode
Definition spgist.h:70
@ spgSplitTuple
Definition spgist.h:71
Datum spg_text_config(PG_FUNCTION_ARGS)
Definition spgtextproc.c:96
static int commonPrefix(const char *a, const char *b, int lena, int lenb)
#define SPG_IS_COLLATION_AWARE_STRATEGY(s)
Definition spgtextproc.c:83
static int cmpNodePtr(const void *a, const void *b)
#define SPGIST_MAX_PREFIX_LENGTH
Definition spgtextproc.c:70
Datum spg_text_leaf_consistent(PG_FUNCTION_ARGS)
Datum spg_text_inner_consistent(PG_FUNCTION_ARGS)
static Datum formTextDatum(const char *data, int datalen)
Datum spg_text_choose(PG_FUNCTION_ARGS)
static bool searchChar(const Datum *nodeLabels, int nNodes, int16 c, int *i)
Datum spg_text_picksplit(PG_FUNCTION_ARGS)
uint16 StrategyNumber
Definition stratnum.h:22
#define BTGreaterStrategyNumber
Definition stratnum.h:33
#define BTLessStrategyNumber
Definition stratnum.h:29
#define BTEqualStrategyNumber
Definition stratnum.h:31
#define BTLessEqualStrategyNumber
Definition stratnum.h:30
#define BTGreaterEqualStrategyNumber
Definition stratnum.h:32
Datum sk_argument
Definition skey.h:72
StrategyNumber sk_strategy
Definition skey.h:68
Datum * nodeLabels
Definition spgist.h:64
bool hasPrefix
Definition spgist.h:61
Datum prefixDatum
Definition spgist.h:62
int nNodes
Definition spgist.h:63
Datum datum
Definition spgist.h:55
int level
Definition spgist.h:57
bool allTheSame
Definition spgist.h:60
bool postfixHasPrefix
Definition spgist.h:101
int childNodeN
Definition spgist.h:98
spgChooseResultType resultType
Definition spgist.h:76
struct spgChooseOut::@54::@57 splitTuple
int levelAdd
Definition spgist.h:82
struct spgChooseOut::@54::@56 addNode
Datum nodeLabel
Definition spgist.h:87
Datum * prefixNodeLabels
Definition spgist.h:96
Datum postfixPrefixDatum
Definition spgist.h:102
Datum restDatum
Definition spgist.h:83
int prefixNNodes
Definition spgist.h:95
int nodeN
Definition spgist.h:81
union spgChooseOut::@54 result
Datum prefixPrefixDatum
Definition spgist.h:94
bool prefixHasPrefix
Definition spgist.h:93
struct spgChooseOut::@54::@55 matchNode
bool longValuesOK
Definition spgist.h:47
bool canReturnData
Definition spgist.h:46
Oid labelType
Definition spgist.h:44
Oid prefixType
Definition spgist.h:43
Datum reconstructedValue
Definition spgist.h:140
Datum * reconstructedValues
Definition spgist.h:159
Datum reconstructedValue
Definition spgist.h:175
Datum * datums
Definition spgist.h:113
int * mapTuplesToNodes
Definition spgist.h:125
Datum * nodeLabels
Definition spgist.h:123
Datum * leafTupleDatums
Definition spgist.h:126
Datum prefixDatum
Definition spgist.h:120
Definition c.h:706
#define VARHDRSZ_SHORT
Definition varatt.h:278
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE_SHORT(void *PTR, Size len)
Definition varatt.h:439
#define VARATT_SHORT_MAX
Definition varatt.h:279
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
int varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
Definition varlena.c:1308
Datum text_starts_with(PG_FUNCTION_ARGS)
Definition varlena.c:1545

◆ SPG_STRATEGY_ADDITION

#define SPG_STRATEGY_ADDITION   (10)

Definition at line 82 of file spgtextproc.c.

◆ SPGIST_MAX_PREFIX_LENGTH

#define SPGIST_MAX_PREFIX_LENGTH   Max((int) (BLCKSZ - 258 * 16 - 100), 32)

Definition at line 70 of file spgtextproc.c.

Typedef Documentation

◆ spgNodePtr

Function Documentation

◆ cmpNodePtr()

static int cmpNodePtr ( const void *a,
const void *b 
)
static

Definition at line 325 of file spgtextproc.c.

326{
327 const spgNodePtr *aa = (const spgNodePtr *) a;
328 const spgNodePtr *bb = (const spgNodePtr *) b;
329
330 return pg_cmp_s16(aa->c, bb->c);
331}

References a, b, fb(), and pg_cmp_s16().

Referenced by spg_text_picksplit().

◆ commonPrefix()

static int commonPrefix ( const char *a,
const char *b,
int  lena,
int  lenb 
)
static

Definition at line 140 of file spgtextproc.c.

141{
142 int i = 0;
143
144 while (i < lena && i < lenb && *a == *b)
145 {
146 a++;
147 b++;
148 i++;
149 }
150
151 return i;
152}

References a, b, fb(), and i.

Referenced by spg_text_choose(), and spg_text_picksplit().

◆ formTextDatum()

static Datum formTextDatum ( const char *data,
int  datalen 
)
static

Definition at line 115 of file spgtextproc.c.

116{
117 char *p;
118
119 p = (char *) palloc(datalen + VARHDRSZ);
120
121 if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
122 {
123 SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
124 if (datalen)
125 memcpy(p + VARHDRSZ_SHORT, data, datalen);
126 }
127 else
128 {
129 SET_VARSIZE(p, datalen + VARHDRSZ);
130 memcpy(p + VARHDRSZ, data, datalen);
131 }
132
133 return PointerGetDatum(p);
134}

References data, fb(), palloc(), PointerGetDatum(), SET_VARSIZE(), SET_VARSIZE_SHORT(), VARATT_SHORT_MAX, VARHDRSZ, and VARHDRSZ_SHORT.

Referenced by spg_text_choose(), and spg_text_picksplit().

◆ searchChar()

static bool searchChar ( const Datum *nodeLabels,
int  nNodes,
int16  c,
int *i 
)
static

Definition at line 160 of file spgtextproc.c.

161{
162 int StopLow = 0,
163 StopHigh = nNodes;
164
165 while (StopLow < StopHigh)
166 {
167 int StopMiddle = (StopLow + StopHigh) >> 1;
168 int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
169
170 if (c < middle)
171 StopHigh = StopMiddle;
172 else if (c > middle)
173 StopLow = StopMiddle + 1;
174 else
175 {
176 *i = StopMiddle;
177 return true;
178 }
179 }
180
181 *i = StopHigh;
182 return false;
183}

References DatumGetInt16(), fb(), and i.

Referenced by spg_text_choose().

◆ spg_text_choose()

Datum spg_text_choose ( PG_FUNCTION_ARGS  )

Definition at line 186 of file spgtextproc.c.

187{
191 char *inStr = VARDATA_ANY(inText);
193 char *prefixStr = NULL;
194 int prefixSize = 0;
195 int commonLen = 0;
196 int16 nodeChar = 0;
197 int i = 0;
198
199 /* Check for prefix match, set nodeChar to first byte after prefix */
200 if (in->hasPrefix)
201 {
203
205 prefixSize = VARSIZE_ANY_EXHDR(prefixText);
206
208 prefixStr,
209 inSize - in->level,
210 prefixSize);
211
212 if (commonLen == prefixSize)
213 {
214 if (inSize - in->level > commonLen)
215 nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
216 else
217 nodeChar = -1;
218 }
219 else
220 {
221 /* Must split tuple because incoming value doesn't match prefix */
223
224 if (commonLen == 0)
225 {
226 out->result.splitTuple.prefixHasPrefix = false;
227 }
228 else
229 {
233 }
237 Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
238
240
241 if (prefixSize - commonLen == 1)
242 {
244 }
245 else
246 {
250 prefixSize - commonLen - 1);
251 }
252
254 }
255 }
256 else if (inSize > in->level)
257 {
258 nodeChar = *(unsigned char *) (inStr + in->level);
259 }
260 else
261 {
262 nodeChar = -1;
263 }
264
265 /* Look up nodeChar in the node label array */
266 if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
267 {
268 /*
269 * Descend to existing node. (If in->allTheSame, the core code will
270 * ignore our nodeN specification here, but that's OK. We still have
271 * to provide the correct levelAdd and restDatum values, and those are
272 * the same regardless of which node gets chosen by core.)
273 */
274 int levelAdd;
275
277 out->result.matchNode.nodeN = i;
278 levelAdd = commonLen;
279 if (nodeChar >= 0)
280 levelAdd++;
281 out->result.matchNode.levelAdd = levelAdd;
282 if (inSize - in->level - levelAdd > 0)
284 formTextDatum(inStr + in->level + levelAdd,
285 inSize - in->level - levelAdd);
286 else
289 }
290 else if (in->allTheSame)
291 {
292 /*
293 * Can't use AddNode action, so split the tuple. The upper tuple has
294 * the same prefix as before and uses a dummy node label -2 for the
295 * lower tuple. The lower tuple has no prefix and the same node
296 * labels as the original tuple.
297 *
298 * Note: it might seem tempting to shorten the upper tuple's prefix,
299 * if it has one, then use its last byte as label for the lower tuple.
300 * But that doesn't win since we know the incoming value matches the
301 * whole prefix: we'd just end up splitting the lower tuple again.
302 */
311 }
312 else
313 {
314 /* Add a node for the not-previously-seen nodeChar value */
315 out->resultType = spgAddNode;
317 out->result.addNode.nodeN = i;
318 }
319
321}

References spgChooseOut::addNode, spgChooseIn::allTheSame, spgChooseOut::childNodeN, commonPrefix(), spgChooseIn::datum, DatumGetTextPP, fb(), formTextDatum(), spgChooseIn::hasPrefix, i, Int16GetDatum(), spgChooseIn::level, spgChooseOut::levelAdd, spgChooseOut::matchNode, spgChooseIn::nNodes, spgChooseOut::nodeLabel, spgChooseIn::nodeLabels, spgChooseOut::nodeN, palloc_object, PG_GETARG_POINTER, PG_RETURN_VOID, spgChooseOut::postfixHasPrefix, spgChooseOut::postfixPrefixDatum, spgChooseIn::prefixDatum, spgChooseOut::prefixHasPrefix, spgChooseOut::prefixNNodes, spgChooseOut::prefixNodeLabels, spgChooseOut::prefixPrefixDatum, spgChooseOut::restDatum, spgChooseOut::result, spgChooseOut::resultType, searchChar(), spgAddNode, spgMatchNode, spgSplitTuple, spgChooseOut::splitTuple, VARDATA_ANY(), and VARSIZE_ANY_EXHDR().

◆ spg_text_config()

Datum spg_text_config ( PG_FUNCTION_ARGS  )

Definition at line 96 of file spgtextproc.c.

97{
98#ifdef NOT_USED
100#endif
102
103 cfg->prefixType = TEXTOID;
104 cfg->labelType = INT2OID;
105 cfg->canReturnData = true;
106 cfg->longValuesOK = true; /* suffixing will shorten long values */
108}

References spgConfigOut::canReturnData, fb(), spgConfigOut::labelType, spgConfigOut::longValuesOK, PG_GETARG_POINTER, PG_RETURN_VOID, and spgConfigOut::prefixType.

◆ spg_text_inner_consistent()

Datum spg_text_inner_consistent ( PG_FUNCTION_ARGS  )

Definition at line 427 of file spgtextproc.c.

428{
432 text *reconstructedValue;
434 int maxReconstrLen;
436 int prefixSize = 0;
437 int i;
438
439 /*
440 * Reconstruct values represented at this tuple, including parent data,
441 * prefix of this tuple if any, and the node label if it's non-dummy.
442 * in->level should be the length of the previously reconstructed value,
443 * and the number of bytes added here is prefixSize or prefixSize + 1.
444 *
445 * Note: we assume that in->reconstructedValue isn't toasted and doesn't
446 * have a short varlena header. This is okay because it must have been
447 * created by a previous invocation of this routine, and we always emit
448 * long-format reconstructed values.
449 */
450 reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
451 Assert(reconstructedValue == NULL ? in->level == 0 :
452 VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
453
454 maxReconstrLen = in->level + 1;
455 if (in->hasPrefix)
456 {
458 prefixSize = VARSIZE_ANY_EXHDR(prefixText);
459 maxReconstrLen += prefixSize;
460 }
461
464
465 if (in->level)
467 VARDATA(reconstructedValue),
468 in->level);
469 if (prefixSize)
470 memcpy(((char *) VARDATA(reconstrText)) + in->level,
472 prefixSize);
473 /* last byte of reconstrText will be filled in below */
474
475 /*
476 * Scan the child nodes. For each one, complete the reconstructed value
477 * and see if it's consistent with the query. If so, emit an entry into
478 * the output arrays.
479 */
480 out->nodeNumbers = palloc_array(int, in->nNodes);
481 out->levelAdds = palloc_array(int, in->nNodes);
483 out->nNodes = 0;
484
485 for (i = 0; i < in->nNodes; i++)
486 {
488 int thisLen;
489 bool res = true;
490 int j;
491
492 /* If nodeChar is a dummy value, don't include it in data */
493 if (nodeChar <= 0)
495 else
496 {
497 ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
499 }
500
501 for (j = 0; j < in->nkeys; j++)
502 {
503 StrategyNumber strategy = in->scankeys[j].sk_strategy;
504 text *inText;
505 int inSize;
506 int r;
507
508 /*
509 * If it's a collation-aware operator, but the collation is C, we
510 * can treat it as non-collation-aware. With non-C collation we
511 * need to traverse whole tree :-( so there's no point in making
512 * any check here. (Note also that our reconstructed value may
513 * well end with a partial multibyte character, so that applying
514 * any encoding-sensitive test to it would be risky anyhow.)
515 */
517 {
518 if (collate_is_c)
519 strategy -= SPG_STRATEGY_ADDITION;
520 else
521 continue;
522 }
523
526
528 Min(inSize, thisLen));
529
530 switch (strategy)
531 {
534 if (r > 0)
535 res = false;
536 break;
538 if (r != 0 || inSize < thisLen)
539 res = false;
540 break;
543 if (r < 0)
544 res = false;
545 break;
547 if (r != 0)
548 res = false;
549 break;
550 default:
551 elog(ERROR, "unrecognized strategy number: %d",
552 in->scankeys[j].sk_strategy);
553 break;
554 }
555
556 if (!res)
557 break; /* no need to consider remaining conditions */
558 }
559
560 if (res)
561 {
562 out->nodeNumbers[out->nNodes] = i;
563 out->levelAdds[out->nNodes] = thisLen - in->level;
565 out->reconstructedValues[out->nNodes] =
567 out->nNodes++;
568 }
569 }
570
572}

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, pg_locale_struct::collate_is_c, datumCopy(), DatumGetInt16(), DatumGetPointer(), DatumGetTextPP, elog, ERROR, fb(), spgInnerConsistentIn::hasPrefix, i, j, spgInnerConsistentIn::level, spgInnerConsistentOut::levelAdds, Min, spgInnerConsistentIn::nkeys, spgInnerConsistentIn::nNodes, spgInnerConsistentOut::nNodes, spgInnerConsistentIn::nodeLabels, spgInnerConsistentOut::nodeNumbers, palloc(), palloc_array, PG_GET_COLLATION, PG_GETARG_POINTER, pg_newlocale_from_collation(), PG_RETURN_VOID, PointerGetDatum(), spgInnerConsistentIn::prefixDatum, spgInnerConsistentIn::reconstructedValue, spgInnerConsistentOut::reconstructedValues, RTPrefixStrategyNumber, spgInnerConsistentIn::scankeys, SET_VARSIZE(), ScanKeyData::sk_argument, ScanKeyData::sk_strategy, SPG_IS_COLLATION_AWARE_STRATEGY, SPG_STRATEGY_ADDITION, VARDATA(), VARDATA_ANY(), VARHDRSZ, and VARSIZE_ANY_EXHDR().

◆ spg_text_leaf_consistent()

Datum spg_text_leaf_consistent ( PG_FUNCTION_ARGS  )

Definition at line 575 of file spgtextproc.c.

576{
579 int level = in->level;
580 text *leafValue,
582 char *fullValue;
583 int fullLen;
584 bool res;
585 int j;
586
587 /* all tests are exact */
588 out->recheck = false;
589
590 leafValue = DatumGetTextPP(in->leafDatum);
591
592 /* As above, in->reconstructedValue isn't toasted or short. */
595
596 Assert(reconstrValue == NULL ? level == 0 :
598
599 /* Reconstruct the full string represented by this leaf tuple */
600 fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
601 if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
602 {
605 }
606 else
607 {
609
612 if (level)
614 if (VARSIZE_ANY_EXHDR(leafValue) > 0)
615 memcpy(fullValue + level, VARDATA_ANY(leafValue),
616 VARSIZE_ANY_EXHDR(leafValue));
618 }
619
620 /* Perform the required comparison(s) */
621 res = true;
622 for (j = 0; j < in->nkeys; j++)
623 {
624 StrategyNumber strategy = in->scankeys[j].sk_strategy;
626 int queryLen = VARSIZE_ANY_EXHDR(query);
627 int r;
628
629 if (strategy == RTPrefixStrategyNumber)
630 {
631 /*
632 * if level >= length of query then reconstrValue must begin with
633 * query (prefix) string, so we don't need to check it again.
634 */
635 res = (level >= queryLen) ||
638 out->leafValue,
639 PointerGetDatum(query)));
640
641 if (!res) /* no need to consider remaining conditions */
642 break;
643
644 continue;
645 }
646
648 {
649 /* Collation-aware comparison */
650 strategy -= SPG_STRATEGY_ADDITION;
651
652 /* If asserts enabled, verify encoding of reconstructed string */
654
656 VARDATA_ANY(query), queryLen,
658 }
659 else
660 {
661 /* Non-collation-aware comparison */
663
664 if (r == 0)
665 {
666 if (queryLen > fullLen)
667 r = -1;
668 else if (queryLen < fullLen)
669 r = 1;
670 }
671 }
672
673 switch (strategy)
674 {
676 res = (r < 0);
677 break;
679 res = (r <= 0);
680 break;
682 res = (r == 0);
683 break;
685 res = (r >= 0);
686 break;
688 res = (r > 0);
689 break;
690 default:
691 elog(ERROR, "unrecognized strategy number: %d",
692 in->scankeys[j].sk_strategy);
693 res = false;
694 break;
695 }
696
697 if (!res)
698 break; /* no need to consider remaining conditions */
699 }
700
701 PG_RETURN_BOOL(res);
702}

References Assert, BTEqualStrategyNumber, BTGreaterEqualStrategyNumber, BTGreaterStrategyNumber, BTLessEqualStrategyNumber, BTLessStrategyNumber, DatumGetBool(), DatumGetPointer(), DatumGetTextPP, DirectFunctionCall2Coll(), elog, ERROR, fb(), j, spgLeafConsistentIn::leafDatum, spgLeafConsistentOut::leafValue, spgLeafConsistentIn::level, Min, spgLeafConsistentIn::nkeys, palloc(), PG_GET_COLLATION, PG_GETARG_POINTER, PG_RETURN_BOOL, pg_verifymbstr(), PointerGetDatum(), spgLeafConsistentOut::recheck, spgLeafConsistentIn::reconstructedValue, RTPrefixStrategyNumber, spgLeafConsistentIn::scankeys, SET_VARSIZE(), ScanKeyData::sk_argument, ScanKeyData::sk_strategy, SPG_IS_COLLATION_AWARE_STRATEGY, SPG_STRATEGY_ADDITION, text_starts_with(), VARDATA(), VARDATA_ANY(), VARHDRSZ, VARSIZE_ANY_EXHDR(), and varstr_cmp().

◆ spg_text_picksplit()

Datum spg_text_picksplit ( PG_FUNCTION_ARGS  )

Definition at line 334 of file spgtextproc.c.

335{
338 text *text0 = DatumGetTextPP(in->datums[0]);
339 int i,
340 commonLen;
342
343 /* Identify longest common prefix, if any */
345 for (i = 1; i < in->nTuples && commonLen > 0; i++)
346 {
348 int tmp = commonPrefix(VARDATA_ANY(text0),
352
353 if (tmp < commonLen)
354 commonLen = tmp;
355 }
356
357 /*
358 * Limit the prefix length, if necessary, to ensure that the resulting
359 * inner tuple will fit on a page.
360 */
362
363 /* Set node prefix to be that string, if it's not empty */
364 if (commonLen == 0)
365 {
366 out->hasPrefix = false;
367 }
368 else
369 {
370 out->hasPrefix = true;
372 }
373
374 /* Extract the node label (first non-common byte) from each value */
376
377 for (i = 0; i < in->nTuples; i++)
378 {
380
382 nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
383 else
384 nodes[i].c = -1; /* use -1 if string is all common */
385 nodes[i].i = i;
386 nodes[i].d = in->datums[i];
387 }
388
389 /*
390 * Sort by label values so that we can group the values into nodes. This
391 * also ensures that the nodes are ordered by label value, allowing the
392 * use of binary search in searchChar.
393 */
394 qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
395
396 /* And emit results */
397 out->nNodes = 0;
399 out->mapTuplesToNodes = palloc_array(int, in->nTuples);
401
402 for (i = 0; i < in->nTuples; i++)
403 {
405 Datum leafD;
406
407 if (i == 0 || nodes[i].c != nodes[i - 1].c)
408 {
409 out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
410 out->nNodes++;
411 }
412
416 else
418
419 out->leafTupleDatums[nodes[i].i] = leafD;
420 out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
421 }
422
424}

References cmpNodePtr(), commonPrefix(), DatumGetTextPP, spgPickSplitIn::datums, fb(), formTextDatum(), spgPickSplitOut::hasPrefix, i, Int16GetDatum(), spgPickSplitOut::leafTupleDatums, spgPickSplitOut::mapTuplesToNodes, Min, spgPickSplitOut::nNodes, spgPickSplitOut::nodeLabels, spgPickSplitIn::nTuples, palloc_array, PG_GETARG_POINTER, PG_RETURN_VOID, spgPickSplitOut::prefixDatum, qsort, SPGIST_MAX_PREFIX_LENGTH, VARDATA_ANY(), and VARSIZE_ANY_EXHDR().