PostgreSQL Source Code  git master
spgist_name_ops.c
Go to the documentation of this file.
1 /*--------------------------------------------------------------------------
2  *
3  * spgist_name_ops.c
4  * Test opclass for SP-GiST
5  *
6  * This indexes input values of type "name", but the index storage is "text",
7  * with the same choices as made in the core SP-GiST text_ops opclass.
8  * Much of the code is identical to src/backend/access/spgist/spgtextproc.c,
9  * which see for a more detailed header comment.
10  *
11  * Unlike spgtextproc.c, we don't bother with collation-aware logic.
12  *
13  *
14  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  * IDENTIFICATION
18  * src/test/modules/spgist_name_ops/spgist_name_ops.c
19  *
20  * -------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23 
24 #include "access/spgist.h"
25 #include "catalog/pg_type.h"
26 #include "utils/datum.h"
27 #include "varatt.h"
28 
30 
31 
/*
 * SP-GiST "config" support function for this opclass (presumably
 * spgist_name_config, going by the prototype in this listing's
 * cross-reference index -- confirm against the upstream file).
 *
 * Fills in the opclass configuration through cfg: prefixes and leaf values
 * use type TEXTOID, node labels use INT2OID, the index can return the
 * indexed data, and long values are accepted.
 *
 * NOTE(review): this listing lacks the lines carrying embedded numbers 34,
 * 37 and 44 -- presumably the function-name line, the declaration of cfg,
 * and the return statement.  They must be restored before this compiles.
 */
33 Datum
35 {
36  /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
38 
 /* inner-tuple prefixes are stored as text */
39  cfg->prefixType = TEXTOID;
 /* node labels are int2 values */
40  cfg->labelType = INT2OID;
 /* leaf datums are text, not the indexed "name" type */
41  cfg->leafType = TEXTOID;
42  cfg->canReturnData = true;
43  cfg->longValuesOK = true; /* suffixing will shorten long values */
45 }
46 
47 /*
48  * Form a text datum from the given not-necessarily-null-terminated string,
49  * using short varlena header format if possible
50  */
51 static Datum
52 formTextDatum(const char *data, int datalen)
53 {
54  char *p;
55 
56  p = (char *) palloc(datalen + VARHDRSZ);
57 
58  if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
59  {
60  SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
61  if (datalen)
62  memcpy(p + VARHDRSZ_SHORT, data, datalen);
63  }
64  else
65  {
66  SET_VARSIZE(p, datalen + VARHDRSZ);
67  memcpy(p + VARHDRSZ, data, datalen);
68  }
69 
70  return PointerGetDatum(p);
71 }
72 
/*
 * Return the number of leading bytes that a and b have in common.
 *
 * Only the first lena bytes of a and the first lenb bytes of b are
 * examined, so neither string needs to be null-terminated.
 */
static int
commonPrefix(const char *a, const char *b, int lena, int lenb)
{
    int matched;

    for (matched = 0; matched < lena && matched < lenb; matched++)
    {
        /* Stop at the first position where the two strings differ. */
        if (a[matched] != b[matched])
            break;
    }

    return matched;
}
90 
91 /*
92  * Binary search an array of int16 datums for a match to c
93  *
94  * On success, *i gets the match location; on failure, it gets where to insert
95  */
96 static bool
97 searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
98 {
99  int StopLow = 0,
100  StopHigh = nNodes;
101 
102  while (StopLow < StopHigh)
103  {
104  int StopMiddle = (StopLow + StopHigh) >> 1;
105  int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
106 
107  if (c < middle)
108  StopHigh = StopMiddle;
109  else if (c > middle)
110  StopLow = StopMiddle + 1;
111  else
112  {
113  *i = StopMiddle;
114  return true;
115  }
116  }
117 
118  *i = StopHigh;
119  return false;
120 }
121 
/*
 * SP-GiST "choose" support function (presumably spgist_name_choose, going
 * by the prototype in this listing's cross-reference index -- confirm).
 *
 * Decides how the incoming "name" value in->datum is routed at the current
 * inner tuple, reporting the decision through out->resultType:
 *   spgMatchNode  - descend into the existing node whose label equals the
 *                   next byte of the value
 *   spgAddNode    - add a node for a label that is not yet present
 *   spgSplitTuple - split the inner tuple, either because the value
 *                   diverges from the tuple's prefix or because an
 *                   all-the-same tuple lacks the needed label
 *
 * NOTE(review): this listing lacks the lines carrying embedded numbers 124,
 * 126 and 127 -- presumably the function-name line and the declarations of
 * "in" and "out".  They must be restored before this compiles.
 */
123 Datum
125 {
128  Name inName = DatumGetName(in->datum);
129  char *inStr = NameStr(*inName);
130  int inSize = strlen(inStr);
131  char *prefixStr = NULL;
132  int prefixSize = 0;
133  int commonLen = 0;
134  int16 nodeChar = 0;
135  int i = 0;
136 
137  /* Check for prefix match, set nodeChar to first byte after prefix */
138  if (in->hasPrefix)
139  {
140  text *prefixText = DatumGetTextPP(in->prefixDatum);
141 
142  prefixStr = VARDATA_ANY(prefixText);
143  prefixSize = VARSIZE_ANY_EXHDR(prefixText);
144 
 /* compare the not-yet-consumed part of the value with the prefix */
145  commonLen = commonPrefix(inStr + in->level,
146  prefixStr,
147  inSize - in->level,
148  prefixSize);
149 
150  if (commonLen == prefixSize)
151  {
 /* whole prefix matched: label is the next byte, or -1 if none is left */
152  if (inSize - in->level > commonLen)
153  nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
154  else
155  nodeChar = -1;
156  }
157  else
158  {
159  /* Must split tuple because incoming value doesn't match prefix */
160  out->resultType = spgSplitTuple;
161 
 /* the upper tuple keeps only the part of the prefix that matched */
162  if (commonLen == 0)
163  {
164  out->result.splitTuple.prefixHasPrefix = false;
165  }
166  else
167  {
168  out->result.splitTuple.prefixHasPrefix = true;
169  out->result.splitTuple.prefixPrefixDatum =
170  formTextDatum(prefixStr, commonLen);
171  }
 /* its single node is labeled with the first non-matching prefix byte */
172  out->result.splitTuple.prefixNNodes = 1;
173  out->result.splitTuple.prefixNodeLabels =
174  (Datum *) palloc(sizeof(Datum));
175  out->result.splitTuple.prefixNodeLabels[0] =
176  Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
177 
178  out->result.splitTuple.childNodeN = 0;
179 
 /* the lower tuple gets whatever prefix bytes follow that label byte */
180  if (prefixSize - commonLen == 1)
181  {
182  out->result.splitTuple.postfixHasPrefix = false;
183  }
184  else
185  {
186  out->result.splitTuple.postfixHasPrefix = true;
187  out->result.splitTuple.postfixPrefixDatum =
188  formTextDatum(prefixStr + commonLen + 1,
189  prefixSize - commonLen - 1);
190  }
191 
192  PG_RETURN_VOID();
193  }
194  }
195  else if (inSize > in->level)
196  {
 /* no prefix: the label is simply the next byte of the value */
197  nodeChar = *(unsigned char *) (inStr + in->level);
198  }
199  else
200  {
 /* no prefix and no bytes left: use the -1 label */
201  nodeChar = -1;
202  }
203 
204  /* Look up nodeChar in the node label array */
205  if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
206  {
207  /*
208  * Descend to existing node. (If in->allTheSame, the core code will
209  * ignore our nodeN specification here, but that's OK. We still have
210  * to provide the correct levelAdd and restDatum values, and those are
211  * the same regardless of which node gets chosen by core.)
212  */
213  int levelAdd;
214 
215  out->resultType = spgMatchNode;
216  out->result.matchNode.nodeN = i;
 /* consume the matched prefix, plus one byte for a non-negative label */
217  levelAdd = commonLen;
218  if (nodeChar >= 0)
219  levelAdd++;
220  out->result.matchNode.levelAdd = levelAdd;
 /* restDatum is the remaining suffix of the value, possibly empty */
221  if (inSize - in->level - levelAdd > 0)
222  out->result.matchNode.restDatum =
223  formTextDatum(inStr + in->level + levelAdd,
224  inSize - in->level - levelAdd);
225  else
226  out->result.matchNode.restDatum =
227  formTextDatum(NULL, 0);
228  }
229  else if (in->allTheSame)
230  {
231  /*
232  * Can't use AddNode action, so split the tuple. The upper tuple has
233  * the same prefix as before and uses a dummy node label -2 for the
234  * lower tuple. The lower tuple has no prefix and the same node
235  * labels as the original tuple.
236  *
237  * Note: it might seem tempting to shorten the upper tuple's prefix,
238  * if it has one, then use its last byte as label for the lower tuple.
239  * But that doesn't win since we know the incoming value matches the
240  * whole prefix: we'd just end up splitting the lower tuple again.
241  */
242  out->resultType = spgSplitTuple;
243  out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
244  out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
245  out->result.splitTuple.prefixNNodes = 1;
246  out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
247  out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
248  out->result.splitTuple.childNodeN = 0;
249  out->result.splitTuple.postfixHasPrefix = false;
250  }
251  else
252  {
253  /* Add a node for the not-previously-seen nodeChar value */
254  out->resultType = spgAddNode;
255  out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
 /* i is the insertion position reported by searchChar */
256  out->result.addNode.nodeN = i;
257  }
258 
259  PG_RETURN_VOID();
260 }
261 
262 /* The picksplit function is identical to the core opclass, so just use that */
263 
/*
 * SP-GiST "inner consistent" support function (presumably
 * spgist_name_inner_consistent, going by the prototype in this listing's
 * cross-reference index -- confirm).
 *
 * For every child node of the inner tuple, extends the reconstructed text
 * value with the tuple's prefix and the node's label, compares the result
 * bytewise against each scan key, and records the nodes that pass all keys
 * in out->nodeNumbers, out->levelAdds and out->reconstructedValues.
 *
 * NOTE(review): this listing lacks the lines carrying embedded numbers 266,
 * 268 and 269 (presumably the function-name line and the declarations of
 * "in" and "out"), as well as every "case" label of the switch below.  They
 * must be restored before this compiles.
 */
265 Datum
267 {
270  text *reconstructedValue;
271  text *reconstrText;
272  int maxReconstrLen;
273  text *prefixText = NULL;
274  int prefixSize = 0;
275  int i;
276 
277  /*
278  * Reconstruct values represented at this tuple, including parent data,
279  * prefix of this tuple if any, and the node label if it's non-dummy.
280  * in->level should be the length of the previously reconstructed value,
281  * and the number of bytes added here is prefixSize or prefixSize + 1.
282  *
283  * Recall that reconstructedValues are assumed to be the same type as leaf
284  * datums, so we must use "text" not "name" for them.
285  *
286  * Note: we assume that in->reconstructedValue isn't toasted and doesn't
287  * have a short varlena header. This is okay because it must have been
288  * created by a previous invocation of this routine, and we always emit
289  * long-format reconstructed values.
290  */
291  reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
292  Assert(reconstructedValue == NULL ? in->level == 0 :
293  VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
294 
 /* room for the parent data, one label byte, and the prefix if any */
295  maxReconstrLen = in->level + 1;
296  if (in->hasPrefix)
297  {
298  prefixText = DatumGetTextPP(in->prefixDatum);
299  prefixSize = VARSIZE_ANY_EXHDR(prefixText);
300  maxReconstrLen += prefixSize;
301  }
302 
303  reconstrText = palloc(VARHDRSZ + maxReconstrLen);
304  SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
305 
 /* copy the parent data, then append this tuple's prefix */
306  if (in->level)
307  memcpy(VARDATA(reconstrText),
308  VARDATA(reconstructedValue),
309  in->level);
310  if (prefixSize)
311  memcpy(((char *) VARDATA(reconstrText)) + in->level,
312  VARDATA_ANY(prefixText),
313  prefixSize);
314  /* last byte of reconstrText will be filled in below */
315 
316  /*
317  * Scan the child nodes. For each one, complete the reconstructed value
318  * and see if it's consistent with the query. If so, emit an entry into
319  * the output arrays.
320  */
 /* output arrays are sized for the case where every node qualifies */
321  out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
322  out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
323  out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
324  out->nNodes = 0;
325 
326  for (i = 0; i < in->nNodes; i++)
327  {
328  int16 nodeChar = DatumGetInt16(in->nodeLabels[i]);
329  int thisLen;
330  bool res = true;
331  int j;
332 
333  /* If nodeChar is a dummy value, don't include it in data */
334  if (nodeChar <= 0)
335  thisLen = maxReconstrLen - 1;
336  else
337  {
338  ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
339  thisLen = maxReconstrLen;
340  }
341 
342  for (j = 0; j < in->nkeys; j++)
343  {
344  StrategyNumber strategy = in->scankeys[j].sk_strategy;
345  Name inName;
346  char *inStr;
347  int inSize;
348  int r;
349 
350  inName = DatumGetName(in->scankeys[j].sk_argument);
351  inStr = NameStr(*inName);
352  inSize = strlen(inStr);
353 
 /* compare only the bytes both values have (the shorter length) */
354  r = memcmp(VARDATA(reconstrText), inStr,
355  Min(inSize, thisLen));
356 
357  switch (strategy)
358  {
 /* NOTE(review): case label(s) missing here -- presumably the less / less-or-equal strategies; confirm */
361  if (r > 0)
362  res = false;
363  break;
 /* NOTE(review): case label missing here -- presumably the equality strategy; confirm */
365  if (r != 0 || inSize < thisLen)
366  res = false;
367  break;
 /* NOTE(review): case label(s) missing here -- presumably the greater / greater-or-equal strategies; confirm */
370  if (r < 0)
371  res = false;
372  break;
373  default:
374  elog(ERROR, "unrecognized strategy number: %d",
375  in->scankeys[j].sk_strategy);
376  break;
377  }
378 
379  if (!res)
380  break; /* no need to consider remaining conditions */
381  }
382 
383  if (res)
384  {
385  out->nodeNumbers[out->nNodes] = i;
386  out->levelAdds[out->nNodes] = thisLen - in->level;
 /* set the length for this node, then hand out a copy of the buffer */
387  SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
388  out->reconstructedValues[out->nNodes] =
389  datumCopy(PointerGetDatum(reconstrText), false, -1);
390  out->nNodes++;
391  }
392  }
393 
394  PG_RETURN_VOID();
395 }
396 
/*
 * SP-GiST "leaf consistent" support function (presumably
 * spgist_name_leaf_consistent, going by the prototype in this listing's
 * cross-reference index -- confirm).
 *
 * Rebuilds the full name from the reconstructed value plus the leaf datum
 * in a zero-filled NAMEDATALEN-byte buffer, stores it in out->leafValue,
 * and tests it against every scan key with a bytewise comparison.
 * out->recheck is always set to false.
 *
 * NOTE(review): this listing lacks the lines carrying embedded numbers 399,
 * 401, 402, 417 and 491 (presumably the function-name line, the
 * declarations of "in" and "out", and the final return of res), as well as
 * every "case" label of the switch below.  They must be restored before
 * this compiles.
 */
398 Datum
400 {
403  int level = in->level;
404  text *leafValue,
405  *reconstrValue = NULL;
406  char *fullValue;
407  int fullLen;
408  bool res;
409  int j;
410 
411  /* all tests are exact */
412  out->recheck = false;
413 
414  leafValue = DatumGetTextPP(in->leafDatum);
415 
416  /* As above, in->reconstructedValue isn't toasted or short. */
418  reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
419 
420  Assert(reconstrValue == NULL ? level == 0 :
421  VARSIZE_ANY_EXHDR(reconstrValue) == level);
422 
423  /* Reconstruct the Name represented by this leaf tuple */
 /* palloc0 zero-fills the buffer, so the copied bytes end up null-terminated */
424  fullValue = palloc0(NAMEDATALEN);
425  fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
426  Assert(fullLen < NAMEDATALEN);
427  if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
428  {
 /* empty leaf datum: the name is exactly the reconstructed value */
429  memcpy(fullValue, VARDATA(reconstrValue),
430  VARSIZE_ANY_EXHDR(reconstrValue));
431  }
432  else
433  {
 /* reconstructed part first (if any), then the leaf's bytes after it */
434  if (level)
435  memcpy(fullValue, VARDATA(reconstrValue), level);
436  if (VARSIZE_ANY_EXHDR(leafValue) > 0)
437  memcpy(fullValue + level, VARDATA_ANY(leafValue),
438  VARSIZE_ANY_EXHDR(leafValue));
439  }
440  out->leafValue = PointerGetDatum(fullValue);
441 
442  /* Perform the required comparison(s) */
443  res = true;
444  for (j = 0; j < in->nkeys; j++)
445  {
446  StrategyNumber strategy = in->scankeys[j].sk_strategy;
447  Name queryName = DatumGetName(in->scankeys[j].sk_argument);
448  char *queryStr = NameStr(*queryName);
449  int queryLen = strlen(queryStr);
450  int r;
451 
452  /* Non-collation-aware comparison */
453  r = memcmp(fullValue, queryStr, Min(queryLen, fullLen));
454 
 /* equal over the shared length: the shorter value sorts first */
455  if (r == 0)
456  {
457  if (queryLen > fullLen)
458  r = -1;
459  else if (queryLen < fullLen)
460  r = 1;
461  }
462 
463  switch (strategy)
464  {
 /* NOTE(review): case label missing here -- presumably the less-than strategy; confirm */
466  res = (r < 0);
467  break;
 /* NOTE(review): case label missing here -- presumably the less-or-equal strategy; confirm */
469  res = (r <= 0);
470  break;
 /* NOTE(review): case label missing here -- presumably the equality strategy; confirm */
472  res = (r == 0);
473  break;
 /* NOTE(review): case label missing here -- presumably the greater-or-equal strategy; confirm */
475  res = (r >= 0);
476  break;
 /* NOTE(review): case label missing here -- presumably the greater-than strategy; confirm */
478  res = (r > 0);
479  break;
480  default:
481  elog(ERROR, "unrecognized strategy number: %d",
482  in->scankeys[j].sk_strategy);
483  res = false;
484  break;
485  }
486 
487  if (!res)
488  break; /* no need to consider remaining conditions */
489  }
490 
492 }
493 
/*
 * SP-GiST "compress" support function (presumably spgist_name_compress,
 * going by the prototype in this listing's cross-reference index --
 * confirm).
 *
 * Converts the "name" argument into a text datum holding the same bytes,
 * using strlen() of the name as the length.
 *
 * NOTE(review): this listing lacks the line carrying embedded number 496,
 * presumably the function-name line.  It must be restored before this
 * compiles.
 */
495 Datum
497 {
498  Name inName = PG_GETARG_NAME(0);
499  char *inStr = NameStr(*inName);
500 
 /* build the text datum from the name's bytes up to its terminator */
501  PG_RETURN_DATUM(formTextDatum(inStr, strlen(inStr)));
502 }
#define NameStr(name)
Definition: c.h:700
#define Min(x, y)
Definition: c.h:958
#define VARHDRSZ
Definition: c.h:646
#define Assert(condition)
Definition: c.h:812
int16_t int16
Definition: c.h:480
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int j
Definition: isn.c:73
int i
Definition: isn.c:72
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
#define NAMEDATALEN
const void * data
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static Name DatumGetName(Datum X)
Definition: postgres.h:360
uintptr_t Datum
Definition: postgres.h:64
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:172
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static int16 DatumGetInt16(Datum X)
Definition: postgres.h:162
char * c
@ spgMatchNode
Definition: spgist.h:69
@ spgAddNode
Definition: spgist.h:70
@ spgSplitTuple
Definition: spgist.h:71
PG_FUNCTION_INFO_V1(spgist_name_config)
Datum spgist_name_compress(PG_FUNCTION_ARGS)
Datum spgist_name_config(PG_FUNCTION_ARGS)
static int commonPrefix(const char *a, const char *b, int lena, int lenb)
PG_MODULE_MAGIC
static Datum formTextDatum(const char *data, int datalen)
Datum spgist_name_leaf_consistent(PG_FUNCTION_ARGS)
Datum spgist_name_inner_consistent(PG_FUNCTION_ARGS)
Datum spgist_name_choose(PG_FUNCTION_ARGS)
static bool searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
uint16 StrategyNumber
Definition: stratnum.h:22
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
Datum sk_argument
Definition: skey.h:72
StrategyNumber sk_strategy
Definition: skey.h:68
Definition: c.h:695
Datum * nodeLabels
Definition: spgist.h:64
bool hasPrefix
Definition: spgist.h:61
Datum prefixDatum
Definition: spgist.h:62
int nNodes
Definition: spgist.h:63
Datum datum
Definition: spgist.h:55
int level
Definition: spgist.h:57
bool allTheSame
Definition: spgist.h:60
spgChooseResultType resultType
Definition: spgist.h:76
struct spgChooseOut::@51::@54 splitTuple
struct spgChooseOut::@51::@53 addNode
struct spgChooseOut::@51::@52 matchNode
union spgChooseOut::@51 result
Oid leafType
Definition: spgist.h:45
bool longValuesOK
Definition: spgist.h:47
bool canReturnData
Definition: spgist.h:46
Oid labelType
Definition: spgist.h:44
Oid prefixType
Definition: spgist.h:43
Datum reconstructedValue
Definition: spgist.h:140
ScanKey scankeys
Definition: spgist.h:134
Datum * nodeLabels
Definition: spgist.h:151
Datum * reconstructedValues
Definition: spgist.h:159
ScanKey scankeys
Definition: spgist.h:169
Datum reconstructedValue
Definition: spgist.h:175
Definition: c.h:641
#define VARHDRSZ_SHORT
Definition: varatt.h:255
#define SET_VARSIZE_SHORT(PTR, len)
Definition: varatt.h:306
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARATT_SHORT_MAX
Definition: varatt.h:257
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317