PostgreSQL Source Code  git master
gistutil.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * gistutil.c
4  * utilities routines for the postgres GiST index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gist/gistutil.c
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <math.h>
17 
18 #include "access/gist_private.h"
19 #include "access/htup_details.h"
20 #include "access/reloptions.h"
21 #include "catalog/pg_opclass.h"
22 #include "common/pg_prng.h"
23 #include "storage/indexfsm.h"
24 #include "storage/lmgr.h"
25 #include "utils/float.h"
26 #include "utils/fmgrprotos.h"
27 #include "utils/lsyscache.h"
28 #include "utils/snapmgr.h"
29 #include "utils/syscache.h"
30 
31 /*
32  * Write itup vector to page, has no control of free space.
33  */
34 void
36 {
37  int i;
38 
39  if (off == InvalidOffsetNumber)
40  off = (PageIsEmpty(page)) ? FirstOffsetNumber :
42 
43  for (i = 0; i < len; i++)
44  {
45  Size sz = IndexTupleSize(itup[i]);
46  OffsetNumber l;
47 
48  l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
49  if (l == InvalidOffsetNumber)
50  elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
51  i, len, (int) sz);
52  off++;
53  }
54 }
55 
56 /*
57  * Check space for itup vector on page
58  */
59 bool
60 gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
61 {
62  unsigned int size = freespace,
63  deleted = 0;
64  int i;
65 
66  for (i = 0; i < len; i++)
67  size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
68 
69  if (todelete != InvalidOffsetNumber)
70  {
71  IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
72 
73  deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
74  }
75 
76  return (PageGetFreeSpace(page) + deleted < size);
77 }
78 
79 bool
81 {
82  int i;
83  Size size = 0;
84 
85  for (i = 0; i < len; i++)
86  size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
87 
88  /* TODO: Consider fillfactor */
89  return (size <= GiSTPageSize);
90 }
91 
92 /*
93  * Read buffer into itup vector
94  */
95 IndexTuple *
96 gistextractpage(Page page, int *len /* out */ )
97 {
99  maxoff;
100  IndexTuple *itvec;
101 
102  maxoff = PageGetMaxOffsetNumber(page);
103  *len = maxoff;
104  itvec = palloc(sizeof(IndexTuple) * maxoff);
105  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
106  itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
107 
108  return itvec;
109 }
110 
111 /*
112  * join two vectors into one
113  */
114 IndexTuple *
115 gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
116 {
117  itvec = (IndexTuple *) repalloc(itvec, sizeof(IndexTuple) * ((*len) + addlen));
118  memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
119  *len += addlen;
120  return itvec;
121 }
122 
123 /*
124  * make plain IndexTuple vector
125  */
126 
128 gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
129 {
130  char *ptr,
131  *ret;
132  int i;
133 
134  *memlen = 0;
135 
136  for (i = 0; i < veclen; i++)
137  *memlen += IndexTupleSize(vec[i]);
138 
139  ptr = ret = palloc(*memlen);
140 
141  for (i = 0; i < veclen; i++)
142  {
143  memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
144  ptr += IndexTupleSize(vec[i]);
145  }
146 
147  return (IndexTupleData *) ret;
148 }
149 
150 /*
151  * Make unions of keys in IndexTuple vector (one union datum per index column).
152  * Union Datums are returned into the attr/isnull arrays.
153  * Resulting Datums aren't compressed.
154  */
155 void
156 gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
157  Datum *attr, bool *isnull)
158 {
159  int i;
160  GistEntryVector *evec;
161  int attrsize;
162 
163  evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
164 
165  for (i = 0; i < giststate->nonLeafTupdesc->natts; i++)
166  {
167  int j;
168 
169  /* Collect non-null datums for this column */
170  evec->n = 0;
171  for (j = 0; j < len; j++)
172  {
173  Datum datum;
174  bool IsNull;
175 
176  datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
177  &IsNull);
178  if (IsNull)
179  continue;
180 
181  gistdentryinit(giststate, i,
182  evec->vector + evec->n,
183  datum,
184  NULL, NULL, (OffsetNumber) 0,
185  false, IsNull);
186  evec->n++;
187  }
188 
189  /* If this column was all NULLs, the union is NULL */
190  if (evec->n == 0)
191  {
192  attr[i] = (Datum) 0;
193  isnull[i] = true;
194  }
195  else
196  {
197  if (evec->n == 1)
198  {
199  /* unionFn may expect at least two inputs */
200  evec->n = 2;
201  evec->vector[1] = evec->vector[0];
202  }
203 
204  /* Make union and store in attr array */
205  attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
206  giststate->supportCollation[i],
207  PointerGetDatum(evec),
208  PointerGetDatum(&attrsize));
209 
210  isnull[i] = false;
211  }
212  }
213 }
214 
215 /*
216  * Return an IndexTuple containing the result of applying the "union"
217  * method to the specified IndexTuple vector.
218  */
220 gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
221 {
222  Datum attr[INDEX_MAX_KEYS];
223  bool isnull[INDEX_MAX_KEYS];
224 
225  gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
226 
227  return gistFormTuple(giststate, r, attr, isnull, false);
228 }
229 
230 /*
231  * makes union of two key
232  */
233 void
234 gistMakeUnionKey(GISTSTATE *giststate, int attno,
235  GISTENTRY *entry1, bool isnull1,
236  GISTENTRY *entry2, bool isnull2,
237  Datum *dst, bool *dstisnull)
238 {
239  /* we need a GistEntryVector with room for exactly 2 elements */
240  union
241  {
242  GistEntryVector gev;
243  char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
244  } storage;
245  GistEntryVector *evec = &storage.gev;
246  int dstsize;
247 
248  evec->n = 2;
249 
250  if (isnull1 && isnull2)
251  {
252  *dstisnull = true;
253  *dst = (Datum) 0;
254  }
255  else
256  {
257  if (isnull1 == false && isnull2 == false)
258  {
259  evec->vector[0] = *entry1;
260  evec->vector[1] = *entry2;
261  }
262  else if (isnull1 == false)
263  {
264  evec->vector[0] = *entry1;
265  evec->vector[1] = *entry1;
266  }
267  else
268  {
269  evec->vector[0] = *entry2;
270  evec->vector[1] = *entry2;
271  }
272 
273  *dstisnull = false;
274  *dst = FunctionCall2Coll(&giststate->unionFn[attno],
275  giststate->supportCollation[attno],
276  PointerGetDatum(evec),
277  PointerGetDatum(&dstsize));
278  }
279 }
280 
281 bool
282 gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
283 {
284  bool result = false; /* silence compiler warning */
285 
286  FunctionCall3Coll(&giststate->equalFn[attno],
287  giststate->supportCollation[attno],
288  a, b,
289  PointerGetDatum(&result));
290  return result;
291 }
292 
293 /*
294  * Decompress all keys in tuple
295  */
296 void
298  OffsetNumber o, GISTENTRY *attdata, bool *isnull)
299 {
300  int i;
301 
302  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
303  {
304  Datum datum;
305 
306  datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
307  gistdentryinit(giststate, i, &attdata[i],
308  datum, r, p, o,
309  false, isnull[i]);
310  }
311 }
312 
313 /*
314  * Forms union of oldtup and addtup, if union == oldtup then return NULL
315  */
318 {
319  bool neednew = false;
320  GISTENTRY oldentries[INDEX_MAX_KEYS],
321  addentries[INDEX_MAX_KEYS];
322  bool oldisnull[INDEX_MAX_KEYS],
323  addisnull[INDEX_MAX_KEYS];
324  Datum attr[INDEX_MAX_KEYS];
325  bool isnull[INDEX_MAX_KEYS];
326  IndexTuple newtup = NULL;
327  int i;
328 
329  gistDeCompressAtt(giststate, r, oldtup, NULL,
330  (OffsetNumber) 0, oldentries, oldisnull);
331 
332  gistDeCompressAtt(giststate, r, addtup, NULL,
333  (OffsetNumber) 0, addentries, addisnull);
334 
335  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
336  {
337  gistMakeUnionKey(giststate, i,
338  oldentries + i, oldisnull[i],
339  addentries + i, addisnull[i],
340  attr + i, isnull + i);
341 
342  if (neednew)
343  /* we already need new key, so we can skip check */
344  continue;
345 
346  if (isnull[i])
347  /* union of key may be NULL if and only if both keys are NULL */
348  continue;
349 
350  if (!addisnull[i])
351  {
352  if (oldisnull[i] ||
353  !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
354  neednew = true;
355  }
356  }
357 
358  if (neednew)
359  {
360  /* need to update key */
361  newtup = gistFormTuple(giststate, r, attr, isnull, false);
362  newtup->t_tid = oldtup->t_tid;
363  }
364 
365  return newtup;
366 }
367 
368 /*
369  * Search an upper index page for the entry with lowest penalty for insertion
370  * of the new index key contained in "it".
371  *
372  * Returns the index of the page entry to insert into.
373  */
375 gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
376  GISTSTATE *giststate)
377 {
378  OffsetNumber result;
379  OffsetNumber maxoff;
380  OffsetNumber i;
381  float best_penalty[INDEX_MAX_KEYS];
382  GISTENTRY entry,
383  identry[INDEX_MAX_KEYS];
384  bool isnull[INDEX_MAX_KEYS];
385  int keep_current_best;
386 
387  Assert(!GistPageIsLeaf(p));
388 
389  gistDeCompressAtt(giststate, r,
390  it, NULL, (OffsetNumber) 0,
391  identry, isnull);
392 
393  /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
394  result = FirstOffsetNumber;
395 
396  /*
397  * The index may have multiple columns, and there's a penalty value for
398  * each column. The penalty associated with a column that appears earlier
399  * in the index definition is strictly more important than the penalty of
400  * a column that appears later in the index definition.
401  *
402  * best_penalty[j] is the best penalty we have seen so far for column j,
403  * or -1 when we haven't yet examined column j. Array entries to the
404  * right of the first -1 are undefined.
405  */
406  best_penalty[0] = -1;
407 
408  /*
409  * If we find a tuple that's exactly as good as the currently best one, we
410  * could use either one. When inserting a lot of tuples with the same or
411  * similar keys, it's preferable to descend down the same path when
412  * possible, as that's more cache-friendly. On the other hand, if all
413  * inserts land on the same leaf page after a split, we're never going to
414  * insert anything to the other half of the split, and will end up using
415  * only 50% of the available space. Distributing the inserts evenly would
416  * lead to better space usage, but that hurts cache-locality during
417  * insertion. To get the best of both worlds, when we find a tuple that's
418  * exactly as good as the previous best, choose randomly whether to stick
419  * to the old best, or use the new one. Once we decide to stick to the
420  * old best, we keep sticking to it for any subsequent equally good tuples
421  * we might find. This favors tuples with low offsets, but still allows
422  * some inserts to go to other equally-good subtrees.
423  *
424  * keep_current_best is -1 if we haven't yet had to make a random choice
425  * whether to keep the current best tuple. If we have done so, and
426  * decided to keep it, keep_current_best is 1; if we've decided to
427  * replace, keep_current_best is 0. (This state will be reset to -1 as
428  * soon as we've made the replacement, but sometimes we make the choice in
429  * advance of actually finding a replacement best tuple.)
430  */
431  keep_current_best = -1;
432 
433  /*
434  * Loop over tuples on page.
435  */
436  maxoff = PageGetMaxOffsetNumber(p);
437  Assert(maxoff >= FirstOffsetNumber);
438 
439  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
440  {
442  bool zero_penalty;
443  int j;
444 
445  zero_penalty = true;
446 
447  /* Loop over index attributes. */
448  for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
449  {
450  Datum datum;
451  float usize;
452  bool IsNull;
453 
454  /* Compute penalty for this column. */
455  datum = index_getattr(itup, j + 1, giststate->leafTupdesc,
456  &IsNull);
457  gistdentryinit(giststate, j, &entry, datum, r, p, i,
458  false, IsNull);
459  usize = gistpenalty(giststate, j, &entry, IsNull,
460  &identry[j], isnull[j]);
461  if (usize > 0)
462  zero_penalty = false;
463 
464  if (best_penalty[j] < 0 || usize < best_penalty[j])
465  {
466  /*
467  * New best penalty for column. Tentatively select this tuple
468  * as the target, and record the best penalty. Then reset the
469  * next column's penalty to "unknown" (and indirectly, the
470  * same for all the ones to its right). This will force us to
471  * adopt this tuple's penalty values as the best for all the
472  * remaining columns during subsequent loop iterations.
473  */
474  result = i;
475  best_penalty[j] = usize;
476 
478  best_penalty[j + 1] = -1;
479 
480  /* we have new best, so reset keep-it decision */
481  keep_current_best = -1;
482  }
483  else if (best_penalty[j] == usize)
484  {
485  /*
486  * The current tuple is exactly as good for this column as the
487  * best tuple seen so far. The next iteration of this loop
488  * will compare the next column.
489  */
490  }
491  else
492  {
493  /*
494  * The current tuple is worse for this column than the best
495  * tuple seen so far. Skip the remaining columns and move on
496  * to the next tuple, if any.
497  */
498  zero_penalty = false; /* so outer loop won't exit */
499  break;
500  }
501  }
502 
503  /*
504  * If we looped past the last column, and did not update "result",
505  * then this tuple is exactly as good as the prior best tuple.
506  */
507  if (j == IndexRelationGetNumberOfKeyAttributes(r) && result != i)
508  {
509  if (keep_current_best == -1)
510  {
511  /* we didn't make the random choice yet for this old best */
512  keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
513  }
514  if (keep_current_best == 0)
515  {
516  /* we choose to use the new tuple */
517  result = i;
518  /* choose again if there are even more exactly-as-good ones */
519  keep_current_best = -1;
520  }
521  }
522 
523  /*
524  * If we find a tuple with zero penalty for all columns, and we've
525  * decided we don't want to search for another tuple with equal
526  * penalty, there's no need to examine remaining tuples; just break
527  * out of the loop and return it.
528  */
529  if (zero_penalty)
530  {
531  if (keep_current_best == -1)
532  {
533  /* we didn't make the random choice yet for this old best */
534  keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
535  }
536  if (keep_current_best == 1)
537  break;
538  }
539  }
540 
541  return result;
542 }
543 
544 /*
545  * initialize a GiST entry with a decompressed version of key
546  */
547 void
548 gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
549  Datum k, Relation r, Page pg, OffsetNumber o,
550  bool l, bool isNull)
551 {
552  if (!isNull)
553  {
554  GISTENTRY *dep;
555 
556  gistentryinit(*e, k, r, pg, o, l);
557 
558  /* there may not be a decompress function in opclass */
559  if (!OidIsValid(giststate->decompressFn[nkey].fn_oid))
560  return;
561 
562  dep = (GISTENTRY *)
564  giststate->supportCollation[nkey],
565  PointerGetDatum(e)));
566  /* decompressFn may just return the given pointer */
567  if (dep != e)
568  gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
569  dep->leafkey);
570  }
571  else
572  gistentryinit(*e, (Datum) 0, r, pg, o, l);
573 }
574 
577  const Datum *attdata, const bool *isnull, bool isleaf)
578 {
579  Datum compatt[INDEX_MAX_KEYS];
580  IndexTuple res;
581 
582  gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
583 
584  res = index_form_tuple(isleaf ? giststate->leafTupdesc :
585  giststate->nonLeafTupdesc,
586  compatt, isnull);
587 
588  /*
589  * The offset number on tuples on internal pages is unused. For historical
590  * reasons, it is set to 0xffff.
591  */
592  ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
593  return res;
594 }
595 
596 void
598  const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
599 {
600  int i;
601 
602  /*
603  * Call the compress method on each attribute.
604  */
605  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
606  {
607  if (isnull[i])
608  compatt[i] = (Datum) 0;
609  else
610  {
611  GISTENTRY centry;
612  GISTENTRY *cep;
613 
614  gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
615  isleaf);
616  /* there may not be a compress function in opclass */
617  if (OidIsValid(giststate->compressFn[i].fn_oid))
618  cep = (GISTENTRY *)
620  giststate->supportCollation[i],
621  PointerGetDatum(&centry)));
622  else
623  cep = &centry;
624  compatt[i] = cep->key;
625  }
626  }
627 
628  if (isleaf)
629  {
630  /*
631  * Emplace each included attribute if any.
632  */
633  for (; i < r->rd_att->natts; i++)
634  {
635  if (isnull[i])
636  compatt[i] = (Datum) 0;
637  else
638  compatt[i] = attdata[i];
639  }
640  }
641 }
642 
643 /*
644  * initialize a GiST entry with fetched value in key field
645  */
646 static Datum
647 gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
648 {
649  GISTENTRY fentry;
650  GISTENTRY *fep;
651 
652  gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false);
653 
654  fep = (GISTENTRY *)
655  DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey],
656  giststate->supportCollation[nkey],
657  PointerGetDatum(&fentry)));
658 
659  /* fetchFn set 'key', return it to the caller */
660  return fep->key;
661 }
662 
663 /*
664  * Fetch all keys in tuple.
665  * Returns a new HeapTuple containing the originally-indexed data.
666  */
667 HeapTuple
669 {
670  MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt);
672  bool isnull[INDEX_MAX_KEYS];
673  int i;
674 
675  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
676  {
677  Datum datum;
678 
679  datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
680 
681  if (giststate->fetchFn[i].fn_oid != InvalidOid)
682  {
683  if (!isnull[i])
684  fetchatt[i] = gistFetchAtt(giststate, i, datum, r);
685  else
686  fetchatt[i] = (Datum) 0;
687  }
688  else if (giststate->compressFn[i].fn_oid == InvalidOid)
689  {
690  /*
691  * If opclass does not provide compress method that could change
692  * original value, att is necessarily stored in original form.
693  */
694  if (!isnull[i])
695  fetchatt[i] = datum;
696  else
697  fetchatt[i] = (Datum) 0;
698  }
699  else
700  {
701  /*
702  * Index-only scans not supported for this column. Since the
703  * planner chose an index-only scan anyway, it is not interested
704  * in this column, and we can replace it with a NULL.
705  */
706  isnull[i] = true;
707  fetchatt[i] = (Datum) 0;
708  }
709  }
710 
711  /*
712  * Get each included attribute.
713  */
714  for (; i < r->rd_att->natts; i++)
715  {
716  fetchatt[i] = index_getattr(tuple, i + 1, giststate->leafTupdesc,
717  &isnull[i]);
718  }
719  MemoryContextSwitchTo(oldcxt);
720 
721  return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull);
722 }
723 
724 float
725 gistpenalty(GISTSTATE *giststate, int attno,
726  GISTENTRY *orig, bool isNullOrig,
727  GISTENTRY *add, bool isNullAdd)
728 {
729  float penalty = 0.0;
730 
731  if (giststate->penaltyFn[attno].fn_strict == false ||
732  (isNullOrig == false && isNullAdd == false))
733  {
734  FunctionCall3Coll(&giststate->penaltyFn[attno],
735  giststate->supportCollation[attno],
736  PointerGetDatum(orig),
737  PointerGetDatum(add),
738  PointerGetDatum(&penalty));
739  /* disallow negative or NaN penalty */
740  if (isnan(penalty) || penalty < 0.0)
741  penalty = 0.0;
742  }
743  else if (isNullOrig && isNullAdd)
744  penalty = 0.0;
745  else
746  {
747  /* try to prevent mixing null and non-null values */
748  penalty = get_float4_infinity();
749  }
750 
751  return penalty;
752 }
753 
754 /*
755  * Initialize a new index page
756  */
757 void
759 {
760  GISTPageOpaque opaque;
761 
762  PageInit(page, BLCKSZ, sizeof(GISTPageOpaqueData));
763 
764  opaque = GistPageGetOpaque(page);
765  opaque->rightlink = InvalidBlockNumber;
766  opaque->flags = f;
767  opaque->gist_page_id = GIST_PAGE_ID;
768 }
769 
770 /*
771  * Initialize a new index buffer
772  */
773 void
775 {
776  Page page;
777 
778  page = BufferGetPage(b);
779  gistinitpage(page, f);
780 }
781 
782 /*
783  * Verify that a freshly-read page looks sane.
784  */
785 void
787 {
788  Page page = BufferGetPage(buf);
789 
790  /*
791  * ReadBuffer verifies that every newly-read page passes
792  * PageHeaderIsValid, which means it either contains a reasonably sane
793  * page header or is all-zero. We have to defend against the all-zero
794  * case, however.
795  */
796  if (PageIsNew(page))
797  ereport(ERROR,
798  (errcode(ERRCODE_INDEX_CORRUPTED),
799  errmsg("index \"%s\" contains unexpected zero page at block %u",
802  errhint("Please REINDEX it.")));
803 
804  /*
805  * Additionally check that the special area looks sane.
806  */
807  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
808  ereport(ERROR,
809  (errcode(ERRCODE_INDEX_CORRUPTED),
810  errmsg("index \"%s\" contains corrupted page at block %u",
813  errhint("Please REINDEX it.")));
814 }
815 
816 
817 /*
818  * Allocate a new page (either by recycling, or by extending the index file)
819  *
820  * The returned buffer is already pinned and exclusive-locked
821  *
822  * Caller is responsible for initializing the page by calling GISTInitBuffer
823  */
824 Buffer
826 {
827  Buffer buffer;
828 
829  /* First, try to get a page from FSM */
830  for (;;)
831  {
832  BlockNumber blkno = GetFreeIndexPage(r);
833 
834  if (blkno == InvalidBlockNumber)
835  break; /* nothing left in FSM */
836 
837  buffer = ReadBuffer(r, blkno);
838 
839  /*
840  * We have to guard against the possibility that someone else already
841  * recycled this page; the buffer may be locked if so.
842  */
843  if (ConditionalLockBuffer(buffer))
844  {
845  Page page = BufferGetPage(buffer);
846 
847  /*
848  * If the page was never initialized, it's OK to use.
849  */
850  if (PageIsNew(page))
851  return buffer;
852 
853  gistcheckpage(r, buffer);
854 
855  /*
856  * Otherwise, recycle it if deleted, and too old to have any
857  * processes interested in it.
858  */
859  if (gistPageRecyclable(page))
860  {
861  /*
862  * If we are generating WAL for Hot Standby then create a WAL
863  * record that will allow us to conflict with queries running
864  * on standby, in case they have snapshots older than the
865  * page's deleteXid.
866  */
868  gistXLogPageReuse(r, heaprel, blkno, GistPageGetDeleteXid(page));
869 
870  return buffer;
871  }
872 
873  LockBuffer(buffer, GIST_UNLOCK);
874  }
875 
876  /* Can't use it, so release buffer and try again */
877  ReleaseBuffer(buffer);
878  }
879 
880  /* Must extend the file */
881  buffer = ExtendBufferedRel(BMR_REL(r), MAIN_FORKNUM, NULL,
882  EB_LOCK_FIRST);
883 
884  return buffer;
885 }
886 
887 /* Can this page be recycled yet? */
888 bool
890 {
891  if (PageIsNew(page))
892  return true;
893  if (GistPageIsDeleted(page))
894  {
895  /*
896  * The page was deleted, but when? If it was just deleted, a scan
897  * might have seen the downlink to it, and will read the page later.
898  * As long as that can happen, we must keep the deleted page around as
899  * a tombstone.
900  *
901  * For that check if the deletion XID could still be visible to
902  * anyone. If not, then no scan that's still in progress could have
903  * seen its downlink, and we can recycle it.
904  */
905  FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
906 
907  return GlobalVisCheckRemovableFullXid(NULL, deletexid_full);
908  }
909  return false;
910 }
911 
912 bytea *
913 gistoptions(Datum reloptions, bool validate)
914 {
915  static const relopt_parse_elt tab[] = {
916  {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
917  {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)}
918  };
919 
920  return (bytea *) build_reloptions(reloptions, validate,
922  sizeof(GiSTOptions),
923  tab, lengthof(tab));
924 }
925 
926 /*
927  * gistproperty() -- Check boolean properties of indexes.
928  *
929  * This is optional for most AMs, but is required for GiST because the core
930  * property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle
931  * AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn.
932  */
933 bool
934 gistproperty(Oid index_oid, int attno,
935  IndexAMProperty prop, const char *propname,
936  bool *res, bool *isnull)
937 {
938  Oid opclass,
939  opfamily,
940  opcintype;
941  int16 procno;
942 
943  /* Only answer column-level inquiries */
944  if (attno == 0)
945  return false;
946 
947  /*
948  * Currently, GiST distance-ordered scans require that there be a distance
949  * function in the opclass with the default types (i.e. the one loaded
950  * into the relcache entry, see initGISTstate). So we assume that if such
951  * a function exists, then there's a reason for it (rather than grubbing
952  * through all the opfamily's operators to find an ordered one).
953  *
954  * Essentially the same code can test whether we support returning the
955  * column data, since that's true if the opclass provides a fetch proc.
956  */
957 
958  switch (prop)
959  {
961  procno = GIST_DISTANCE_PROC;
962  break;
963  case AMPROP_RETURNABLE:
964  procno = GIST_FETCH_PROC;
965  break;
966  default:
967  return false;
968  }
969 
970  /* First we need to know the column's opclass. */
971  opclass = get_index_column_opclass(index_oid, attno);
972  if (!OidIsValid(opclass))
973  {
974  *isnull = true;
975  return true;
976  }
977 
978  /* Now look up the opclass family and input datatype. */
979  if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
980  {
981  *isnull = true;
982  return true;
983  }
984 
985  /* And now we can check whether the function is provided. */
986 
987  *res = SearchSysCacheExists4(AMPROCNUM,
988  ObjectIdGetDatum(opfamily),
989  ObjectIdGetDatum(opcintype),
990  ObjectIdGetDatum(opcintype),
991  Int16GetDatum(procno));
992 
993  /*
994  * Special case: even without a fetch function, AMPROP_RETURNABLE is true
995  * if the opclass has no compress function.
996  */
997  if (prop == AMPROP_RETURNABLE && !*res)
998  {
999  *res = !SearchSysCacheExists4(AMPROCNUM,
1000  ObjectIdGetDatum(opfamily),
1001  ObjectIdGetDatum(opcintype),
1002  ObjectIdGetDatum(opcintype),
1004  }
1005 
1006  *isnull = false;
1007 
1008  return true;
1009 }
1010 
1011 /*
1012  * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
1013  * splits anyway. This function provides a fake sequence of LSNs for that
1014  * purpose.
1015  */
1016 XLogRecPtr
1018 {
1019  if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
1020  {
1021  /*
1022  * Temporary relations are only accessible in our session, so a simple
1023  * backend-local counter will do.
1024  */
1025  static XLogRecPtr counter = FirstNormalUnloggedLSN;
1026 
1027  return counter++;
1028  }
1029  else if (RelationIsPermanent(rel))
1030  {
1031  /*
1032  * WAL-logging on this relation will start after commit, so its LSNs
1033  * must be distinct numbers smaller than the LSN at the next commit.
1034  * Emit a dummy WAL record if insert-LSN hasn't advanced after the
1035  * last call.
1036  */
1037  static XLogRecPtr lastlsn = InvalidXLogRecPtr;
1038  XLogRecPtr currlsn = GetXLogInsertRecPtr();
1039 
1040  /* Shouldn't be called for WAL-logging relations */
1041  Assert(!RelationNeedsWAL(rel));
1042 
1043  /* No need for an actual record if we already have a distinct LSN */
1044  if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
1045  currlsn = gistXLogAssignLSN();
1046 
1047  lastlsn = currlsn;
1048  return currlsn;
1049  }
1050  else
1051  {
1052  /*
1053  * Unlogged relations are accessible from other backends, and survive
1054  * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
1055  */
1056  Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
1057  return GetFakeLSNForUnloggedRel();
1058  }
1059 }
1060 
1061 /*
1062  * Returns the same number that was received.
1063  *
1064  * This is for GiST opclasses that use the RT*StrategyNumber constants.
1065  */
1066 Datum
1068 {
1069  StrategyNumber strat = PG_GETARG_UINT16(0);
1070 
1071  PG_RETURN_UINT16(strat);
1072 }
1073 
1074 /*
1075  * Returns the opclass's private stratnum used for the given strategy.
1076  *
1077  * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
1078  * and returns the result.
1079  * Returns InvalidStrategy if the function is not defined.
1080  */
1083 {
1084  Oid opfamily;
1085  Oid opcintype;
1086  Oid funcid;
1087  Datum result;
1088 
1089  /* Look up the opclass family and input datatype. */
1090  if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
1091  return InvalidStrategy;
1092 
1093  /* Check whether the function is provided. */
1094  funcid = get_opfamily_proc(opfamily, opcintype, opcintype, GIST_STRATNUM_PROC);
1095  if (!OidIsValid(funcid))
1096  return InvalidStrategy;
1097 
1098  /* Ask the translation function */
1099  result = OidFunctionCall1Coll(funcid, InvalidOid, UInt16GetDatum(strat));
1100  return DatumGetUInt16(result);
1101 }
IndexAMProperty
Definition: amapi.h:35
@ AMPROP_DISTANCE_ORDERABLE
Definition: amapi.h:42
@ AMPROP_RETURNABLE
Definition: amapi.h:43
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
int Buffer
Definition: buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3378
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:839
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:4822
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4561
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4796
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:735
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
@ EB_LOCK_FIRST
Definition: bufmgr.h:85
#define BMR_REL(p_rel)
Definition: bufmgr.h:106
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
static uint16 PageGetSpecialSize(Page page)
Definition: bufpage.h:313
unsigned int uint32
Definition: c.h:495
signed short int16
Definition: c.h:482
#define MAXALIGN(LEN)
Definition: c.h:800
#define lengthof(array)
Definition: c.h:777
#define OidIsValid(objectId)
Definition: c.h:764
size_t Size
Definition: c.h:594
int errhint(const char *fmt,...)
Definition: elog.c:1322
int errcode(int sqlerrcode)
Definition: elog.c:860
int errmsg(const char *fmt,...)
Definition: elog.c:1075
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
static float4 get_float4_infinity(void)
Definition: float.h:74
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
Datum OidFunctionCall1Coll(Oid functionId, Oid collation, Datum arg1)
Definition: fmgr.c:1411
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition: fmgr.c:1171
Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1)
Definition: fmgr.c:1129
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_UINT16(x)
Definition: fmgr.h:357
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define GIST_STRATNUM_PROC
Definition: gist.h:42
#define GIST_PAGE_ID
Definition: gist.h:111
struct GISTENTRY GISTENTRY
#define GIST_FETCH_PROC
Definition: gist.h:39
#define GIST_COMPRESS_PROC
Definition: gist.h:33
#define GEVHDRSZ
Definition: gist.h:239
#define GistPageIsLeaf(page)
Definition: gist.h:169
#define GIST_DISTANCE_PROC
Definition: gist.h:38
static FullTransactionId GistPageGetDeleteXid(Page page)
Definition: gist.h:215
#define GistPageIsDeleted(page)
Definition: gist.h:172
#define GistPageGetOpaque(page)
Definition: gist.h:167
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
#define GIST_UNLOCK
Definition: gist_private.h:44
#define GiSTPageSize
Definition: gist_private.h:476
void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, OffsetNumber o, GISTENTRY *attdata, bool *isnull)
Definition: gistutil.c:297
Buffer gistNewBuffer(Relation r, Relation heaprel)
Definition: gistutil.c:825
bool gistproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: gistutil.c:934
bool gistPageRecyclable(Page page)
Definition: gistutil.c:889
void gistMakeUnionKey(GISTSTATE *giststate, int attno, GISTENTRY *entry1, bool isnull1, GISTENTRY *entry2, bool isnull2, Datum *dst, bool *dstisnull)
Definition: gistutil.c:234
void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, Datum *attr, bool *isnull)
Definition: gistutil.c:156
bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
Definition: gistutil.c:60
HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple)
Definition: gistutil.c:668
bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
Definition: gistutil.c:282
static Datum gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
Definition: gistutil.c:647
void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
Definition: gistutil.c:35
IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf)
Definition: gistutil.c:576
IndexTuple * gistextractpage(Page page, int *len)
Definition: gistutil.c:96
void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, bool l, bool isNull)
Definition: gistutil.c:548
bool gistfitpage(IndexTuple *itvec, int len)
Definition: gistutil.c:80
IndexTuple gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
Definition: gistutil.c:317
OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate)
Definition: gistutil.c:375
XLogRecPtr gistGetFakeLSN(Relation rel)
Definition: gistutil.c:1017
void gistCompressValues(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
Definition: gistutil.c:597
Datum gist_stratnum_identity(PG_FUNCTION_ARGS)
Definition: gistutil.c:1067
StrategyNumber GistTranslateStratnum(Oid opclass, StrategyNumber strat)
Definition: gistutil.c:1082
IndexTuple * gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
Definition: gistutil.c:115
void gistinitpage(Page page, uint32 f)
Definition: gistutil.c:758
bytea * gistoptions(Datum reloptions, bool validate)
Definition: gistutil.c:913
IndexTuple gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
Definition: gistutil.c:220
void GISTInitBuffer(Buffer b, uint32 f)
Definition: gistutil.c:774
IndexTupleData * gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
Definition: gistutil.c:128
void gistcheckpage(Relation rel, Buffer buf)
Definition: gistutil.c:786
float gistpenalty(GISTSTATE *giststate, int attno, GISTENTRY *orig, bool isNullOrig, GISTENTRY *add, bool isNullAdd)
Definition: gistutil.c:725
XLogRecPtr gistXLogAssignLSN(void)
Definition: gistxlog.c:578
void gistXLogPageReuse(Relation rel, Relation heaprel, BlockNumber blkno, FullTransactionId deleteXid)
Definition: gistxlog.c:596
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
#define storage
Definition: indent_codes.h:68
BlockNumber GetFreeIndexPage(Relation rel)
Definition: indexfsm.c:38
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: indextuple.c:44
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Pointer Item
Definition: item.h:17
struct ItemIdData ItemIdData
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexTupleSize(itup)
Definition: itup.h:70
static Datum index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
Definition: itup.h:117
Assert(fmt[strlen(fmt) - 1] !='\n')
bool get_opclass_opfamily_and_input_type(Oid opclass, Oid *opfamily, Oid *opcintype)
Definition: lsyscache.c:1212
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:795
Oid get_index_column_opclass(Oid index_oid, int attno)
Definition: lsyscache.c:3467
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1451
void * palloc(Size size)
Definition: mcxt.c:1201
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
#define INDEX_MAX_KEYS
const void size_t len
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
bool pg_prng_bool(pg_prng_state *state)
Definition: pg_prng.c:277
static char * buf
Definition: pg_test_fsync.c:73
int fillfactor
Definition: pgbench.c:187
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:172
static Datum UInt16GetDatum(uint16 X)
Definition: postgres.h:192
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static uint16 DatumGetUInt16(Datum X)
Definition: postgres.h:182
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
e
Definition: preproc-init.c:82
bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4227
#define RelationGetRelationName(relation)
Definition: rel.h:538
#define RelationNeedsWAL(relation)
Definition: rel.h:627
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:523
#define RelationIsPermanent(relation)
Definition: rel.h:616
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
Definition: reloptions.c:1910
@ RELOPT_KIND_GIST
Definition: reloptions.h:47
@ RELOPT_TYPE_ENUM
Definition: reloptions.h:34
@ RELOPT_TYPE_INT
Definition: reloptions.h:32
@ MAIN_FORKNUM
Definition: relpath.h:50
uint16 StrategyNumber
Definition: stratnum.h:22
#define InvalidStrategy
Definition: stratnum.h:24
Oid fn_oid
Definition: fmgr.h:59
bool fn_strict
Definition: fmgr.h:61
OffsetNumber offset
Definition: gist.h:163
Datum key
Definition: gist.h:160
Page page
Definition: gist.h:162
Relation rel
Definition: gist.h:161
bool leafkey
Definition: gist.h:164
uint16 gist_page_id
Definition: gist.h:82
BlockNumber rightlink
Definition: gist.h:80
uint16 flags
Definition: gist.h:81
FmgrInfo fetchFn[INDEX_MAX_KEYS]
Definition: gist_private.h:94
TupleDesc leafTupdesc
Definition: gist_private.h:80
TupleDesc fetchTupdesc
Definition: gist_private.h:83
TupleDesc nonLeafTupdesc
Definition: gist_private.h:81
FmgrInfo penaltyFn[INDEX_MAX_KEYS]
Definition: gist_private.h:90
MemoryContext tempCxt
Definition: gist_private.h:78
Oid supportCollation[INDEX_MAX_KEYS]
Definition: gist_private.h:97
FmgrInfo decompressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:89
FmgrInfo compressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:88
FmgrInfo equalFn[INDEX_MAX_KEYS]
Definition: gist_private.h:92
FmgrInfo unionFn[INDEX_MAX_KEYS]
Definition: gist_private.h:87
GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER]
Definition: gist.h:236
int32 n
Definition: gist.h:235
ItemPointerData t_tid
Definition: itup.h:37
TupleDesc rd_att
Definition: rel.h:112
Form_pg_class rd_rel
Definition: rel.h:111
Definition: c.h:676
#define SearchSysCacheExists4(cacheId, key1, key2, key3, key4)
Definition: syscache.h:101
#define fetchatt(A, T)
Definition: tupmacs.h:46
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9272
XLogRecPtr GetFakeLSNForUnloggedRel(void)
Definition: xlog.c:4499
#define XLogStandbyInfoActive()
Definition: xlog.h:121
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28