PostgreSQL Source Code  git master
gistutil.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * gistutil.c
4  * utilities routines for the postgres GiST index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gist/gistutil.c
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <math.h>
17 
18 #include "access/gist_private.h"
19 #include "access/htup_details.h"
20 #include "access/reloptions.h"
21 #include "common/pg_prng.h"
22 #include "storage/indexfsm.h"
23 #include "utils/float.h"
24 #include "utils/fmgrprotos.h"
25 #include "utils/lsyscache.h"
26 #include "utils/rel.h"
27 #include "utils/snapmgr.h"
28 #include "utils/syscache.h"
29 
30 /*
31  * Write itup vector to page, has no control of free space.
32  */
33 void
35 {
36  int i;
37 
38  if (off == InvalidOffsetNumber)
39  off = (PageIsEmpty(page)) ? FirstOffsetNumber :
41 
42  for (i = 0; i < len; i++)
43  {
44  Size sz = IndexTupleSize(itup[i]);
45  OffsetNumber l;
46 
47  l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
48  if (l == InvalidOffsetNumber)
49  elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
50  i, len, (int) sz);
51  off++;
52  }
53 }
54 
55 /*
56  * Check space for itup vector on page
57  */
58 bool
59 gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
60 {
61  unsigned int size = freespace,
62  deleted = 0;
63  int i;
64 
65  for (i = 0; i < len; i++)
66  size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
67 
68  if (todelete != InvalidOffsetNumber)
69  {
70  IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
71 
72  deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
73  }
74 
75  return (PageGetFreeSpace(page) + deleted < size);
76 }
77 
78 bool
80 {
81  int i;
82  Size size = 0;
83 
84  for (i = 0; i < len; i++)
85  size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
86 
87  /* TODO: Consider fillfactor */
88  return (size <= GiSTPageSize);
89 }
90 
91 /*
92  * Read buffer into itup vector
93  */
94 IndexTuple *
95 gistextractpage(Page page, int *len /* out */ )
96 {
98  maxoff;
99  IndexTuple *itvec;
100 
101  maxoff = PageGetMaxOffsetNumber(page);
102  *len = maxoff;
103  itvec = palloc(sizeof(IndexTuple) * maxoff);
104  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
105  itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
106 
107  return itvec;
108 }
109 
110 /*
111  * join two vectors into one
112  */
113 IndexTuple *
114 gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
115 {
116  itvec = (IndexTuple *) repalloc(itvec, sizeof(IndexTuple) * ((*len) + addlen));
117  memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
118  *len += addlen;
119  return itvec;
120 }
121 
122 /*
123  * make plain IndexTuple vector
124  */
125 
127 gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
128 {
129  char *ptr,
130  *ret;
131  int i;
132 
133  *memlen = 0;
134 
135  for (i = 0; i < veclen; i++)
136  *memlen += IndexTupleSize(vec[i]);
137 
138  ptr = ret = palloc(*memlen);
139 
140  for (i = 0; i < veclen; i++)
141  {
142  memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
143  ptr += IndexTupleSize(vec[i]);
144  }
145 
146  return (IndexTupleData *) ret;
147 }
148 
149 /*
150  * Make unions of keys in IndexTuple vector (one union datum per index column).
151  * Union Datums are returned into the attr/isnull arrays.
152  * Resulting Datums aren't compressed.
153  */
154 void
155 gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
156  Datum *attr, bool *isnull)
157 {
158  int i;
159  GistEntryVector *evec;
160  int attrsize;
161 
162  evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
163 
164  for (i = 0; i < giststate->nonLeafTupdesc->natts; i++)
165  {
166  int j;
167 
168  /* Collect non-null datums for this column */
169  evec->n = 0;
170  for (j = 0; j < len; j++)
171  {
172  Datum datum;
173  bool IsNull;
174 
175  datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
176  &IsNull);
177  if (IsNull)
178  continue;
179 
180  gistdentryinit(giststate, i,
181  evec->vector + evec->n,
182  datum,
183  NULL, NULL, (OffsetNumber) 0,
184  false, IsNull);
185  evec->n++;
186  }
187 
188  /* If this column was all NULLs, the union is NULL */
189  if (evec->n == 0)
190  {
191  attr[i] = (Datum) 0;
192  isnull[i] = true;
193  }
194  else
195  {
196  if (evec->n == 1)
197  {
198  /* unionFn may expect at least two inputs */
199  evec->n = 2;
200  evec->vector[1] = evec->vector[0];
201  }
202 
203  /* Make union and store in attr array */
204  attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
205  giststate->supportCollation[i],
206  PointerGetDatum(evec),
207  PointerGetDatum(&attrsize));
208 
209  isnull[i] = false;
210  }
211  }
212 }
213 
214 /*
215  * Return an IndexTuple containing the result of applying the "union"
216  * method to the specified IndexTuple vector.
217  */
219 gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
220 {
221  Datum attr[INDEX_MAX_KEYS];
222  bool isnull[INDEX_MAX_KEYS];
223 
224  gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
225 
226  return gistFormTuple(giststate, r, attr, isnull, false);
227 }
228 
229 /*
230  * makes union of two key
231  */
232 void
233 gistMakeUnionKey(GISTSTATE *giststate, int attno,
234  GISTENTRY *entry1, bool isnull1,
235  GISTENTRY *entry2, bool isnull2,
236  Datum *dst, bool *dstisnull)
237 {
238  /* we need a GistEntryVector with room for exactly 2 elements */
239  union
240  {
241  GistEntryVector gev;
242  char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
243  } storage;
244  GistEntryVector *evec = &storage.gev;
245  int dstsize;
246 
247  evec->n = 2;
248 
249  if (isnull1 && isnull2)
250  {
251  *dstisnull = true;
252  *dst = (Datum) 0;
253  }
254  else
255  {
256  if (isnull1 == false && isnull2 == false)
257  {
258  evec->vector[0] = *entry1;
259  evec->vector[1] = *entry2;
260  }
261  else if (isnull1 == false)
262  {
263  evec->vector[0] = *entry1;
264  evec->vector[1] = *entry1;
265  }
266  else
267  {
268  evec->vector[0] = *entry2;
269  evec->vector[1] = *entry2;
270  }
271 
272  *dstisnull = false;
273  *dst = FunctionCall2Coll(&giststate->unionFn[attno],
274  giststate->supportCollation[attno],
275  PointerGetDatum(evec),
276  PointerGetDatum(&dstsize));
277  }
278 }
279 
280 bool
281 gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
282 {
283  bool result = false; /* silence compiler warning */
284 
285  FunctionCall3Coll(&giststate->equalFn[attno],
286  giststate->supportCollation[attno],
287  a, b,
288  PointerGetDatum(&result));
289  return result;
290 }
291 
292 /*
293  * Decompress all keys in tuple
294  */
295 void
297  OffsetNumber o, GISTENTRY *attdata, bool *isnull)
298 {
299  int i;
300 
301  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
302  {
303  Datum datum;
304 
305  datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
306  gistdentryinit(giststate, i, &attdata[i],
307  datum, r, p, o,
308  false, isnull[i]);
309  }
310 }
311 
312 /*
313  * Forms union of oldtup and addtup, if union == oldtup then return NULL
314  */
317 {
318  bool neednew = false;
319  GISTENTRY oldentries[INDEX_MAX_KEYS],
320  addentries[INDEX_MAX_KEYS];
321  bool oldisnull[INDEX_MAX_KEYS],
322  addisnull[INDEX_MAX_KEYS];
323  Datum attr[INDEX_MAX_KEYS];
324  bool isnull[INDEX_MAX_KEYS];
325  IndexTuple newtup = NULL;
326  int i;
327 
328  gistDeCompressAtt(giststate, r, oldtup, NULL,
329  (OffsetNumber) 0, oldentries, oldisnull);
330 
331  gistDeCompressAtt(giststate, r, addtup, NULL,
332  (OffsetNumber) 0, addentries, addisnull);
333 
334  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
335  {
336  gistMakeUnionKey(giststate, i,
337  oldentries + i, oldisnull[i],
338  addentries + i, addisnull[i],
339  attr + i, isnull + i);
340 
341  if (neednew)
342  /* we already need new key, so we can skip check */
343  continue;
344 
345  if (isnull[i])
346  /* union of key may be NULL if and only if both keys are NULL */
347  continue;
348 
349  if (!addisnull[i])
350  {
351  if (oldisnull[i] ||
352  !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
353  neednew = true;
354  }
355  }
356 
357  if (neednew)
358  {
359  /* need to update key */
360  newtup = gistFormTuple(giststate, r, attr, isnull, false);
361  newtup->t_tid = oldtup->t_tid;
362  }
363 
364  return newtup;
365 }
366 
367 /*
368  * Search an upper index page for the entry with lowest penalty for insertion
369  * of the new index key contained in "it".
370  *
371  * Returns the index of the page entry to insert into.
372  */
374 gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
375  GISTSTATE *giststate)
376 {
377  OffsetNumber result;
378  OffsetNumber maxoff;
379  OffsetNumber i;
380  float best_penalty[INDEX_MAX_KEYS];
381  GISTENTRY entry,
382  identry[INDEX_MAX_KEYS];
383  bool isnull[INDEX_MAX_KEYS];
384  int keep_current_best;
385 
386  Assert(!GistPageIsLeaf(p));
387 
388  gistDeCompressAtt(giststate, r,
389  it, NULL, (OffsetNumber) 0,
390  identry, isnull);
391 
392  /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
393  result = FirstOffsetNumber;
394 
395  /*
396  * The index may have multiple columns, and there's a penalty value for
397  * each column. The penalty associated with a column that appears earlier
398  * in the index definition is strictly more important than the penalty of
399  * a column that appears later in the index definition.
400  *
401  * best_penalty[j] is the best penalty we have seen so far for column j,
402  * or -1 when we haven't yet examined column j. Array entries to the
403  * right of the first -1 are undefined.
404  */
405  best_penalty[0] = -1;
406 
407  /*
408  * If we find a tuple that's exactly as good as the currently best one, we
409  * could use either one. When inserting a lot of tuples with the same or
410  * similar keys, it's preferable to descend down the same path when
411  * possible, as that's more cache-friendly. On the other hand, if all
412  * inserts land on the same leaf page after a split, we're never going to
413  * insert anything to the other half of the split, and will end up using
414  * only 50% of the available space. Distributing the inserts evenly would
415  * lead to better space usage, but that hurts cache-locality during
416  * insertion. To get the best of both worlds, when we find a tuple that's
417  * exactly as good as the previous best, choose randomly whether to stick
418  * to the old best, or use the new one. Once we decide to stick to the
419  * old best, we keep sticking to it for any subsequent equally good tuples
420  * we might find. This favors tuples with low offsets, but still allows
421  * some inserts to go to other equally-good subtrees.
422  *
423  * keep_current_best is -1 if we haven't yet had to make a random choice
424  * whether to keep the current best tuple. If we have done so, and
425  * decided to keep it, keep_current_best is 1; if we've decided to
426  * replace, keep_current_best is 0. (This state will be reset to -1 as
427  * soon as we've made the replacement, but sometimes we make the choice in
428  * advance of actually finding a replacement best tuple.)
429  */
430  keep_current_best = -1;
431 
432  /*
433  * Loop over tuples on page.
434  */
435  maxoff = PageGetMaxOffsetNumber(p);
436  Assert(maxoff >= FirstOffsetNumber);
437 
438  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
439  {
441  bool zero_penalty;
442  int j;
443 
444  zero_penalty = true;
445 
446  /* Loop over index attributes. */
447  for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
448  {
449  Datum datum;
450  float usize;
451  bool IsNull;
452 
453  /* Compute penalty for this column. */
454  datum = index_getattr(itup, j + 1, giststate->leafTupdesc,
455  &IsNull);
456  gistdentryinit(giststate, j, &entry, datum, r, p, i,
457  false, IsNull);
458  usize = gistpenalty(giststate, j, &entry, IsNull,
459  &identry[j], isnull[j]);
460  if (usize > 0)
461  zero_penalty = false;
462 
463  if (best_penalty[j] < 0 || usize < best_penalty[j])
464  {
465  /*
466  * New best penalty for column. Tentatively select this tuple
467  * as the target, and record the best penalty. Then reset the
468  * next column's penalty to "unknown" (and indirectly, the
469  * same for all the ones to its right). This will force us to
470  * adopt this tuple's penalty values as the best for all the
471  * remaining columns during subsequent loop iterations.
472  */
473  result = i;
474  best_penalty[j] = usize;
475 
477  best_penalty[j + 1] = -1;
478 
479  /* we have new best, so reset keep-it decision */
480  keep_current_best = -1;
481  }
482  else if (best_penalty[j] == usize)
483  {
484  /*
485  * The current tuple is exactly as good for this column as the
486  * best tuple seen so far. The next iteration of this loop
487  * will compare the next column.
488  */
489  }
490  else
491  {
492  /*
493  * The current tuple is worse for this column than the best
494  * tuple seen so far. Skip the remaining columns and move on
495  * to the next tuple, if any.
496  */
497  zero_penalty = false; /* so outer loop won't exit */
498  break;
499  }
500  }
501 
502  /*
503  * If we looped past the last column, and did not update "result",
504  * then this tuple is exactly as good as the prior best tuple.
505  */
506  if (j == IndexRelationGetNumberOfKeyAttributes(r) && result != i)
507  {
508  if (keep_current_best == -1)
509  {
510  /* we didn't make the random choice yet for this old best */
511  keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
512  }
513  if (keep_current_best == 0)
514  {
515  /* we choose to use the new tuple */
516  result = i;
517  /* choose again if there are even more exactly-as-good ones */
518  keep_current_best = -1;
519  }
520  }
521 
522  /*
523  * If we find a tuple with zero penalty for all columns, and we've
524  * decided we don't want to search for another tuple with equal
525  * penalty, there's no need to examine remaining tuples; just break
526  * out of the loop and return it.
527  */
528  if (zero_penalty)
529  {
530  if (keep_current_best == -1)
531  {
532  /* we didn't make the random choice yet for this old best */
533  keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
534  }
535  if (keep_current_best == 1)
536  break;
537  }
538  }
539 
540  return result;
541 }
542 
543 /*
544  * initialize a GiST entry with a decompressed version of key
545  */
546 void
547 gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
548  Datum k, Relation r, Page pg, OffsetNumber o,
549  bool l, bool isNull)
550 {
551  if (!isNull)
552  {
553  GISTENTRY *dep;
554 
555  gistentryinit(*e, k, r, pg, o, l);
556 
557  /* there may not be a decompress function in opclass */
558  if (!OidIsValid(giststate->decompressFn[nkey].fn_oid))
559  return;
560 
561  dep = (GISTENTRY *)
563  giststate->supportCollation[nkey],
564  PointerGetDatum(e)));
565  /* decompressFn may just return the given pointer */
566  if (dep != e)
567  gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
568  dep->leafkey);
569  }
570  else
571  gistentryinit(*e, (Datum) 0, r, pg, o, l);
572 }
573 
576  const Datum *attdata, const bool *isnull, bool isleaf)
577 {
578  Datum compatt[INDEX_MAX_KEYS];
579  IndexTuple res;
580 
581  gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
582 
583  res = index_form_tuple(isleaf ? giststate->leafTupdesc :
584  giststate->nonLeafTupdesc,
585  compatt, isnull);
586 
587  /*
588  * The offset number on tuples on internal pages is unused. For historical
589  * reasons, it is set to 0xffff.
590  */
591  ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
592  return res;
593 }
594 
595 void
597  const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
598 {
599  int i;
600 
601  /*
602  * Call the compress method on each attribute.
603  */
604  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
605  {
606  if (isnull[i])
607  compatt[i] = (Datum) 0;
608  else
609  {
610  GISTENTRY centry;
611  GISTENTRY *cep;
612 
613  gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
614  isleaf);
615  /* there may not be a compress function in opclass */
616  if (OidIsValid(giststate->compressFn[i].fn_oid))
617  cep = (GISTENTRY *)
619  giststate->supportCollation[i],
620  PointerGetDatum(&centry)));
621  else
622  cep = &centry;
623  compatt[i] = cep->key;
624  }
625  }
626 
627  if (isleaf)
628  {
629  /*
630  * Emplace each included attribute if any.
631  */
632  for (; i < r->rd_att->natts; i++)
633  {
634  if (isnull[i])
635  compatt[i] = (Datum) 0;
636  else
637  compatt[i] = attdata[i];
638  }
639  }
640 }
641 
642 /*
643  * initialize a GiST entry with fetched value in key field
644  */
645 static Datum
646 gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
647 {
648  GISTENTRY fentry;
649  GISTENTRY *fep;
650 
651  gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false);
652 
653  fep = (GISTENTRY *)
654  DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey],
655  giststate->supportCollation[nkey],
656  PointerGetDatum(&fentry)));
657 
658  /* fetchFn set 'key', return it to the caller */
659  return fep->key;
660 }
661 
662 /*
663  * Fetch all keys in tuple.
664  * Returns a new HeapTuple containing the originally-indexed data.
665  */
666 HeapTuple
668 {
669  MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt);
671  bool isnull[INDEX_MAX_KEYS];
672  int i;
673 
674  for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
675  {
676  Datum datum;
677 
678  datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
679 
680  if (giststate->fetchFn[i].fn_oid != InvalidOid)
681  {
682  if (!isnull[i])
683  fetchatt[i] = gistFetchAtt(giststate, i, datum, r);
684  else
685  fetchatt[i] = (Datum) 0;
686  }
687  else if (giststate->compressFn[i].fn_oid == InvalidOid)
688  {
689  /*
690  * If opclass does not provide compress method that could change
691  * original value, att is necessarily stored in original form.
692  */
693  if (!isnull[i])
694  fetchatt[i] = datum;
695  else
696  fetchatt[i] = (Datum) 0;
697  }
698  else
699  {
700  /*
701  * Index-only scans not supported for this column. Since the
702  * planner chose an index-only scan anyway, it is not interested
703  * in this column, and we can replace it with a NULL.
704  */
705  isnull[i] = true;
706  fetchatt[i] = (Datum) 0;
707  }
708  }
709 
710  /*
711  * Get each included attribute.
712  */
713  for (; i < r->rd_att->natts; i++)
714  {
715  fetchatt[i] = index_getattr(tuple, i + 1, giststate->leafTupdesc,
716  &isnull[i]);
717  }
718  MemoryContextSwitchTo(oldcxt);
719 
720  return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull);
721 }
722 
723 float
724 gistpenalty(GISTSTATE *giststate, int attno,
725  GISTENTRY *orig, bool isNullOrig,
726  GISTENTRY *add, bool isNullAdd)
727 {
728  float penalty = 0.0;
729 
730  if (giststate->penaltyFn[attno].fn_strict == false ||
731  (isNullOrig == false && isNullAdd == false))
732  {
733  FunctionCall3Coll(&giststate->penaltyFn[attno],
734  giststate->supportCollation[attno],
735  PointerGetDatum(orig),
736  PointerGetDatum(add),
737  PointerGetDatum(&penalty));
738  /* disallow negative or NaN penalty */
739  if (isnan(penalty) || penalty < 0.0)
740  penalty = 0.0;
741  }
742  else if (isNullOrig && isNullAdd)
743  penalty = 0.0;
744  else
745  {
746  /* try to prevent mixing null and non-null values */
747  penalty = get_float4_infinity();
748  }
749 
750  return penalty;
751 }
752 
753 /*
754  * Initialize a new index page
755  */
756 void
758 {
759  GISTPageOpaque opaque;
760 
761  PageInit(page, BLCKSZ, sizeof(GISTPageOpaqueData));
762 
763  opaque = GistPageGetOpaque(page);
764  opaque->rightlink = InvalidBlockNumber;
765  opaque->flags = f;
766  opaque->gist_page_id = GIST_PAGE_ID;
767 }
768 
769 /*
770  * Initialize a new index buffer
771  */
772 void
774 {
775  Page page;
776 
777  page = BufferGetPage(b);
778  gistinitpage(page, f);
779 }
780 
781 /*
782  * Verify that a freshly-read page looks sane.
783  */
784 void
786 {
787  Page page = BufferGetPage(buf);
788 
789  /*
790  * ReadBuffer verifies that every newly-read page passes
791  * PageHeaderIsValid, which means it either contains a reasonably sane
792  * page header or is all-zero. We have to defend against the all-zero
793  * case, however.
794  */
795  if (PageIsNew(page))
796  ereport(ERROR,
797  (errcode(ERRCODE_INDEX_CORRUPTED),
798  errmsg("index \"%s\" contains unexpected zero page at block %u",
801  errhint("Please REINDEX it.")));
802 
803  /*
804  * Additionally check that the special area looks sane.
805  */
806  if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
807  ereport(ERROR,
808  (errcode(ERRCODE_INDEX_CORRUPTED),
809  errmsg("index \"%s\" contains corrupted page at block %u",
812  errhint("Please REINDEX it.")));
813 }
814 
815 
816 /*
817  * Allocate a new page (either by recycling, or by extending the index file)
818  *
819  * The returned buffer is already pinned and exclusive-locked
820  *
821  * Caller is responsible for initializing the page by calling GISTInitBuffer
822  */
823 Buffer
825 {
826  Buffer buffer;
827 
828  /* First, try to get a page from FSM */
829  for (;;)
830  {
831  BlockNumber blkno = GetFreeIndexPage(r);
832 
833  if (blkno == InvalidBlockNumber)
834  break; /* nothing left in FSM */
835 
836  buffer = ReadBuffer(r, blkno);
837 
838  /*
839  * We have to guard against the possibility that someone else already
840  * recycled this page; the buffer may be locked if so.
841  */
842  if (ConditionalLockBuffer(buffer))
843  {
844  Page page = BufferGetPage(buffer);
845 
846  /*
847  * If the page was never initialized, it's OK to use.
848  */
849  if (PageIsNew(page))
850  return buffer;
851 
852  gistcheckpage(r, buffer);
853 
854  /*
855  * Otherwise, recycle it if deleted, and too old to have any
856  * processes interested in it.
857  */
858  if (gistPageRecyclable(page))
859  {
860  /*
861  * If we are generating WAL for Hot Standby then create a WAL
862  * record that will allow us to conflict with queries running
863  * on standby, in case they have snapshots older than the
864  * page's deleteXid.
865  */
867  gistXLogPageReuse(r, heaprel, blkno, GistPageGetDeleteXid(page));
868 
869  return buffer;
870  }
871 
872  LockBuffer(buffer, GIST_UNLOCK);
873  }
874 
875  /* Can't use it, so release buffer and try again */
876  ReleaseBuffer(buffer);
877  }
878 
879  /* Must extend the file */
880  buffer = ExtendBufferedRel(BMR_REL(r), MAIN_FORKNUM, NULL,
881  EB_LOCK_FIRST);
882 
883  return buffer;
884 }
885 
886 /* Can this page be recycled yet? */
887 bool
889 {
890  if (PageIsNew(page))
891  return true;
892  if (GistPageIsDeleted(page))
893  {
894  /*
895  * The page was deleted, but when? If it was just deleted, a scan
896  * might have seen the downlink to it, and will read the page later.
897  * As long as that can happen, we must keep the deleted page around as
898  * a tombstone.
899  *
900  * For that check if the deletion XID could still be visible to
901  * anyone. If not, then no scan that's still in progress could have
902  * seen its downlink, and we can recycle it.
903  */
904  FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
905 
906  return GlobalVisCheckRemovableFullXid(NULL, deletexid_full);
907  }
908  return false;
909 }
910 
911 bytea *
912 gistoptions(Datum reloptions, bool validate)
913 {
914  static const relopt_parse_elt tab[] = {
915  {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
916  {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)}
917  };
918 
919  return (bytea *) build_reloptions(reloptions, validate,
921  sizeof(GiSTOptions),
922  tab, lengthof(tab));
923 }
924 
925 /*
926  * gistproperty() -- Check boolean properties of indexes.
927  *
928  * This is optional for most AMs, but is required for GiST because the core
929  * property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle
930  * AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn.
931  */
932 bool
933 gistproperty(Oid index_oid, int attno,
934  IndexAMProperty prop, const char *propname,
935  bool *res, bool *isnull)
936 {
937  Oid opclass,
938  opfamily,
939  opcintype;
940  int16 procno;
941 
942  /* Only answer column-level inquiries */
943  if (attno == 0)
944  return false;
945 
946  /*
947  * Currently, GiST distance-ordered scans require that there be a distance
948  * function in the opclass with the default types (i.e. the one loaded
949  * into the relcache entry, see initGISTstate). So we assume that if such
950  * a function exists, then there's a reason for it (rather than grubbing
951  * through all the opfamily's operators to find an ordered one).
952  *
953  * Essentially the same code can test whether we support returning the
954  * column data, since that's true if the opclass provides a fetch proc.
955  */
956 
957  switch (prop)
958  {
960  procno = GIST_DISTANCE_PROC;
961  break;
962  case AMPROP_RETURNABLE:
963  procno = GIST_FETCH_PROC;
964  break;
965  default:
966  return false;
967  }
968 
969  /* First we need to know the column's opclass. */
970  opclass = get_index_column_opclass(index_oid, attno);
971  if (!OidIsValid(opclass))
972  {
973  *isnull = true;
974  return true;
975  }
976 
977  /* Now look up the opclass family and input datatype. */
978  if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
979  {
980  *isnull = true;
981  return true;
982  }
983 
984  /* And now we can check whether the function is provided. */
985 
986  *res = SearchSysCacheExists4(AMPROCNUM,
987  ObjectIdGetDatum(opfamily),
988  ObjectIdGetDatum(opcintype),
989  ObjectIdGetDatum(opcintype),
990  Int16GetDatum(procno));
991 
992  /*
993  * Special case: even without a fetch function, AMPROP_RETURNABLE is true
994  * if the opclass has no compress function.
995  */
996  if (prop == AMPROP_RETURNABLE && !*res)
997  {
998  *res = !SearchSysCacheExists4(AMPROCNUM,
999  ObjectIdGetDatum(opfamily),
1000  ObjectIdGetDatum(opcintype),
1001  ObjectIdGetDatum(opcintype),
1003  }
1004 
1005  *isnull = false;
1006 
1007  return true;
1008 }
1009 
1010 /*
1011  * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
1012  * splits anyway. This function provides a fake sequence of LSNs for that
1013  * purpose.
1014  */
1015 XLogRecPtr
1017 {
1018  if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
1019  {
1020  /*
1021  * Temporary relations are only accessible in our session, so a simple
1022  * backend-local counter will do.
1023  */
1024  static XLogRecPtr counter = FirstNormalUnloggedLSN;
1025 
1026  return counter++;
1027  }
1028  else if (RelationIsPermanent(rel))
1029  {
1030  /*
1031  * WAL-logging on this relation will start after commit, so its LSNs
1032  * must be distinct numbers smaller than the LSN at the next commit.
1033  * Emit a dummy WAL record if insert-LSN hasn't advanced after the
1034  * last call.
1035  */
1036  static XLogRecPtr lastlsn = InvalidXLogRecPtr;
1037  XLogRecPtr currlsn = GetXLogInsertRecPtr();
1038 
1039  /* Shouldn't be called for WAL-logging relations */
1040  Assert(!RelationNeedsWAL(rel));
1041 
1042  /* No need for an actual record if we already have a distinct LSN */
1043  if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
1044  currlsn = gistXLogAssignLSN();
1045 
1046  lastlsn = currlsn;
1047  return currlsn;
1048  }
1049  else
1050  {
1051  /*
1052  * Unlogged relations are accessible from other backends, and survive
1053  * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
1054  */
1055  Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
1056  return GetFakeLSNForUnloggedRel();
1057  }
1058 }
1059 
1060 /*
1061  * Returns the same number that was received.
1062  *
1063  * This is for GiST opclasses that use the RT*StrategyNumber constants.
1064  */
1065 Datum
1067 {
1068  StrategyNumber strat = PG_GETARG_UINT16(0);
1069 
1070  PG_RETURN_UINT16(strat);
1071 }
1072 
1073 /*
1074  * Returns the opclass's private stratnum used for the given strategy.
1075  *
1076  * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
1077  * and returns the result.
1078  * Returns InvalidStrategy if the function is not defined.
1079  */
1082 {
1083  Oid opfamily;
1084  Oid opcintype;
1085  Oid funcid;
1086  Datum result;
1087 
1088  /* Look up the opclass family and input datatype. */
1089  if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
1090  return InvalidStrategy;
1091 
1092  /* Check whether the function is provided. */
1093  funcid = get_opfamily_proc(opfamily, opcintype, opcintype, GIST_STRATNUM_PROC);
1094  if (!OidIsValid(funcid))
1095  return InvalidStrategy;
1096 
1097  /* Ask the translation function */
1098  result = OidFunctionCall1Coll(funcid, InvalidOid, UInt16GetDatum(strat));
1099  return DatumGetUInt16(result);
1100 }
IndexAMProperty
Definition: amapi.h:35
@ AMPROP_DISTANCE_ORDERABLE
Definition: amapi.h:42
@ AMPROP_RETURNABLE
Definition: amapi.h:43
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
int Buffer
Definition: buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3667
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:845
bool ConditionalLockBuffer(Buffer buffer)
Definition: bufmgr.c:5111
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4850
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5085
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:745
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:408
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
void PageInit(Page page, Size pageSize, Size specialSize)
Definition: bufpage.c:42
Size PageGetFreeSpace(Page page)
Definition: bufpage.c:907
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
static uint16 PageGetSpecialSize(Page page)
Definition: bufpage.h:313
unsigned int uint32
Definition: c.h:506
signed short int16
Definition: c.h:493
#define MAXALIGN(LEN)
Definition: c.h:811
#define Assert(condition)
Definition: c.h:858
#define lengthof(array)
Definition: c.h:788
#define OidIsValid(objectId)
Definition: c.h:775
size_t Size
Definition: c.h:605
int errhint(const char *fmt,...)
Definition: elog.c:1319
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
static float4 get_float4_infinity(void)
Definition: float.h:74
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
Datum OidFunctionCall1Coll(Oid functionId, Oid collation, Datum arg1)
Definition: fmgr.c:1411
Datum FunctionCall3Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg3)
Definition: fmgr.c:1171
Datum FunctionCall1Coll(FmgrInfo *flinfo, Oid collation, Datum arg1)
Definition: fmgr.c:1129
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_UINT16(x)
Definition: fmgr.h:357
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define GIST_STRATNUM_PROC
Definition: gist.h:42
#define GIST_PAGE_ID
Definition: gist.h:111
struct GISTENTRY GISTENTRY
#define GIST_FETCH_PROC
Definition: gist.h:39
#define GIST_COMPRESS_PROC
Definition: gist.h:33
#define GEVHDRSZ
Definition: gist.h:239
#define GistPageIsLeaf(page)
Definition: gist.h:169
#define GIST_DISTANCE_PROC
Definition: gist.h:38
static FullTransactionId GistPageGetDeleteXid(Page page)
Definition: gist.h:215
#define GistPageIsDeleted(page)
Definition: gist.h:172
#define GistPageGetOpaque(page)
Definition: gist.h:167
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
#define GIST_UNLOCK
Definition: gist_private.h:44
#define GiSTPageSize
Definition: gist_private.h:476
void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, OffsetNumber o, GISTENTRY *attdata, bool *isnull)
Definition: gistutil.c:296
Buffer gistNewBuffer(Relation r, Relation heaprel)
Definition: gistutil.c:824
bool gistproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: gistutil.c:933
bool gistPageRecyclable(Page page)
Definition: gistutil.c:888
void gistMakeUnionKey(GISTSTATE *giststate, int attno, GISTENTRY *entry1, bool isnull1, GISTENTRY *entry2, bool isnull2, Datum *dst, bool *dstisnull)
Definition: gistutil.c:233
void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, Datum *attr, bool *isnull)
Definition: gistutil.c:155
bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
Definition: gistutil.c:59
HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple)
Definition: gistutil.c:667
bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
Definition: gistutil.c:281
static Datum gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
Definition: gistutil.c:646
void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
Definition: gistutil.c:34
IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf)
Definition: gistutil.c:575
IndexTuple * gistextractpage(Page page, int *len)
Definition: gistutil.c:95
void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, Datum k, Relation r, Page pg, OffsetNumber o, bool l, bool isNull)
Definition: gistutil.c:547
bool gistfitpage(IndexTuple *itvec, int len)
Definition: gistutil.c:79
IndexTuple gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
Definition: gistutil.c:316
OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate)
Definition: gistutil.c:374
XLogRecPtr gistGetFakeLSN(Relation rel)
Definition: gistutil.c:1016
void gistCompressValues(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf, Datum *compatt)
Definition: gistutil.c:596
Datum gist_stratnum_identity(PG_FUNCTION_ARGS)
Definition: gistutil.c:1066
StrategyNumber GistTranslateStratnum(Oid opclass, StrategyNumber strat)
Definition: gistutil.c:1081
IndexTuple * gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
Definition: gistutil.c:114
void gistinitpage(Page page, uint32 f)
Definition: gistutil.c:757
bytea * gistoptions(Datum reloptions, bool validate)
Definition: gistutil.c:912
IndexTuple gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
Definition: gistutil.c:219
void GISTInitBuffer(Buffer b, uint32 f)
Definition: gistutil.c:773
IndexTupleData * gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
Definition: gistutil.c:127
void gistcheckpage(Relation rel, Buffer buf)
Definition: gistutil.c:785
float gistpenalty(GISTSTATE *giststate, int attno, GISTENTRY *orig, bool isNullOrig, GISTENTRY *add, bool isNullAdd)
Definition: gistutil.c:724
XLogRecPtr gistXLogAssignLSN(void)
Definition: gistxlog.c:576
void gistXLogPageReuse(Relation rel, Relation heaprel, BlockNumber blkno, FullTransactionId deleteXid)
Definition: gistxlog.c:594
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1116
#define storage
Definition: indent_codes.h:68
BlockNumber GetFreeIndexPage(Relation rel)
Definition: indexfsm.c:38
IndexTuple index_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: indextuple.c:44
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Pointer Item
Definition: item.h:17
struct ItemIdData ItemIdData
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexTupleSize(itup)
Definition: itup.h:70
static Datum index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
Definition: itup.h:117
bool get_opclass_opfamily_and_input_type(Oid opclass, Oid *opfamily, Oid *opcintype)
Definition: lsyscache.c:1235
Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum)
Definition: lsyscache.c:796
Oid get_index_column_opclass(Oid index_oid, int attno)
Definition: lsyscache.c:3512
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void * palloc(Size size)
Definition: mcxt.c:1316
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
#define INDEX_MAX_KEYS
const void size_t len
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
bool pg_prng_bool(pg_prng_state *state)
Definition: pg_prng.c:313
static char * buf
Definition: pg_test_fsync.c:73
int fillfactor
Definition: pgbench.c:187
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:172
static Datum UInt16GetDatum(uint16 X)
Definition: postgres.h:192
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:252
static uint16 DatumGetUInt16(Datum X)
Definition: postgres.h:182
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
e
Definition: preproc-init.c:82
bool GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid)
Definition: procarray.c:4270
MemoryContextSwitchTo(old_ctx)
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:524
#define RelationIsPermanent(relation)
Definition: rel.h:617
void * build_reloptions(Datum reloptions, bool validate, relopt_kind kind, Size relopt_struct_size, const relopt_parse_elt *relopt_elems, int num_relopt_elems)
Definition: reloptions.c:1908
@ RELOPT_KIND_GIST
Definition: reloptions.h:47
@ RELOPT_TYPE_ENUM
Definition: reloptions.h:34
@ RELOPT_TYPE_INT
Definition: reloptions.h:32
@ MAIN_FORKNUM
Definition: relpath.h:50
static pg_noinline void Size size
Definition: slab.c:607
uint16 StrategyNumber
Definition: stratnum.h:22
#define InvalidStrategy
Definition: stratnum.h:24
Oid fn_oid
Definition: fmgr.h:59
bool fn_strict
Definition: fmgr.h:61
OffsetNumber offset
Definition: gist.h:163
Datum key
Definition: gist.h:160
Page page
Definition: gist.h:162
Relation rel
Definition: gist.h:161
bool leafkey
Definition: gist.h:164
uint16 gist_page_id
Definition: gist.h:82
BlockNumber rightlink
Definition: gist.h:80
uint16 flags
Definition: gist.h:81
FmgrInfo fetchFn[INDEX_MAX_KEYS]
Definition: gist_private.h:94
TupleDesc leafTupdesc
Definition: gist_private.h:80
TupleDesc fetchTupdesc
Definition: gist_private.h:83
TupleDesc nonLeafTupdesc
Definition: gist_private.h:81
FmgrInfo penaltyFn[INDEX_MAX_KEYS]
Definition: gist_private.h:90
MemoryContext tempCxt
Definition: gist_private.h:78
Oid supportCollation[INDEX_MAX_KEYS]
Definition: gist_private.h:97
FmgrInfo decompressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:89
FmgrInfo compressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:88
FmgrInfo equalFn[INDEX_MAX_KEYS]
Definition: gist_private.h:92
FmgrInfo unionFn[INDEX_MAX_KEYS]
Definition: gist_private.h:87
GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER]
Definition: gist.h:236
int32 n
Definition: gist.h:235
ItemPointerData t_tid
Definition: itup.h:37
TupleDesc rd_att
Definition: rel.h:112
Form_pg_class rd_rel
Definition: rel.h:111
Definition: c.h:687
#define SearchSysCacheExists4(cacheId, key1, key2, key3, key4)
Definition: syscache.h:101
#define fetchatt(A, T)
Definition: tupmacs.h:46
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9355
XLogRecPtr GetFakeLSNForUnloggedRel(void)
Definition: xlog.c:4571
#define XLogStandbyInfoActive()
Definition: xlog.h:121
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:36
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28