/*
 * Doxygen scrape of PostgreSQL master: src/backend/access/gist/gist.c.
 * Each following line begins with its original source line number; gaps in
 * that numbering indicate lines dropped by the scraper.
 */
1 /*-------------------------------------------------------------------------
2  *
3  * gist.c
4  * interface routines for the postgres GiST index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gist/gist.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/gist_private.h"
18 #include "access/gistscan.h"
19 #include "access/xloginsert.h"
20 #include "catalog/pg_collation.h"
21 #include "commands/vacuum.h"
22 #include "miscadmin.h"
23 #include "nodes/execnodes.h"
24 #include "storage/predicate.h"
25 #include "utils/fmgrprotos.h"
26 #include "utils/index_selfuncs.h"
27 #include "utils/memutils.h"
28 #include "utils/rel.h"
29 
/*
 * NOTE(review): doxygen-scraped text; the embedded numbers are the original
 * source line numbers.  Some prototype head lines (orig 32, 34, 37, 39 --
 * presumably the gistinserttuple / gistinserttuples / gistfinishsplit
 * declarations) appear to have been dropped by the scraper; restore from
 * upstream gist.c before compiling.
 */
30 /* non-export function prototypes */
31 static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
33  GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
35  GISTSTATE *giststate,
36  IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
38  bool unlockbuf, bool unlockleftchild);
40  GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
41 static void gistprunepage(Relation rel, Page page, Buffer buffer,
42  Relation heapRel);
43 
44 
/*
 * ROTATEDIST(d): push a freshly palloc0'd SplitPageLayout node onto the
 * head of list (d), with an invalid block number and buffer.  Multi-
 * statement macro wrapped in do { } while(0) so it behaves as one statement.
 */
45 #define ROTATEDIST(d) do { \
46  SplitPageLayout *tmp=(SplitPageLayout*)palloc0(sizeof(SplitPageLayout)); \
47  tmp->block.blkno = InvalidBlockNumber; \
48  tmp->buffer = InvalidBuffer; \
49  tmp->next = (d); \
50  (d)=tmp; \
51 } while(0)
52 
53 
54 /*
55  * GiST handler function: return IndexAmRoutine with access method parameters
56  * and callbacks.
57  */
/*
 * GiST handler: fills in and returns the IndexAmRoutine describing the GiST
 * access method's capabilities and callbacks.
 *
 * NOTE(review): the scraper dropped orig lines 59 and 61 -- presumably the
 * function name line ("gisthandler(PG_FUNCTION_ARGS)") and the
 * "IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);" declaration --
 * and orig lines 91 and 99 (amvacuumcleanup / amadjustmembers assignments,
 * to judge from the numbering gap).  Restore from upstream gist.c.
 */
58 Datum
60 {
62 
63  amroutine->amstrategies = 0;
64  amroutine->amsupport = GISTNProcs;
65  amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
66  amroutine->amcanorder = false;
67  amroutine->amcanorderbyop = true;
68  amroutine->amcanbackward = false;
69  amroutine->amcanunique = false;
70  amroutine->amcanmulticol = true;
71  amroutine->amoptionalkey = true;
72  amroutine->amsearcharray = false;
73  amroutine->amsearchnulls = true;
74  amroutine->amstorage = true;
75  amroutine->amclusterable = true;
76  amroutine->ampredlocks = true;
77  amroutine->amcanparallel = false;
78  amroutine->amcanbuildparallel = false;
79  amroutine->amcaninclude = true;
80  amroutine->amusemaintenanceworkmem = false;
81  amroutine->amsummarizing = false;
/* NOTE(review): right-hand side of this assignment (orig line 83, the
 * VACUUM_OPTION_* flags) was dropped by the scraper. */
82  amroutine->amparallelvacuumoptions =
84  amroutine->amkeytype = InvalidOid;
85 
86  amroutine->ambuild = gistbuild;
87  amroutine->ambuildempty = gistbuildempty;
88  amroutine->aminsert = gistinsert;
89  amroutine->aminsertcleanup = NULL;
90  amroutine->ambulkdelete = gistbulkdelete;
92  amroutine->amcanreturn = gistcanreturn;
93  amroutine->amcostestimate = gistcostestimate;
94  amroutine->amgettreeheight = NULL;
95  amroutine->amoptions = gistoptions;
96  amroutine->amproperty = gistproperty;
97  amroutine->ambuildphasename = NULL;
98  amroutine->amvalidate = gistvalidate;
100  amroutine->ambeginscan = gistbeginscan;
101  amroutine->amrescan = gistrescan;
102  amroutine->amgettuple = gistgettuple;
103  amroutine->amgetbitmap = gistgetbitmap;
104  amroutine->amendscan = gistendscan;
105  amroutine->ammarkpos = NULL;
106  amroutine->amrestrpos = NULL;
107  amroutine->amestimateparallelscan = NULL;
108  amroutine->aminitparallelscan = NULL;
109  amroutine->amparallelrescan = NULL;
110 
111  PG_RETURN_POINTER(amroutine);
112 }
113 
114 /*
115  * Create and return a temporary memory context for use by GiST. We
116  * _always_ invoke user-provided methods in a temporary memory
117  * context, so that memory leaks in those functions cannot cause
118  * problems. Also, we use some additional temporary contexts in the
119  * GiST code itself, to avoid the need to do some awkward manual
120  * memory management.
121  */
/*
 * NOTE(review): only a fragment of createTempGistContext survives the
 * scrape -- the signature line ("MemoryContext createTempGistContext(void)",
 * per the comment above) and the surrounding AllocSetContextCreate(...) call
 * appear to have been dropped; only the context-name string literal remains.
 * Restore from upstream gist.c.
 */
124 {
126  "GiST temporary context",
128 }
129 
130 /*
131  * gistbuildempty() -- build an empty gist index in the initialization fork
132  */
/*
 * Build an empty GiST index in the init fork: extend the relation by one
 * buffer, initialize it as an empty leaf root, mark it dirty and WAL-log it.
 *
 * NOTE(review): the scraper dropped orig lines 134 (parameter list,
 * "gistbuildempty(Relation index)" per the aminsert wiring above), 140 (the
 * trailing ExtendBufferedRel flags argument), and 143/147 (presumably the
 * critical-section begin/end around the buffer init + WAL log).  Restore
 * from upstream gist.c.
 */
133 void
135 {
136  Buffer buffer;
137 
138  /* Initialize the root page */
139  buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
141 
142  /* Initialize and xlog buffer */
144  GISTInitBuffer(buffer, F_LEAF);
145  MarkBufferDirty(buffer);
146  log_newpage_buffer(buffer, true);
148 
149  /* Unlock and release the buffer */
150  UnlockReleaseBuffer(buffer);
151 }
152 
153 /*
154  * gistinsert -- wrapper for GiST tuple insertion.
155  *
156  * This is the public interface routine for tuple insertion in GiSTs.
157  * It doesn't do any work; just locks the relation and passes the buck.
158  */
159 bool
160 gistinsert(Relation r, Datum *values, bool *isnull,
161  ItemPointer ht_ctid, Relation heapRel,
162  IndexUniqueCheck checkUnique,
163  bool indexUnchanged,
164  IndexInfo *indexInfo)
165 {
166  GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
167  IndexTuple itup;
168  MemoryContext oldCxt;
169 
170  /* Initialize GISTSTATE cache if first call in this statement */
171  if (giststate == NULL)
172  {
173  oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
174  giststate = initGISTstate(r);
175  giststate->tempCxt = createTempGistContext();
176  indexInfo->ii_AmCache = (void *) giststate;
177  MemoryContextSwitchTo(oldCxt);
178  }
179 
180  oldCxt = MemoryContextSwitchTo(giststate->tempCxt);
181 
182  itup = gistFormTuple(giststate, r,
183  values, isnull, true /* size is currently bogus */ );
184  itup->t_tid = *ht_ctid;
185 
186  gistdoinsert(r, itup, 0, giststate, heapRel, false);
187 
188  /* cleanup */
189  MemoryContextSwitchTo(oldCxt);
190  MemoryContextReset(giststate->tempCxt);
191 
192  return false;
193 }
194 
195 
196 /*
197  * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
198  * at that offset is atomically removed along with inserting the new tuples.
199  * This is used to replace a tuple with a new one.
200  *
201  * If 'leftchildbuf' is valid, we're inserting the downlink for the page
202  * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
203  * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
204  *
205  * If 'markfollowright' is true and the page is split, the left child is
206  * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
207  * index build, however, there is no concurrent access and the page splitting
208  * is done in a slightly simpler fashion, and false is passed.
209  *
210  * If there is not enough room on the page, it is split. All the split
211  * pages are kept pinned and locked and returned in *splitinfo, the caller
212  * is responsible for inserting the downlinks for them. However, if
213  * 'buffer' is the root page and it needs to be split, gistplacetopage()
214  * performs the split as one atomic operation, and *splitinfo is set to NIL.
215  * In that case, we continue to hold the root page locked, and the child
216  * pages are released; note that new tuple(s) are *not* on the root page
217  * but in one of the new child pages.
218  *
219  * If 'newblkno' is not NULL, returns the block number of page the first
220  * new/updated tuple was inserted to. Usually it's the given page, but could
221  * be its right sibling if the page was split.
222  *
223  * Returns 'true' if the page was split, 'false' otherwise.
224  */
/*
 * NOTE(review): doxygen-scraped text; embedded numbers are the original
 * source line numbers.  Several statements were dropped by the scraper
 * (orig lines 344, 358, 385, 409, 478, 493, 539, 553 and 624 are missing
 * -- by the numbering gaps these presumably include the temp-page copy /
 * restore calls, the GISTPageSplitInfo allocation, a
 * RelationGetRelationName() argument, and the critical-section begin/end).
 * Restore from upstream gist.c; the statement ordering here (mark dirty ->
 * WAL insert -> update left child NSN) is correctness-critical, so do not
 * reflow it by hand.
 */
225 bool
226 gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
227  Buffer buffer,
228  IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
229  BlockNumber *newblkno,
230  Buffer leftchildbuf,
231  List **splitinfo,
232  bool markfollowright,
233  Relation heapRel,
234  bool is_build)
235 {
236  BlockNumber blkno = BufferGetBlockNumber(buffer);
237  Page page = BufferGetPage(buffer);
238  bool is_leaf = (GistPageIsLeaf(page)) ? true : false;
239  XLogRecPtr recptr;
240  bool is_split;
241 
242  /*
243  * Refuse to modify a page that's incompletely split. This should not
244  * happen because we finish any incomplete splits while we walk down the
245  * tree. However, it's remotely possible that another concurrent inserter
246  * splits a parent page, and errors out before completing the split. We
247  * will just throw an error in that case, and leave any split we had in
248  * progress unfinished too. The next insert that comes along will clean up
249  * the mess.
250  */
251  if (GistFollowRight(page))
252  elog(ERROR, "concurrent GiST page split was incomplete");
253 
254  /* should never try to insert to a deleted page */
255  Assert(!GistPageIsDeleted(page));
256 
257  *splitinfo = NIL;
258 
259  /*
260  * if isupdate, remove old key: This node's key has been modified, either
261  * because a child split occurred or because we needed to adjust our key
262  * for an insert in a child node. Therefore, remove the old version of
263  * this node's key.
264  *
265  * for WAL replay, in the non-split case we handle this by setting up a
266  * one-element todelete array; in the split case, it's handled implicitly
267  * because the tuple vector passed to gistSplit won't include this tuple.
268  */
269  is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
270 
271  /*
272  * If leaf page is full, try at first to delete dead tuples. And then
273  * check again.
274  */
275  if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
276  {
277  gistprunepage(rel, page, buffer, heapRel);
278  is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
279  }
280 
/*
 * Two arms from here: the split path (no room -- distribute tuples over new
 * pages, possibly building a new root in-place), and the fast path (enough
 * room -- overwrite/delete-and-append on the existing page).
 */
281  if (is_split)
282  {
283  /* no space for insertion */
284  IndexTuple *itvec;
285  int tlen;
286  SplitPageLayout *dist = NULL,
287  *ptr;
288  BlockNumber oldrlink = InvalidBlockNumber;
289  GistNSN oldnsn = 0;
290  SplitPageLayout rootpg;
291  bool is_rootsplit;
292  int npage;
293 
294  is_rootsplit = (blkno == GIST_ROOT_BLKNO);
295 
296  /*
297  * Form index tuples vector to split. If we're replacing an old tuple,
298  * remove the old version from the vector.
299  */
300  itvec = gistextractpage(page, &tlen);
301  if (OffsetNumberIsValid(oldoffnum))
302  {
303  /* on inner page we should remove old tuple */
304  int pos = oldoffnum - FirstOffsetNumber;
305 
306  tlen--;
307  if (pos != tlen)
308  memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
309  }
310  itvec = gistjoinvector(itvec, &tlen, itup, ntup);
311  dist = gistSplit(rel, page, itvec, tlen, giststate);
312 
313  /*
314  * Check that split didn't produce too many pages.
315  */
316  npage = 0;
317  for (ptr = dist; ptr; ptr = ptr->next)
318  npage++;
319  /* in a root split, we'll add one more page to the list below */
320  if (is_rootsplit)
321  npage++;
322  if (npage > GIST_MAX_SPLIT_PAGES)
323  elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
324  npage, GIST_MAX_SPLIT_PAGES);
325 
326  /*
327  * Set up pages to work with. Allocate new buffers for all but the
328  * leftmost page. The original page becomes the new leftmost page, and
329  * is just replaced with the new contents.
330  *
331  * For a root-split, allocate new buffers for all child pages, the
332  * original page is overwritten with new root page containing
333  * downlinks to the new child pages.
334  */
335  ptr = dist;
336  if (!is_rootsplit)
337  {
338  /* save old rightlink and NSN */
339  oldrlink = GistPageGetOpaque(page)->rightlink;
340  oldnsn = GistPageGetNSN(page);
341 
342  dist->buffer = buffer;
343  dist->block.blkno = BufferGetBlockNumber(buffer);
345 
346  /* clean all flags except F_LEAF */
347  GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
348 
349  ptr = ptr->next;
350  }
351  for (; ptr; ptr = ptr->next)
352  {
353  /* Allocate new page */
354  ptr->buffer = gistNewBuffer(rel, heapRel);
355  GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
356  ptr->page = BufferGetPage(ptr->buffer);
357  ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
359  BufferGetBlockNumber(buffer),
360  BufferGetBlockNumber(ptr->buffer));
361  }
362 
363  /*
364  * Now that we know which blocks the new pages go to, set up downlink
365  * tuples to point to them.
366  */
367  for (ptr = dist; ptr; ptr = ptr->next)
368  {
369  ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
370  GistTupleSetValid(ptr->itup);
371  }
372 
373  /*
374  * If this is a root split, we construct the new root page with the
375  * downlinks here directly, instead of requiring the caller to insert
376  * them. Add the new root page to the list along with the child pages.
377  */
378  if (is_rootsplit)
379  {
380  IndexTuple *downlinks;
381  int ndownlinks = 0;
382  int i;
383 
384  rootpg.buffer = buffer;
386  GistPageGetOpaque(rootpg.page)->flags = 0;
387 
388  /* Prepare a vector of all the downlinks */
389  for (ptr = dist; ptr; ptr = ptr->next)
390  ndownlinks++;
391  downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
392  for (i = 0, ptr = dist; ptr; ptr = ptr->next)
393  downlinks[i++] = ptr->itup;
394 
395  rootpg.block.blkno = GIST_ROOT_BLKNO;
396  rootpg.block.num = ndownlinks;
397  rootpg.list = gistfillitupvec(downlinks, ndownlinks,
398  &(rootpg.lenlist));
399  rootpg.itup = NULL;
400 
401  rootpg.next = dist;
402  dist = &rootpg;
403  }
404  else
405  {
406  /* Prepare split-info to be returned to caller */
407  for (ptr = dist; ptr; ptr = ptr->next)
408  {
410 
411  si->buf = ptr->buffer;
412  si->downlink = ptr->itup;
413  *splitinfo = lappend(*splitinfo, si);
414  }
415  }
416 
417  /*
418  * Fill all pages. All the pages are new, ie. freshly allocated empty
419  * pages, or a temporary copy of the old page.
420  */
421  for (ptr = dist; ptr; ptr = ptr->next)
422  {
423  char *data = (char *) (ptr->list);
424 
425  for (int i = 0; i < ptr->block.num; i++)
426  {
427  IndexTuple thistup = (IndexTuple) data;
428 
429  if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
430  elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
431 
432  /*
433  * If this is the first inserted/updated tuple, let the caller
434  * know which page it landed on.
435  */
436  if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
437  *newblkno = ptr->block.blkno;
438 
439  data += IndexTupleSize(thistup);
440  }
441 
442  /* Set up rightlinks */
443  if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
444  GistPageGetOpaque(ptr->page)->rightlink =
445  ptr->next->block.blkno;
446  else
447  GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
448 
449  /*
450  * Mark the all but the right-most page with the follow-right
451  * flag. It will be cleared as soon as the downlink is inserted
452  * into the parent, but this ensures that if we error out before
453  * that, the index is still consistent. (in buffering build mode,
454  * any error will abort the index build anyway, so this is not
455  * needed.)
456  */
457  if (ptr->next && !is_rootsplit && markfollowright)
458  GistMarkFollowRight(ptr->page);
459  else
460  GistClearFollowRight(ptr->page);
461 
462  /*
463  * Copy the NSN of the original page to all pages. The
464  * F_FOLLOW_RIGHT flags ensure that scans will follow the
465  * rightlinks until the downlinks are inserted.
466  */
467  GistPageSetNSN(ptr->page, oldnsn);
468  }
469 
470  /*
471  * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
472  * insertion for that. NB: The number of pages and data segments
473  * specified here must match the calculations in gistXLogSplit()!
474  */
475  if (!is_build && RelationNeedsWAL(rel))
476  XLogEnsureRecordSpace(npage, 1 + npage * 2);
477 
479 
480  /*
481  * Must mark buffers dirty before XLogInsert, even though we'll still
482  * be changing their opaque fields below.
483  */
484  for (ptr = dist; ptr; ptr = ptr->next)
485  MarkBufferDirty(ptr->buffer);
486  if (BufferIsValid(leftchildbuf))
487  MarkBufferDirty(leftchildbuf);
488 
489  /*
490  * The first page in the chain was a temporary working copy meant to
491  * replace the old page. Copy it over the old page.
492  */
494  dist->page = BufferGetPage(dist->buffer);
495 
496  /*
497  * Write the WAL record.
498  *
499  * If we're building a new index, however, we don't WAL-log changes
500  * yet. The LSN-NSN interlock between parent and child requires that
501  * LSNs never move backwards, so set the LSNs to a value that's
502  * smaller than any real or fake unlogged LSN that might be generated
503  * later. (There can't be any concurrent scans during index build, so
504  * we don't need to be able to detect concurrent splits yet.)
505  */
506  if (is_build)
507  recptr = GistBuildLSN;
508  else
509  {
510  if (RelationNeedsWAL(rel))
511  recptr = gistXLogSplit(is_leaf,
512  dist, oldrlink, oldnsn, leftchildbuf,
513  markfollowright);
514  else
515  recptr = gistGetFakeLSN(rel);
516  }
517 
518  for (ptr = dist; ptr; ptr = ptr->next)
519  PageSetLSN(ptr->page, recptr);
520 
521  /*
522  * Return the new child buffers to the caller.
523  *
524  * If this was a root split, we've already inserted the downlink
525  * pointers, in the form of a new root page. Therefore we can release
526  * all the new buffers, and keep just the root page locked.
527  */
528  if (is_rootsplit)
529  {
530  for (ptr = dist->next; ptr; ptr = ptr->next)
531  UnlockReleaseBuffer(ptr->buffer);
532  }
533  }
534  else
535  {
536  /*
537  * Enough space. We always get here if ntup==0.
538  */
540 
541  /*
542  * Delete old tuple if any, then insert new tuple(s) if any. If
543  * possible, use the fast path of PageIndexTupleOverwrite.
544  */
545  if (OffsetNumberIsValid(oldoffnum))
546  {
547  if (ntup == 1)
548  {
549  /* One-for-one replacement, so use PageIndexTupleOverwrite */
550  if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
551  IndexTupleSize(*itup)))
552  elog(ERROR, "failed to add item to index page in \"%s\"",
554  }
555  else
556  {
557  /* Delete old, then append new tuple(s) to page */
558  PageIndexTupleDelete(page, oldoffnum);
559  gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
560  }
561  }
562  else
563  {
564  /* Just append new tuples at the end of the page */
565  gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
566  }
567 
568  MarkBufferDirty(buffer);
569 
570  if (BufferIsValid(leftchildbuf))
571  MarkBufferDirty(leftchildbuf);
572 
573  if (is_build)
574  recptr = GistBuildLSN;
575  else
576  {
577  if (RelationNeedsWAL(rel))
578  {
579  OffsetNumber ndeloffs = 0,
580  deloffs[1];
581 
582  if (OffsetNumberIsValid(oldoffnum))
583  {
584  deloffs[0] = oldoffnum;
585  ndeloffs = 1;
586  }
587 
588  recptr = gistXLogUpdate(buffer,
589  deloffs, ndeloffs, itup, ntup,
590  leftchildbuf);
591  }
592  else
593  recptr = gistGetFakeLSN(rel);
594  }
595  PageSetLSN(page, recptr);
596 
597  if (newblkno)
598  *newblkno = blkno;
599  }
600 
601  /*
602  * If we inserted the downlink for a child page, set NSN and clear
603  * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
604  * follow the rightlink if and only if they looked at the parent page
605  * before we inserted the downlink.
606  *
607  * Note that we do this *after* writing the WAL record. That means that
608  * the possible full page image in the WAL record does not include these
609  * changes, and they must be replayed even if the page is restored from
610  * the full page image. There's a chicken-and-egg problem: if we updated
611  * the child pages first, we wouldn't know the recptr of the WAL record
612  * we're about to write.
613  */
614  if (BufferIsValid(leftchildbuf))
615  {
616  Page leftpg = BufferGetPage(leftchildbuf);
617 
618  GistPageSetNSN(leftpg, recptr);
619  GistClearFollowRight(leftpg);
620 
621  PageSetLSN(leftpg, recptr);
622  }
623 
625 
626  return is_split;
627 }
628 
629 /*
630  * Workhouse routine for doing insertion into a GiST index. Note that
631  * this routine assumes it is invoked in a short-lived memory context,
632  * so it does not bother releasing palloc'd allocations.
633  */
/*
 * NOTE(review): doxygen-scraped text; embedded numbers are the original
 * source line numbers.  The scraper dropped orig lines 635 (first line of
 * the signature, "gistdoinsert(Relation r, IndexTuple itup, Size
 * freespace," to match the callers above), 642 (presumably the
 * "GISTInsertState state;" local that the memset below initializes), 698,
 * and 710/780/844 (presumably the LockBuffer(..., GIST_EXCLUSIVE) re-lock
 * calls that pair with the preceding GIST_UNLOCKs), plus parts of 762 and
 * 889.  Restore from upstream gist.c; the lock/unlock pairing here is
 * correctness-critical.
 */
634 void
636  GISTSTATE *giststate, Relation heapRel, bool is_build)
637 {
638  ItemId iid;
639  IndexTuple idxtuple;
640  GISTInsertStack firststack;
641  GISTInsertStack *stack;
643  bool xlocked = false;
644 
645  memset(&state, 0, sizeof(GISTInsertState));
646  state.freespace = freespace;
647  state.r = r;
648  state.heapRel = heapRel;
649  state.is_build = is_build;
650 
651  /* Start from the root */
652  firststack.blkno = GIST_ROOT_BLKNO;
653  firststack.lsn = 0;
654  firststack.retry_from_parent = false;
655  firststack.parent = NULL;
656  firststack.downlinkoffnum = InvalidOffsetNumber;
657  state.stack = stack = &firststack;
658 
659  /*
660  * Walk down along the path of smallest penalty, updating the parent
661  * pointers with the key we're inserting as we go. If we crash in the
662  * middle, the tree is consistent, although the possible parent updates
663  * were a waste.
664  */
665  for (;;)
666  {
667  /*
668  * If we split an internal page while descending the tree, we have to
669  * retry at the parent. (Normally, the LSN-NSN interlock below would
670  * also catch this and cause us to retry. But LSNs are not updated
671  * during index build.)
672  */
673  while (stack->retry_from_parent)
674  {
675  if (xlocked)
676  LockBuffer(stack->buffer, GIST_UNLOCK);
677  xlocked = false;
678  ReleaseBuffer(stack->buffer);
679  state.stack = stack = stack->parent;
680  }
681 
682  if (XLogRecPtrIsInvalid(stack->lsn))
683  stack->buffer = ReadBuffer(state.r, stack->blkno);
684 
685  /*
686  * Be optimistic and grab shared lock first. Swap it for an exclusive
687  * lock later if we need to update the page.
688  */
689  if (!xlocked)
690  {
691  LockBuffer(stack->buffer, GIST_SHARE);
692  gistcheckpage(state.r, stack->buffer);
693  }
694 
695  stack->page = (Page) BufferGetPage(stack->buffer);
696  stack->lsn = xlocked ?
697  PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
699 
700  /*
701  * If this page was split but the downlink was never inserted to the
702  * parent because the inserting backend crashed before doing that, fix
703  * that now.
704  */
705  if (GistFollowRight(stack->page))
706  {
707  if (!xlocked)
708  {
709  LockBuffer(stack->buffer, GIST_UNLOCK);
711  xlocked = true;
712  /* someone might've completed the split when we unlocked */
713  if (!GistFollowRight(stack->page))
714  continue;
715  }
716  gistfixsplit(&state, giststate);
717 
718  UnlockReleaseBuffer(stack->buffer);
719  xlocked = false;
720  state.stack = stack = stack->parent;
721  continue;
722  }
723 
724  if ((stack->blkno != GIST_ROOT_BLKNO &&
725  stack->parent->lsn < GistPageGetNSN(stack->page)) ||
726  GistPageIsDeleted(stack->page))
727  {
728  /*
729  * Concurrent split or page deletion detected. There's no
730  * guarantee that the downlink for this page is consistent with
731  * the tuple we're inserting anymore, so go back to parent and
732  * rechoose the best child.
733  */
734  UnlockReleaseBuffer(stack->buffer);
735  xlocked = false;
736  state.stack = stack = stack->parent;
737  continue;
738  }
739 
740  if (!GistPageIsLeaf(stack->page))
741  {
742  /*
743  * This is an internal page so continue to walk down the tree.
744  * Find the child node that has the minimum insertion penalty.
745  */
746  BlockNumber childblkno;
747  IndexTuple newtup;
748  GISTInsertStack *item;
749  OffsetNumber downlinkoffnum;
750 
751  downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
752  iid = PageGetItemId(stack->page, downlinkoffnum);
753  idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
754  childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
755 
756  /*
757  * Check that it's not a leftover invalid tuple from pre-9.1
758  */
759  if (GistTupleIsInvalid(idxtuple))
760  ereport(ERROR,
761  (errmsg("index \"%s\" contains an inner tuple marked as invalid",
763  errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
764  errhint("Please REINDEX it.")));
765 
766  /*
767  * Check that the key representing the target child node is
768  * consistent with the key we're inserting. Update it if it's not.
769  */
770  newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
771  if (newtup)
772  {
773  /*
774  * Swap shared lock for an exclusive one. Beware, the page may
775  * change while we unlock/lock the page...
776  */
777  if (!xlocked)
778  {
779  LockBuffer(stack->buffer, GIST_UNLOCK);
781  xlocked = true;
782  stack->page = (Page) BufferGetPage(stack->buffer);
783 
784  if (PageGetLSN(stack->page) != stack->lsn)
785  {
786  /* the page was changed while we unlocked it, retry */
787  continue;
788  }
789  }
790 
791  /*
792  * Update the tuple.
793  *
794  * We still hold the lock after gistinserttuple(), but it
795  * might have to split the page to make the updated tuple fit.
796  * In that case the updated tuple might migrate to the other
797  * half of the split, so we have to go back to the parent and
798  * descend back to the half that's a better fit for the new
799  * tuple.
800  */
801  if (gistinserttuple(&state, stack, giststate, newtup,
802  downlinkoffnum))
803  {
804  /*
805  * If this was a root split, the root page continues to be
806  * the parent and the updated tuple went to one of the
807  * child pages, so we just need to retry from the root
808  * page.
809  */
810  if (stack->blkno != GIST_ROOT_BLKNO)
811  {
812  UnlockReleaseBuffer(stack->buffer);
813  xlocked = false;
814  state.stack = stack = stack->parent;
815  }
816  continue;
817  }
818  }
819  LockBuffer(stack->buffer, GIST_UNLOCK);
820  xlocked = false;
821 
822  /* descend to the chosen child */
823  item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
824  item->blkno = childblkno;
825  item->parent = stack;
826  item->downlinkoffnum = downlinkoffnum;
827  state.stack = stack = item;
828  }
829  else
830  {
831  /*
832  * Leaf page. Insert the new key. We've already updated all the
833  * parents on the way down, but we might have to split the page if
834  * it doesn't fit. gistinserttuple() will take care of that.
835  */
836 
837  /*
838  * Swap shared lock for an exclusive one. Be careful, the page may
839  * change while we unlock/lock the page...
840  */
841  if (!xlocked)
842  {
843  LockBuffer(stack->buffer, GIST_UNLOCK);
845  xlocked = true;
846  stack->page = (Page) BufferGetPage(stack->buffer);
847  stack->lsn = PageGetLSN(stack->page);
848 
849  if (stack->blkno == GIST_ROOT_BLKNO)
850  {
851  /*
852  * the only page that can become inner instead of leaf is
853  * the root page, so for root we should recheck it
854  */
855  if (!GistPageIsLeaf(stack->page))
856  {
857  /*
858  * very rare situation: during unlock/lock index with
859  * number of pages = 1 was increased
860  */
861  LockBuffer(stack->buffer, GIST_UNLOCK);
862  xlocked = false;
863  continue;
864  }
865 
866  /*
867  * we don't need to check root split, because checking
868  * leaf/inner is enough to recognize split for root
869  */
870  }
871  else if ((GistFollowRight(stack->page) ||
872  stack->parent->lsn < GistPageGetNSN(stack->page)) ||
873  GistPageIsDeleted(stack->page))
874  {
875  /*
876  * The page was split or deleted while we momentarily
877  * unlocked the page. Go back to parent.
878  */
879  UnlockReleaseBuffer(stack->buffer);
880  xlocked = false;
881  state.stack = stack = stack->parent;
882  continue;
883  }
884  }
885 
886  /* now state.stack->(page, buffer and blkno) points to leaf page */
887 
888  gistinserttuple(&state, stack, giststate, itup,
890  LockBuffer(stack->buffer, GIST_UNLOCK);
891 
892  /* Release any pins we might still hold before exiting */
893  for (; stack; stack = stack->parent)
894  ReleaseBuffer(stack->buffer);
895  break;
896  }
897  }
898 }
899 
900 /*
901  * Traverse the tree to find path from root page to specified "child" block.
902  *
903  * returns a new insertion stack, starting from the parent of "child", up
904  * to the root. *downlinkoffnum is set to the offset of the downlink in the
905  * direct parent of child.
906  *
907  * To prevent deadlocks, this should lock only one page at a time.
908  */
/*
 * NOTE(review): doxygen-scraped text; embedded numbers are the original
 * source line numbers.  Orig lines 925 and 977 were dropped by the scraper
 * -- presumably the "downlinkoffnum = InvalidOffsetNumber" initializers for
 * the newly palloc0'd stack entries, to judge from the surrounding
 * assignments.  Restore from upstream gist.c.
 *
 * Breadth-first search from the root for the parent of block 'child',
 * holding at most one page lock at a time to avoid deadlocks; errors out if
 * the parent cannot be re-found.
 */
909 static GISTInsertStack *
910 gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
911 {
912  Page page;
913  Buffer buffer;
914  OffsetNumber i,
915  maxoff;
916  ItemId iid;
917  IndexTuple idxtuple;
918  List *fifo;
919  GISTInsertStack *top,
920  *ptr;
921  BlockNumber blkno;
922 
923  top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
924  top->blkno = GIST_ROOT_BLKNO;
926 
927  fifo = list_make1(top);
928  while (fifo != NIL)
929  {
930  /* Get next page to visit */
931  top = linitial(fifo);
932  fifo = list_delete_first(fifo);
933 
934  buffer = ReadBuffer(r, top->blkno);
935  LockBuffer(buffer, GIST_SHARE);
936  gistcheckpage(r, buffer);
937  page = (Page) BufferGetPage(buffer);
938 
939  if (GistPageIsLeaf(page))
940  {
941  /*
942  * Because we scan the index top-down, all the rest of the pages
943  * in the queue must be leaf pages as well.
944  */
945  UnlockReleaseBuffer(buffer);
946  break;
947  }
948 
949  /* currently, internal pages are never deleted */
950  Assert(!GistPageIsDeleted(page));
951 
952  top->lsn = BufferGetLSNAtomic(buffer);
953 
954  /*
955  * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
956  * downlink. This should not normally happen..
957  */
958  if (GistFollowRight(page))
959  elog(ERROR, "concurrent GiST page split was incomplete");
960 
961  if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
962  GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
963  {
964  /*
965  * Page was split while we looked elsewhere. We didn't see the
966  * downlink to the right page when we scanned the parent, so add
967  * it to the queue now.
968  *
969  * Put the right page ahead of the queue, so that we visit it
970  * next. That's important, because if this is the lowest internal
971  * level, just above leaves, we might already have queued up some
972  * leaf pages, and we assume that there can't be any non-leaf
973  * pages behind leaf pages.
974  */
975  ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
976  ptr->blkno = GistPageGetOpaque(page)->rightlink;
978  ptr->parent = top->parent;
979 
980  fifo = lcons(ptr, fifo);
981  }
982 
983  maxoff = PageGetMaxOffsetNumber(page);
984 
985  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
986  {
987  iid = PageGetItemId(page, i);
988  idxtuple = (IndexTuple) PageGetItem(page, iid);
989  blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
990  if (blkno == child)
991  {
992  /* Found it! */
993  UnlockReleaseBuffer(buffer);
994  *downlinkoffnum = i;
995  return top;
996  }
997  else
998  {
999  /* Append this child to the list of pages to visit later */
1000  ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
1001  ptr->blkno = blkno;
1002  ptr->downlinkoffnum = i;
1003  ptr->parent = top;
1004 
1005  fifo = lappend(fifo, ptr);
1006  }
1007  }
1008 
1009  UnlockReleaseBuffer(buffer);
1010  }
1011 
1012  elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
1013  RelationGetRelationName(r), child);
1014  return NULL; /* keep compiler quiet */
1015 }
1016 
1017 /*
1018  * Updates the stack so that child->parent is the correct parent of the
1019  * child. child->parent must be exclusively locked on entry, and will
1020  * remain so at exit, but it might not be the same page anymore.
1021  */
1022 static void
1024 {
1025  GISTInsertStack *parent = child->parent;
1026  ItemId iid;
1027  IndexTuple idxtuple;
1028  OffsetNumber maxoff;
1029  GISTInsertStack *ptr;
1030 
1031  gistcheckpage(r, parent->buffer);
1032  parent->page = (Page) BufferGetPage(parent->buffer);
1033  maxoff = PageGetMaxOffsetNumber(parent->page);
1034 
1035  /* Check if the downlink is still where it was before */
1036  if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
1037  {
1038  iid = PageGetItemId(parent->page, child->downlinkoffnum);
1039  idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1040  if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1041  return; /* still there */
1042  }
1043 
1044  /*
1045  * The page has changed since we looked. During normal operation, every
1046  * update of a page changes its LSN, so the LSN we memorized should have
1047  * changed too. During index build, however, we don't WAL-log the changes
1048  * until we have built the index, so the LSN doesn't change. There is no
1049  * concurrent activity during index build, but we might have changed the
1050  * parent ourselves.
1051  */
1052  Assert(parent->lsn != PageGetLSN(parent->page) || is_build);
1053 
1054  /*
1055  * Scan the page to re-find the downlink. If the page was split, it might
1056  * have moved to a different page, so follow the right links until we find
1057  * it.
1058  */
1059  while (true)
1060  {
1061  OffsetNumber i;
1062 
1063  maxoff = PageGetMaxOffsetNumber(parent->page);
1064  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1065  {
1066  iid = PageGetItemId(parent->page, i);
1067  idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1068  if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1069  {
1070  /* yes!!, found */
1071  child->downlinkoffnum = i;
1072  return;
1073  }
1074  }
1075 
1076  parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
1078  UnlockReleaseBuffer(parent->buffer);
1079  if (parent->blkno == InvalidBlockNumber)
1080  {
1081  /*
1082  * End of chain and still didn't find parent. It's a very-very
1083  * rare situation when the root was split.
1084  */
1085  break;
1086  }
1087  parent->buffer = ReadBuffer(r, parent->blkno);
1088  LockBuffer(parent->buffer, GIST_EXCLUSIVE);
1089  gistcheckpage(r, parent->buffer);
1090  parent->page = (Page) BufferGetPage(parent->buffer);
1091  }
1092 
1093  /*
1094  * awful!!, we need search tree to find parent ... , but before we should
1095  * release all old parent
1096  */
1097 
1098  ptr = child->parent->parent; /* child->parent already released above */
1099  while (ptr)
1100  {
1101  ReleaseBuffer(ptr->buffer);
1102  ptr = ptr->parent;
1103  }
1104 
1105  /* ok, find new path */
1106  ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
1107 
1108  /* read all buffers as expected by caller */
1109  /* note we don't lock them or gistcheckpage them here! */
1110  while (ptr)
1111  {
1112  ptr->buffer = ReadBuffer(r, ptr->blkno);
1113  ptr->page = (Page) BufferGetPage(ptr->buffer);
1114  ptr = ptr->parent;
1115  }
1116 
1117  /* install new chain of parents to stack */
1118  child->parent = parent;
1119 
1120  /* make recursive call to normal processing */
1122  gistFindCorrectParent(r, child, is_build);
1123 }
1124 
1125 /*
1126  * Form a downlink pointer for the page in 'buf'.
1127  */
1128 static IndexTuple
1130  GISTInsertStack *stack, bool is_build)
1131 {
1132  Page page = BufferGetPage(buf);
1133  OffsetNumber maxoff;
1134  OffsetNumber offset;
1135  IndexTuple downlink = NULL;
1136 
1137  maxoff = PageGetMaxOffsetNumber(page);
1138  for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
1139  {
1140  IndexTuple ituple = (IndexTuple)
1141  PageGetItem(page, PageGetItemId(page, offset));
1142 
1143  if (downlink == NULL)
1144  downlink = CopyIndexTuple(ituple);
1145  else
1146  {
1147  IndexTuple newdownlink;
1148 
1149  newdownlink = gistgetadjusted(rel, downlink, ituple,
1150  giststate);
1151  if (newdownlink)
1152  downlink = newdownlink;
1153  }
1154  }
1155 
1156  /*
1157  * If the page is completely empty, we can't form a meaningful downlink
1158  * for it. But we have to insert a downlink for the page. Any key will do,
1159  * as long as its consistent with the downlink of parent page, so that we
1160  * can legally insert it to the parent. A minimal one that matches as few
1161  * scans as possible would be best, to keep scans from doing useless work,
1162  * but we don't know how to construct that. So we just use the downlink of
1163  * the original page that was split - that's as far from optimal as it can
1164  * get but will do..
1165  */
1166  if (!downlink)
1167  {
1168  ItemId iid;
1169 
1171  gistFindCorrectParent(rel, stack, is_build);
1172  iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
1173  downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
1174  downlink = CopyIndexTuple(downlink);
1175  LockBuffer(stack->parent->buffer, GIST_UNLOCK);
1176  }
1177 
1179  GistTupleSetValid(downlink);
1180 
1181  return downlink;
1182 }
1183 
1184 
1185 /*
1186  * Complete the incomplete split of state->stack->page.
1187  */
1188 static void
1190 {
1191  GISTInsertStack *stack = state->stack;
1192  Buffer buf;
1193  Page page;
1194  List *splitinfo = NIL;
1195 
1196  ereport(LOG,
1197  (errmsg("fixing incomplete split in index \"%s\", block %u",
1198  RelationGetRelationName(state->r), stack->blkno)));
1199 
1200  Assert(GistFollowRight(stack->page));
1202 
1203  buf = stack->buffer;
1204 
1205  /*
1206  * Read the chain of split pages, following the rightlinks. Construct a
1207  * downlink tuple for each page.
1208  */
1209  for (;;)
1210  {
1212  IndexTuple downlink;
1213 
1214  page = BufferGetPage(buf);
1215 
1216  /* Form the new downlink tuples to insert to parent */
1217  downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);
1218 
1219  si->buf = buf;
1220  si->downlink = downlink;
1221 
1222  splitinfo = lappend(splitinfo, si);
1223 
1224  if (GistFollowRight(page))
1225  {
1226  /* lock next page */
1227  buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
1229  }
1230  else
1231  break;
1232  }
1233 
1234  /* Insert the downlinks */
1235  gistfinishsplit(state, stack, giststate, splitinfo, false);
1236 }
1237 
1238 /*
1239  * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1240  * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
1241  * 'stack' represents the path from the root to the page being updated.
1242  *
1243  * The caller must hold an exclusive lock on stack->buffer. The lock is still
1244  * held on return, but the page might not contain the inserted tuple if the
1245  * page was split. The function returns true if the page was split, false
1246  * otherwise.
1247  */
1248 static bool
1250  GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
1251 {
1252  return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
1253  InvalidBuffer, InvalidBuffer, false, false);
1254 }
1255 
1256 /* ----------------
1257  * An extended workhorse version of gistinserttuple(). This version allows
1258  * inserting multiple tuples, or replacing a single tuple with multiple tuples.
1259  * This is used to recursively update the downlinks in the parent when a page
1260  * is split.
1261  *
1262  * If leftchild and rightchild are valid, we're inserting/replacing the
1263  * downlink for rightchild, and leftchild is its left sibling. We clear the
1264  * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1265  * insertion of the downlink.
1266  *
1267  * To avoid holding locks for longer than necessary, when recursing up the
1268  * tree to update the parents, the locking is a bit peculiar here. On entry,
1269  * the caller must hold an exclusive lock on stack->buffer, as well as
1270  * leftchild and rightchild if given. On return:
1271  *
1272  * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1273  * always kept pinned, however.
1274  * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1275  * is kept pinned.
1276  * - Lock and pin on 'rightchild' are always released.
1277  *
1278  * Returns 'true' if the page had to be split. Note that if the page was
1279  * split, the inserted/updated tuples might've been inserted to a right
1280  * sibling of stack->buffer instead of stack->buffer itself.
1281  */
1282 static bool
1284  GISTSTATE *giststate,
1285  IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
1287  bool unlockbuf, bool unlockleftchild)
1288 {
1289  List *splitinfo;
1290  bool is_split;
1291 
1292  /*
1293  * Check for any rw conflicts (in serializable isolation level) just
1294  * before we intend to modify the page
1295  */
1297 
1298  /* Insert the tuple(s) to the page, splitting the page if necessary */
1299  is_split = gistplacetopage(state->r, state->freespace, giststate,
1300  stack->buffer,
1301  tuples, ntup,
1302  oldoffnum, NULL,
1303  leftchild,
1304  &splitinfo,
1305  true,
1306  state->heapRel,
1307  state->is_build);
1308 
1309  /*
1310  * Before recursing up in case the page was split, release locks on the
1311  * child pages. We don't need to keep them locked when updating the
1312  * parent.
1313  */
1316  if (BufferIsValid(leftchild) && unlockleftchild)
1318 
1319  /*
1320  * If we had to split, insert/update the downlinks in the parent. If the
1321  * caller requested us to release the lock on stack->buffer, tell
1322  * gistfinishsplit() to do that as soon as it's safe to do so. If we
1323  * didn't have to split, release it ourselves.
1324  */
1325  if (splitinfo)
1326  gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
1327  else if (unlockbuf)
1328  LockBuffer(stack->buffer, GIST_UNLOCK);
1329 
1330  return is_split;
1331 }
1332 
1333 /*
1334  * Finish an incomplete split by inserting/updating the downlinks in parent
1335  * page. 'splitinfo' contains all the child pages involved in the split,
1336  * from left-to-right.
1337  *
1338  * On entry, the caller must hold a lock on stack->buffer and all the child
1339  * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1340  * released on return. The child pages are always unlocked and unpinned.
1341  */
1342 static void
1344  GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
1345 {
1346  GISTPageSplitInfo *right;
1347  GISTPageSplitInfo *left;
1348  IndexTuple tuples[2];
1349 
1350  /* A split always contains at least two halves */
1351  Assert(list_length(splitinfo) >= 2);
1352 
1353  /*
1354  * We need to insert downlinks for each new page, and update the downlink
1355  * for the original (leftmost) page in the split. Begin at the rightmost
1356  * page, inserting one downlink at a time until there's only two pages
1357  * left. Finally insert the downlink for the last new page and update the
1358  * downlink for the original page as one operation.
1359  */
1361 
1362  /*
1363  * Insert downlinks for the siblings from right to left, until there are
1364  * only two siblings left.
1365  */
1366  for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
1367  {
1368  right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
1369  left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
1370 
1371  gistFindCorrectParent(state->r, stack, state->is_build);
1372  if (gistinserttuples(state, stack->parent, giststate,
1373  &right->downlink, 1,
1375  left->buf, right->buf, false, false))
1376  {
1377  /*
1378  * If the parent page was split, the existing downlink might have
1379  * moved.
1380  */
1382  }
1383  /* gistinserttuples() released the lock on right->buf. */
1384  }
1385 
1386  right = (GISTPageSplitInfo *) lsecond(splitinfo);
1387  left = (GISTPageSplitInfo *) linitial(splitinfo);
1388 
1389  /*
1390  * Finally insert downlink for the remaining right page and update the
1391  * downlink for the original page to not contain the tuples that were
1392  * moved to the new pages.
1393  */
1394  tuples[0] = left->downlink;
1395  tuples[1] = right->downlink;
1396  gistFindCorrectParent(state->r, stack, state->is_build);
1397  (void) gistinserttuples(state, stack->parent, giststate,
1398  tuples, 2,
1399  stack->downlinkoffnum,
1400  left->buf, right->buf,
1401  true, /* Unlock parent */
1402  unlockbuf /* Unlock stack->buffer if caller
1403  * wants that */
1404  );
1405 
1406  /*
1407  * The downlink might have moved when we updated it. Even if the page
1408  * wasn't split, because gistinserttuples() implements updating the old
1409  * tuple by removing and re-inserting it!
1410  */
1412 
1413  Assert(left->buf == stack->buffer);
1414 
1415  /*
1416  * If we split the page because we had to adjust the downlink on an
1417  * internal page, while descending the tree for inserting a new tuple,
1418  * then this might no longer be the correct page for the new tuple. The
1419  * downlink to this page might not cover the new tuple anymore, it might
1420  * need to go to the newly-created right sibling instead. Tell the caller
1421  * to walk back up the stack, to re-check at the parent which page to
1422  * insert to.
1423  *
1424  * Normally, the LSN-NSN interlock during the tree descend would also
1425  * detect that a concurrent split happened (by ourselves), and cause us to
1426  * retry at the parent. But that mechanism doesn't work during index
1427  * build, because we don't do WAL-logging, and don't update LSNs, during
1428  * index build.
1429  */
1430  stack->retry_from_parent = true;
1431 }
1432 
1433 /*
1434  * gistSplit -- split a page in the tree and fill struct
1435  * used for XLOG and real writes buffers. Function is recursive, ie
1436  * it will split page until keys will fit in every page.
1437  */
1440  Page page,
1441  IndexTuple *itup, /* contains compressed entry */
1442  int len,
1443  GISTSTATE *giststate)
1444 {
1445  IndexTuple *lvectup,
1446  *rvectup;
1447  GistSplitVector v;
1448  int i;
1449  SplitPageLayout *res = NULL;
1450 
1451  /* this should never recurse very deeply, but better safe than sorry */
1453 
1454  /* there's no point in splitting an empty page */
1455  Assert(len > 0);
1456 
1457  /*
1458  * If a single tuple doesn't fit on a page, no amount of splitting will
1459  * help.
1460  */
1461  if (len == 1)
1462  ereport(ERROR,
1463  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1464  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
1465  IndexTupleSize(itup[0]), GiSTPageSize,
1467 
1468  memset(v.spl_lisnull, true,
1469  sizeof(bool) * giststate->nonLeafTupdesc->natts);
1470  memset(v.spl_risnull, true,
1471  sizeof(bool) * giststate->nonLeafTupdesc->natts);
1472  gistSplitByKey(r, page, itup, len, giststate, &v, 0);
1473 
1474  /* form left and right vector */
1475  lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1476  rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1477 
1478  for (i = 0; i < v.splitVector.spl_nleft; i++)
1479  lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
1480 
1481  for (i = 0; i < v.splitVector.spl_nright; i++)
1482  rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
1483 
1484  /* finalize splitting (may need another split) */
1485  if (!gistfitpage(rvectup, v.splitVector.spl_nright))
1486  {
1487  res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
1488  }
1489  else
1490  {
1491  ROTATEDIST(res);
1492  res->block.num = v.splitVector.spl_nright;
1493  res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
1494  res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
1495  }
1496 
1497  if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
1498  {
1499  SplitPageLayout *resptr,
1500  *subres;
1501 
1502  resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
1503 
1504  /* install on list's tail */
1505  while (resptr->next)
1506  resptr = resptr->next;
1507 
1508  resptr->next = res;
1509  res = subres;
1510  }
1511  else
1512  {
1513  ROTATEDIST(res);
1514  res->block.num = v.splitVector.spl_nleft;
1515  res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
1516  res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
1517  }
1518 
1519  return res;
1520 }
1521 
1522 /*
1523  * Create a GISTSTATE and fill it with information about the index
1524  */
1525 GISTSTATE *
1527 {
1528  GISTSTATE *giststate;
1529  MemoryContext scanCxt;
1530  MemoryContext oldCxt;
1531  int i;
1532 
1533  /* safety check to protect fixed-size arrays in GISTSTATE */
1534  if (index->rd_att->natts > INDEX_MAX_KEYS)
1535  elog(ERROR, "numberOfAttributes %d > %d",
1536  index->rd_att->natts, INDEX_MAX_KEYS);
1537 
1538  /* Create the memory context that will hold the GISTSTATE */
1540  "GiST scan context",
1542  oldCxt = MemoryContextSwitchTo(scanCxt);
1543 
1544  /* Create and fill in the GISTSTATE */
1545  giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
1546 
1547  giststate->scanCxt = scanCxt;
1548  giststate->tempCxt = scanCxt; /* caller must change this if needed */
1549  giststate->leafTupdesc = index->rd_att;
1550 
1551  /*
1552  * The truncated tupdesc for non-leaf index tuples, which doesn't contain
1553  * the INCLUDE attributes.
1554  *
1555  * It is used to form tuples during tuple adjustment and page split.
1556  * B-tree creates shortened tuple descriptor for every truncated tuple,
1557  * because it is doing this less often: it does not have to form truncated
1558  * tuples during page split. Also, B-tree is not adjusting tuples on
1559  * internal pages the way GiST does.
1560  */
1561  giststate->nonLeafTupdesc = CreateTupleDescCopyConstr(index->rd_att);
1562  giststate->nonLeafTupdesc->natts =
1564 
1566  {
1567  fmgr_info_copy(&(giststate->consistentFn[i]),
1569  scanCxt);
1570  fmgr_info_copy(&(giststate->unionFn[i]),
1572  scanCxt);
1573 
1574  /* opclasses are not required to provide a Compress method */
1576  fmgr_info_copy(&(giststate->compressFn[i]),
1578  scanCxt);
1579  else
1580  giststate->compressFn[i].fn_oid = InvalidOid;
1581 
1582  /* opclasses are not required to provide a Decompress method */
1584  fmgr_info_copy(&(giststate->decompressFn[i]),
1586  scanCxt);
1587  else
1588  giststate->decompressFn[i].fn_oid = InvalidOid;
1589 
1590  fmgr_info_copy(&(giststate->penaltyFn[i]),
1592  scanCxt);
1593  fmgr_info_copy(&(giststate->picksplitFn[i]),
1595  scanCxt);
1596  fmgr_info_copy(&(giststate->equalFn[i]),
1598  scanCxt);
1599 
1600  /* opclasses are not required to provide a Distance method */
1602  fmgr_info_copy(&(giststate->distanceFn[i]),
1604  scanCxt);
1605  else
1606  giststate->distanceFn[i].fn_oid = InvalidOid;
1607 
1608  /* opclasses are not required to provide a Fetch method */
1610  fmgr_info_copy(&(giststate->fetchFn[i]),
1612  scanCxt);
1613  else
1614  giststate->fetchFn[i].fn_oid = InvalidOid;
1615 
1616  /*
1617  * If the index column has a specified collation, we should honor that
1618  * while doing comparisons. However, we may have a collatable storage
1619  * type for a noncollatable indexed data type. If there's no index
1620  * collation then specify default collation in case the support
1621  * functions need collation. This is harmless if the support
1622  * functions don't care about collation, so we just do it
1623  * unconditionally. (We could alternatively call get_typcollation,
1624  * but that seems like expensive overkill --- there aren't going to be
1625  * any cases where a GiST storage type has a nondefault collation.)
1626  */
1627  if (OidIsValid(index->rd_indcollation[i]))
1628  giststate->supportCollation[i] = index->rd_indcollation[i];
1629  else
1630  giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
1631  }
1632 
1633  /* No opclass information for INCLUDE attributes */
1634  for (; i < index->rd_att->natts; i++)
1635  {
1636  giststate->consistentFn[i].fn_oid = InvalidOid;
1637  giststate->unionFn[i].fn_oid = InvalidOid;
1638  giststate->compressFn[i].fn_oid = InvalidOid;
1639  giststate->decompressFn[i].fn_oid = InvalidOid;
1640  giststate->penaltyFn[i].fn_oid = InvalidOid;
1641  giststate->picksplitFn[i].fn_oid = InvalidOid;
1642  giststate->equalFn[i].fn_oid = InvalidOid;
1643  giststate->distanceFn[i].fn_oid = InvalidOid;
1644  giststate->fetchFn[i].fn_oid = InvalidOid;
1645  giststate->supportCollation[i] = InvalidOid;
1646  }
1647 
1648  MemoryContextSwitchTo(oldCxt);
1649 
1650  return giststate;
1651 }
1652 
1653 void
1655 {
1656  /* It's sufficient to delete the scanCxt */
1657  MemoryContextDelete(giststate->scanCxt);
1658 }
1659 
1660 /*
1661  * gistprunepage() -- try to remove LP_DEAD items from the given page.
1662  * Function assumes that buffer is exclusively locked.
1663  */
1664 static void
1665 gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
1666 {
1668  int ndeletable = 0;
1669  OffsetNumber offnum,
1670  maxoff;
1671 
1672  Assert(GistPageIsLeaf(page));
1673 
1674  /*
1675  * Scan over all items to see which ones need to be deleted according to
1676  * LP_DEAD flags.
1677  */
1678  maxoff = PageGetMaxOffsetNumber(page);
1679  for (offnum = FirstOffsetNumber;
1680  offnum <= maxoff;
1681  offnum = OffsetNumberNext(offnum))
1682  {
1683  ItemId itemId = PageGetItemId(page, offnum);
1684 
1685  if (ItemIdIsDead(itemId))
1686  deletable[ndeletable++] = offnum;
1687  }
1688 
1689  if (ndeletable > 0)
1690  {
1691  TransactionId snapshotConflictHorizon = InvalidTransactionId;
1692 
1694  snapshotConflictHorizon =
1695  index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
1696  deletable, ndeletable);
1697 
1699 
1700  PageIndexMultiDelete(page, deletable, ndeletable);
1701 
1702  /*
1703  * Mark the page as not containing any LP_DEAD items. This is not
1704  * certainly true (there might be some that have recently been marked,
1705  * but weren't included in our target-item list), but it will almost
1706  * always be true and it doesn't seem worth an additional page scan to
1707  * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
1708  */
1710 
1711  MarkBufferDirty(buffer);
1712 
1713  /* XLOG stuff */
1714  if (RelationNeedsWAL(rel))
1715  {
1716  XLogRecPtr recptr;
1717 
1718  recptr = gistXLogDelete(buffer,
1719  deletable, ndeletable,
1720  snapshotConflictHorizon,
1721  heapRel);
1722 
1723  PageSetLSN(page, recptr);
1724  }
1725  else
1726  PageSetLSN(page, gistGetFakeLSN(rel));
1727 
1728  END_CRIT_SECTION();
1729  }
1730 
1731  /*
1732  * Note: if we didn't find any LP_DEAD items, then the page's
1733  * F_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
1734  * separate write to clear it, however. We will clear it when we split
1735  * the page.
1736  */
1737 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static Datum values[MAXATTR]
Definition: bootstrap.c:150
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3706
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:846
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4906
XLogRecPtr BufferGetLSNAtomic(Buffer buffer)
Definition: bufmgr.c:3967
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4923
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2514
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5140
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:74
@ EB_LOCK_FIRST
Definition: bufmgr.h:86
#define BMR_REL(p_rel)
Definition: bufmgr.h:107
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:424
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1161
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:402
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:1052
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
static XLogRecPtr PageGetLSN(const char *page)
Definition: bufpage.h:386
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:471
#define Assert(condition)
Definition: c.h:858
uint32 TransactionId
Definition: c.h:652
#define OidIsValid(objectId)
Definition: c.h:775
size_t Size
Definition: c.h:605
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define LOG
Definition: elog.h:31
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:580
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define rightchild(x)
Definition: fsmpage.c:30
#define leftchild(x)
Definition: fsmpage.c:29
TransactionId index_compute_xid_horizon_for_tuples(Relation irel, Relation hrel, Buffer ibuf, OffsetNumber *itemnos, int nitems)
Definition: genam.c:292
IndexUniqueCheck
Definition: genam.h:116
void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate, Relation heapRel, bool is_build)
Definition: gist.c:635
static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
Definition: gist.c:1189
static void gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
Definition: gist.c:1665
bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, Buffer buffer, IndexTuple *itup, int ntup, OffsetNumber oldoffnum, BlockNumber *newblkno, Buffer leftchildbuf, List **splitinfo, bool markfollowright, Relation heapRel, bool is_build)
Definition: gist.c:226
bool gistinsert(Relation r, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: gist.c:160
GISTSTATE * initGISTstate(Relation index)
Definition: gist.c:1526
void gistbuildempty(Relation index)
Definition: gist.c:134
static GISTInsertStack * gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
Definition: gist.c:910
static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, IndexTuple *tuples, int ntup, OffsetNumber oldoffnum, Buffer leftchild, Buffer rightchild, bool unlockbuf, bool unlockleftchild)
Definition: gist.c:1283
MemoryContext createTempGistContext(void)
Definition: gist.c:123
void freeGISTstate(GISTSTATE *giststate)
Definition: gist.c:1654
SplitPageLayout * gistSplit(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate)
Definition: gist.c:1439
static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
Definition: gist.c:1249
#define ROTATEDIST(d)
Definition: gist.c:45
static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
Definition: gist.c:1343
static void gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
Definition: gist.c:1023
static IndexTuple gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate, GISTInsertStack *stack, bool is_build)
Definition: gist.c:1129
Datum gisthandler(PG_FUNCTION_ARGS)
Definition: gist.c:59
#define GIST_DECOMPRESS_PROC
Definition: gist.h:34
#define GIST_PICKSPLIT_PROC
Definition: gist.h:36
#define GistMarkFollowRight(page)
Definition: gist.h:183
#define F_LEAF
Definition: gist.h:48
#define GIST_CONSISTENT_PROC
Definition: gist.h:31
#define GistClearFollowRight(page)
Definition: gist.h:184
#define GIST_UNION_PROC
Definition: gist.h:32
#define GIST_FETCH_PROC
Definition: gist.h:39
#define GIST_COMPRESS_PROC
Definition: gist.h:33
#define GISTNProcs
Definition: gist.h:43
#define GistClearPageHasGarbage(page)
Definition: gist.h:180
#define GIST_PENALTY_PROC
Definition: gist.h:35
#define GistPageIsLeaf(page)
Definition: gist.h:169
#define GistFollowRight(page)
Definition: gist.h:182
#define GIST_OPTIONS_PROC
Definition: gist.h:40
#define GIST_DISTANCE_PROC
Definition: gist.h:38
#define GistPageSetNSN(page, val)
Definition: gist.h:187
#define GistPageIsDeleted(page)
Definition: gist.h:172
#define GistPageGetOpaque(page)
Definition: gist.h:167
#define GIST_EQUAL_PROC
Definition: gist.h:37
#define GistPageHasGarbage(page)
Definition: gist.h:178
#define GistPageGetNSN(page)
Definition: gist.h:186
XLogRecPtr GistNSN
Definition: gist.h:62
#define GistBuildLSN
Definition: gist.h:69
#define GIST_MAX_SPLIT_PAGES
Definition: gist_private.h:39
#define GistTupleSetValid(itup)
Definition: gist_private.h:289
#define GIST_UNLOCK
Definition: gist_private.h:44
#define GIST_ROOT_BLKNO
Definition: gist_private.h:262
#define GIST_EXCLUSIVE
Definition: gist_private.h:43
#define GiSTPageSize
Definition: gist_private.h:476
#define GistTupleIsInvalid(itup)
Definition: gist_private.h:288
#define GIST_SHARE
Definition: gist_private.h:42
IndexBuildResult * gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: gistbuild.c:179
bool gistgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: gistget.c:612
int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: gistget.c:743
bool gistcanreturn(Relation index, int attno)
Definition: gistget.c:793
IndexScanDesc gistbeginscan(Relation r, int nkeys, int norderbys)
Definition: gistscan.c:74
void gistendscan(IndexScanDesc scan)
Definition: gistscan.c:347
void gistrescan(IndexScanDesc scan, ScanKey key, int nkeys, ScanKey orderbys, int norderbys)
Definition: gistscan.c:127
void gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate, GistSplitVector *v, int attno)
Definition: gistsplit.c:623
Buffer gistNewBuffer(Relation r, Relation heaprel)
Definition: gistutil.c:824
bool gistproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: gistutil.c:933
bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
Definition: gistutil.c:59
void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
Definition: gistutil.c:34
IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, const Datum *attdata, const bool *isnull, bool isleaf)
Definition: gistutil.c:575
IndexTuple * gistextractpage(Page page, int *len)
Definition: gistutil.c:95
bool gistfitpage(IndexTuple *itvec, int len)
Definition: gistutil.c:79
IndexTuple gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
Definition: gistutil.c:316
OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate)
Definition: gistutil.c:374
XLogRecPtr gistGetFakeLSN(Relation rel)
Definition: gistutil.c:1016
IndexTuple * gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
Definition: gistutil.c:114
bytea * gistoptions(Datum reloptions, bool validate)
Definition: gistutil.c:912
void GISTInitBuffer(Buffer b, uint32 f)
Definition: gistutil.c:773
IndexTupleData * gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
Definition: gistutil.c:127
void gistcheckpage(Relation rel, Buffer buf)
Definition: gistutil.c:785
IndexBulkDeleteResult * gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: gistvacuum.c:59
IndexBulkDeleteResult * gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: gistvacuum.c:75
void gistadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
Definition: gistvalidate.c:295
bool gistvalidate(Oid opclassoid)
Definition: gistvalidate.c:33
XLogRecPtr gistXLogSplit(bool page_is_leaf, SplitPageLayout *dist, BlockNumber origrlink, GistNSN orignsn, Buffer leftchildbuf, bool markfollowright)
Definition: gistxlog.c:495
XLogRecPtr gistXLogDelete(Buffer buffer, OffsetNumber *todelete, int ntodelete, TransactionId snapshotConflictHorizon, Relation heaprel)
Definition: gistxlog.c:670
XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, IndexTuple *itup, int ituplen, Buffer leftchildbuf)
Definition: gistxlog.c:629
for(;;)
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:860
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:826
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:547
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Pointer Item
Definition: item.h:17
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
static void ItemPointerSetBlockNumber(ItemPointerData *pointer, BlockNumber blockNumber)
Definition: itemptr.h:147
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexTupleSize(itup)
Definition: itup.h:70
#define MaxIndexTuplesPerPage
Definition: itup.h:165
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_delete_first(List *list)
Definition: list.c:943
List * lcons(void *datum, List *list)
Definition: list.c:495
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void * palloc0(Size size)
Definition: mcxt.c:1347
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void * palloc(Size size)
Definition: mcxt.c:1317
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define makeNode(_type_)
Definition: nodes.h:155
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
#define INDEX_MAX_KEYS
const void size_t len
const void * data
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
static char * buf
Definition: pg_test_fsync.c:73
void check_stack_depth(void)
Definition: postgres.c:3564
uintptr_t Datum
Definition: postgres.h:64
#define InvalidOid
Definition: postgres_ext.h:36
void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition: predicate.c:3129
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4321
MemoryContextSwitchTo(old_ctx)
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:524
@ INIT_FORKNUM
Definition: relpath.h:61
void gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7181
Oid fn_oid
Definition: fmgr.h:59
BlockNumber blkno
Definition: gist_private.h:210
OffsetNumber downlinkoffnum
Definition: gist_private.h:228
struct GISTInsertStack * parent
Definition: gist_private.h:231
IndexTuple downlink
Definition: gist_private.h:422
FmgrInfo fetchFn[INDEX_MAX_KEYS]
Definition: gist_private.h:94
TupleDesc leafTupdesc
Definition: gist_private.h:80
TupleDesc nonLeafTupdesc
Definition: gist_private.h:81
FmgrInfo penaltyFn[INDEX_MAX_KEYS]
Definition: gist_private.h:90
MemoryContext tempCxt
Definition: gist_private.h:78
Oid supportCollation[INDEX_MAX_KEYS]
Definition: gist_private.h:97
FmgrInfo distanceFn[INDEX_MAX_KEYS]
Definition: gist_private.h:93
FmgrInfo consistentFn[INDEX_MAX_KEYS]
Definition: gist_private.h:86
MemoryContext scanCxt
Definition: gist_private.h:77
FmgrInfo decompressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:89
FmgrInfo compressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:88
FmgrInfo equalFn[INDEX_MAX_KEYS]
Definition: gist_private.h:92
FmgrInfo unionFn[INDEX_MAX_KEYS]
Definition: gist_private.h:87
FmgrInfo picksplitFn[INDEX_MAX_KEYS]
Definition: gist_private.h:91
int spl_nleft
Definition: gist.h:143
OffsetNumber * spl_right
Definition: gist.h:147
int spl_nright
Definition: gist.h:148
OffsetNumber * spl_left
Definition: gist.h:142
GIST_SPLITVEC splitVector
Definition: gist_private.h:237
Datum spl_lattr[INDEX_MAX_KEYS]
Definition: gist_private.h:239
bool spl_lisnull[INDEX_MAX_KEYS]
Definition: gist_private.h:241
Datum spl_rattr[INDEX_MAX_KEYS]
Definition: gist_private.h:243
bool spl_risnull[INDEX_MAX_KEYS]
Definition: gist_private.h:245
ambuildphasename_function ambuildphasename
Definition: amapi.h:285
ambuildempty_function ambuildempty
Definition: amapi.h:275
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:279
bool amclusterable
Definition: amapi.h:249
amoptions_function amoptions
Definition: amapi.h:283
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:297
amrestrpos_function amrestrpos
Definition: amapi.h:294
aminsert_function aminsert
Definition: amapi.h:276
amendscan_function amendscan
Definition: amapi.h:292
uint16 amoptsprocnum
Definition: amapi.h:229
amparallelrescan_function amparallelrescan
Definition: amapi.h:299
Oid amkeytype
Definition: amapi.h:265
bool ampredlocks
Definition: amapi.h:251
uint16 amsupport
Definition: amapi.h:227
amcostestimate_function amcostestimate
Definition: amapi.h:281
bool amcanorderbyop
Definition: amapi.h:233
amadjustmembers_function amadjustmembers
Definition: amapi.h:287
ambuild_function ambuild
Definition: amapi.h:274
bool amstorage
Definition: amapi.h:247
uint16 amstrategies
Definition: amapi.h:225
bool amoptionalkey
Definition: amapi.h:241
amgettuple_function amgettuple
Definition: amapi.h:290
amcanreturn_function amcanreturn
Definition: amapi.h:280
bool amcanunique
Definition: amapi.h:237
amgetbitmap_function amgetbitmap
Definition: amapi.h:291
amproperty_function amproperty
Definition: amapi.h:284
ambulkdelete_function ambulkdelete
Definition: amapi.h:278
bool amsearcharray
Definition: amapi.h:243
bool amsummarizing
Definition: amapi.h:261
amvalidate_function amvalidate
Definition: amapi.h:286
ammarkpos_function ammarkpos
Definition: amapi.h:293
bool amcanmulticol
Definition: amapi.h:239
bool amusemaintenanceworkmem
Definition: amapi.h:259
ambeginscan_function ambeginscan
Definition: amapi.h:288
bool amcanparallel
Definition: amapi.h:253
amrescan_function amrescan
Definition: amapi.h:289
bool amcanorder
Definition: amapi.h:231
bool amcanbuildparallel
Definition: amapi.h:255
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:298
uint8 amparallelvacuumoptions
Definition: amapi.h:263
aminsertcleanup_function aminsertcleanup
Definition: amapi.h:277
bool amcanbackward
Definition: amapi.h:235
amgettreeheight_function amgettreeheight
Definition: amapi.h:282
bool amcaninclude
Definition: amapi.h:257
bool amsearchnulls
Definition: amapi.h:245
void * ii_AmCache
Definition: execnodes.h:210
MemoryContext ii_Context
Definition: execnodes.h:211
ItemPointerData t_tid
Definition: itup.h:37
Definition: pg_list.h:54
gistxlogPage block
Definition: gist_private.h:193
struct SplitPageLayout * next
Definition: gist_private.h:200
IndexTuple itup
Definition: gist_private.h:196
IndexTupleData * list
Definition: gist_private.h:194
BlockNumber blkno
Definition: gist_private.h:186
Definition: type.h:95
Definition: regguts.h:323
#define InvalidTransactionId
Definition: transam.h:31
TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc)
Definition: tupdesc.c:173
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:48
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:55
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition: xloginsert.c:175