/*
 * NOTE(review): the three lines here were doxygen page-header cruft
 * ("PostgreSQL Source Code git master / gist.c / Go to the documentation
 * of this file.") left over from HTML extraction; preserved as a comment
 * so the file remains valid C.
 */
1 /*-------------------------------------------------------------------------
2  *
3  * gist.c
4  * interface routines for the postgres GiST index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gist/gist.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/gist_private.h"
18 #include "access/gistscan.h"
19 #include "access/xloginsert.h"
20 #include "catalog/pg_collation.h"
21 #include "commands/vacuum.h"
22 #include "miscadmin.h"
23 #include "nodes/execnodes.h"
24 #include "storage/lmgr.h"
25 #include "storage/predicate.h"
26 #include "utils/builtins.h"
27 #include "utils/index_selfuncs.h"
28 #include "utils/memutils.h"
29 #include "utils/rel.h"
30 
31 /* non-export function prototypes */
32 static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
34  GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
36  GISTSTATE *giststate,
37  IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
39  bool unlockbuf, bool unlockleftchild);
41  GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
42 static void gistprunepage(Relation rel, Page page, Buffer buffer,
43  Relation heapRel);
44 
45 
/*
 * Push a freshly-zeroed SplitedPageLayout element onto the front of list
 * (d).  The new element becomes the new head; its block number and buffer
 * are marked invalid until the caller fills in real values.
 */
#define ROTATEDIST(d) do { \
	SplitedPageLayout *tmp=(SplitedPageLayout*)palloc0(sizeof(SplitedPageLayout)); \
	tmp->block.blkno = InvalidBlockNumber; \
	tmp->buffer = InvalidBuffer; \
	tmp->next = (d); \
	(d)=tmp; \
} while(0)
53 
54 
55 /*
56  * GiST handler function: return IndexAmRoutine with access method parameters
57  * and callbacks.
58  */
59 Datum
61 {
63 
64  amroutine->amstrategies = 0;
65  amroutine->amsupport = GISTNProcs;
66  amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
67  amroutine->amcanorder = false;
68  amroutine->amcanorderbyop = true;
69  amroutine->amcanbackward = false;
70  amroutine->amcanunique = false;
71  amroutine->amcanmulticol = true;
72  amroutine->amoptionalkey = true;
73  amroutine->amsearcharray = false;
74  amroutine->amsearchnulls = true;
75  amroutine->amstorage = true;
76  amroutine->amclusterable = true;
77  amroutine->ampredlocks = true;
78  amroutine->amcanparallel = false;
79  amroutine->amcaninclude = true;
80  amroutine->amusemaintenanceworkmem = false;
81  amroutine->amsummarizing = false;
82  amroutine->amparallelvacuumoptions =
84  amroutine->amkeytype = InvalidOid;
85 
86  amroutine->ambuild = gistbuild;
87  amroutine->ambuildempty = gistbuildempty;
88  amroutine->aminsert = gistinsert;
89  amroutine->ambulkdelete = gistbulkdelete;
91  amroutine->amcanreturn = gistcanreturn;
92  amroutine->amcostestimate = gistcostestimate;
93  amroutine->amoptions = gistoptions;
94  amroutine->amproperty = gistproperty;
95  amroutine->ambuildphasename = NULL;
96  amroutine->amvalidate = gistvalidate;
98  amroutine->ambeginscan = gistbeginscan;
99  amroutine->amrescan = gistrescan;
100  amroutine->amgettuple = gistgettuple;
101  amroutine->amgetbitmap = gistgetbitmap;
102  amroutine->amendscan = gistendscan;
103  amroutine->ammarkpos = NULL;
104  amroutine->amrestrpos = NULL;
105  amroutine->amestimateparallelscan = NULL;
106  amroutine->aminitparallelscan = NULL;
107  amroutine->amparallelrescan = NULL;
108 
109  PG_RETURN_POINTER(amroutine);
110 }
111 
112 /*
113  * Create and return a temporary memory context for use by GiST. We
114  * _always_ invoke user-provided methods in a temporary memory
115  * context, so that memory leaks in those functions cannot cause
116  * problems. Also, we use some additional temporary contexts in the
117  * GiST code itself, to avoid the need to do some awkward manual
118  * memory management.
119  */
122 {
124  "GiST temporary context",
126 }
127 
128 /*
129  * gistbuildempty() -- build an empty gist index in the initialization fork
130  */
131 void
133 {
134  Buffer buffer;
135 
136  /* Initialize the root page */
137  buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
139 
140  /* Initialize and xlog buffer */
142  GISTInitBuffer(buffer, F_LEAF);
143  MarkBufferDirty(buffer);
144  log_newpage_buffer(buffer, true);
146 
147  /* Unlock and release the buffer */
148  UnlockReleaseBuffer(buffer);
149 }
150 
/*
 * gistinsert -- wrapper for GiST tuple insertion.
 *
 * This is the public interface routine for tuple insertion in GiSTs.
 * It doesn't do any work; just locks the relation and passes the buck.
 */
bool
gistinsert(Relation r, Datum *values, bool *isnull,
		   ItemPointer ht_ctid, Relation heapRel,
		   IndexUniqueCheck checkUnique,
		   bool indexUnchanged,
		   IndexInfo *indexInfo)
{
	GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
	IndexTuple	itup;
	MemoryContext oldCxt;

	/* Initialize GISTSTATE cache if first call in this statement */
	if (giststate == NULL)
	{
		/*
		 * Build the cache in ii_Context so it lives as long as the IndexInfo
		 * itself, not just the current (possibly per-tuple) context.
		 */
		oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
		giststate = initGISTstate(r);
		giststate->tempCxt = createTempGistContext();
		indexInfo->ii_AmCache = (void *) giststate;
		MemoryContextSwitchTo(oldCxt);
	}

	/* Form the tuple and do the insertion in the short-lived temp context */
	oldCxt = MemoryContextSwitchTo(giststate->tempCxt);

	itup = gistFormTuple(giststate, r,
						 values, isnull, true /* size is currently bogus */ );
	itup->t_tid = *ht_ctid;

	gistdoinsert(r, itup, 0, giststate, heapRel, false);

	/* cleanup: drop everything palloc'd during this insertion at once */
	MemoryContextSwitchTo(oldCxt);
	MemoryContextReset(giststate->tempCxt);

	/* GiST never enforces uniqueness, so there is never a conflict */
	return false;
}
192 
193 
194 /*
195  * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
196  * at that offset is atomically removed along with inserting the new tuples.
197  * This is used to replace a tuple with a new one.
198  *
199  * If 'leftchildbuf' is valid, we're inserting the downlink for the page
200  * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
201  * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
202  *
203  * If 'markfollowright' is true and the page is split, the left child is
204  * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
205  * index build, however, there is no concurrent access and the page splitting
206  * is done in a slightly simpler fashion, and false is passed.
207  *
208  * If there is not enough room on the page, it is split. All the split
209  * pages are kept pinned and locked and returned in *splitinfo, the caller
210  * is responsible for inserting the downlinks for them. However, if
211  * 'buffer' is the root page and it needs to be split, gistplacetopage()
212  * performs the split as one atomic operation, and *splitinfo is set to NIL.
213  * In that case, we continue to hold the root page locked, and the child
214  * pages are released; note that new tuple(s) are *not* on the root page
215  * but in one of the new child pages.
216  *
217  * If 'newblkno' is not NULL, returns the block number of page the first
218  * new/updated tuple was inserted to. Usually it's the given page, but could
219  * be its right sibling if the page was split.
220  *
221  * Returns 'true' if the page was split, 'false' otherwise.
222  */
223 bool
224 gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
225  Buffer buffer,
226  IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
227  BlockNumber *newblkno,
228  Buffer leftchildbuf,
229  List **splitinfo,
230  bool markfollowright,
231  Relation heapRel,
232  bool is_build)
233 {
234  BlockNumber blkno = BufferGetBlockNumber(buffer);
235  Page page = BufferGetPage(buffer);
236  bool is_leaf = (GistPageIsLeaf(page)) ? true : false;
237  XLogRecPtr recptr;
238  bool is_split;
239 
240  /*
241  * Refuse to modify a page that's incompletely split. This should not
242  * happen because we finish any incomplete splits while we walk down the
243  * tree. However, it's remotely possible that another concurrent inserter
244  * splits a parent page, and errors out before completing the split. We
245  * will just throw an error in that case, and leave any split we had in
246  * progress unfinished too. The next insert that comes along will clean up
247  * the mess.
248  */
249  if (GistFollowRight(page))
250  elog(ERROR, "concurrent GiST page split was incomplete");
251 
252  /* should never try to insert to a deleted page */
253  Assert(!GistPageIsDeleted(page));
254 
255  *splitinfo = NIL;
256 
257  /*
258  * if isupdate, remove old key: This node's key has been modified, either
259  * because a child split occurred or because we needed to adjust our key
260  * for an insert in a child node. Therefore, remove the old version of
261  * this node's key.
262  *
263  * for WAL replay, in the non-split case we handle this by setting up a
264  * one-element todelete array; in the split case, it's handled implicitly
265  * because the tuple vector passed to gistSplit won't include this tuple.
266  */
267  is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
268 
269  /*
270  * If leaf page is full, try at first to delete dead tuples. And then
271  * check again.
272  */
273  if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
274  {
275  gistprunepage(rel, page, buffer, heapRel);
276  is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
277  }
278 
279  if (is_split)
280  {
281  /* no space for insertion */
282  IndexTuple *itvec;
283  int tlen;
284  SplitedPageLayout *dist = NULL,
285  *ptr;
286  BlockNumber oldrlink = InvalidBlockNumber;
287  GistNSN oldnsn = 0;
288  SplitedPageLayout rootpg;
289  bool is_rootsplit;
290  int npage;
291 
292  is_rootsplit = (blkno == GIST_ROOT_BLKNO);
293 
294  /*
295  * Form index tuples vector to split. If we're replacing an old tuple,
296  * remove the old version from the vector.
297  */
298  itvec = gistextractpage(page, &tlen);
299  if (OffsetNumberIsValid(oldoffnum))
300  {
301  /* on inner page we should remove old tuple */
302  int pos = oldoffnum - FirstOffsetNumber;
303 
304  tlen--;
305  if (pos != tlen)
306  memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
307  }
308  itvec = gistjoinvector(itvec, &tlen, itup, ntup);
309  dist = gistSplit(rel, page, itvec, tlen, giststate);
310 
311  /*
312  * Check that split didn't produce too many pages.
313  */
314  npage = 0;
315  for (ptr = dist; ptr; ptr = ptr->next)
316  npage++;
317  /* in a root split, we'll add one more page to the list below */
318  if (is_rootsplit)
319  npage++;
320  if (npage > GIST_MAX_SPLIT_PAGES)
321  elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
322  npage, GIST_MAX_SPLIT_PAGES);
323 
324  /*
325  * Set up pages to work with. Allocate new buffers for all but the
326  * leftmost page. The original page becomes the new leftmost page, and
327  * is just replaced with the new contents.
328  *
329  * For a root-split, allocate new buffers for all child pages, the
330  * original page is overwritten with new root page containing
331  * downlinks to the new child pages.
332  */
333  ptr = dist;
334  if (!is_rootsplit)
335  {
336  /* save old rightlink and NSN */
337  oldrlink = GistPageGetOpaque(page)->rightlink;
338  oldnsn = GistPageGetNSN(page);
339 
340  dist->buffer = buffer;
341  dist->block.blkno = BufferGetBlockNumber(buffer);
343 
344  /* clean all flags except F_LEAF */
345  GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
346 
347  ptr = ptr->next;
348  }
349  for (; ptr; ptr = ptr->next)
350  {
351  /* Allocate new page */
352  ptr->buffer = gistNewBuffer(rel, heapRel);
353  GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
354  ptr->page = BufferGetPage(ptr->buffer);
355  ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
357  BufferGetBlockNumber(buffer),
358  BufferGetBlockNumber(ptr->buffer));
359  }
360 
361  /*
362  * Now that we know which blocks the new pages go to, set up downlink
363  * tuples to point to them.
364  */
365  for (ptr = dist; ptr; ptr = ptr->next)
366  {
367  ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
368  GistTupleSetValid(ptr->itup);
369  }
370 
371  /*
372  * If this is a root split, we construct the new root page with the
373  * downlinks here directly, instead of requiring the caller to insert
374  * them. Add the new root page to the list along with the child pages.
375  */
376  if (is_rootsplit)
377  {
378  IndexTuple *downlinks;
379  int ndownlinks = 0;
380  int i;
381 
382  rootpg.buffer = buffer;
384  GistPageGetOpaque(rootpg.page)->flags = 0;
385 
386  /* Prepare a vector of all the downlinks */
387  for (ptr = dist; ptr; ptr = ptr->next)
388  ndownlinks++;
389  downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
390  for (i = 0, ptr = dist; ptr; ptr = ptr->next)
391  downlinks[i++] = ptr->itup;
392 
393  rootpg.block.blkno = GIST_ROOT_BLKNO;
394  rootpg.block.num = ndownlinks;
395  rootpg.list = gistfillitupvec(downlinks, ndownlinks,
396  &(rootpg.lenlist));
397  rootpg.itup = NULL;
398 
399  rootpg.next = dist;
400  dist = &rootpg;
401  }
402  else
403  {
404  /* Prepare split-info to be returned to caller */
405  for (ptr = dist; ptr; ptr = ptr->next)
406  {
408 
409  si->buf = ptr->buffer;
410  si->downlink = ptr->itup;
411  *splitinfo = lappend(*splitinfo, si);
412  }
413  }
414 
415  /*
416  * Fill all pages. All the pages are new, ie. freshly allocated empty
417  * pages, or a temporary copy of the old page.
418  */
419  for (ptr = dist; ptr; ptr = ptr->next)
420  {
421  char *data = (char *) (ptr->list);
422 
423  for (int i = 0; i < ptr->block.num; i++)
424  {
425  IndexTuple thistup = (IndexTuple) data;
426 
427  if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
428  elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
429 
430  /*
431  * If this is the first inserted/updated tuple, let the caller
432  * know which page it landed on.
433  */
434  if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
435  *newblkno = ptr->block.blkno;
436 
437  data += IndexTupleSize(thistup);
438  }
439 
440  /* Set up rightlinks */
441  if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
442  GistPageGetOpaque(ptr->page)->rightlink =
443  ptr->next->block.blkno;
444  else
445  GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
446 
447  /*
448  * Mark the all but the right-most page with the follow-right
449  * flag. It will be cleared as soon as the downlink is inserted
450  * into the parent, but this ensures that if we error out before
451  * that, the index is still consistent. (in buffering build mode,
452  * any error will abort the index build anyway, so this is not
453  * needed.)
454  */
455  if (ptr->next && !is_rootsplit && markfollowright)
456  GistMarkFollowRight(ptr->page);
457  else
458  GistClearFollowRight(ptr->page);
459 
460  /*
461  * Copy the NSN of the original page to all pages. The
462  * F_FOLLOW_RIGHT flags ensure that scans will follow the
463  * rightlinks until the downlinks are inserted.
464  */
465  GistPageSetNSN(ptr->page, oldnsn);
466  }
467 
468  /*
469  * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
470  * insertion for that. NB: The number of pages and data segments
471  * specified here must match the calculations in gistXLogSplit()!
472  */
473  if (!is_build && RelationNeedsWAL(rel))
474  XLogEnsureRecordSpace(npage, 1 + npage * 2);
475 
477 
478  /*
479  * Must mark buffers dirty before XLogInsert, even though we'll still
480  * be changing their opaque fields below.
481  */
482  for (ptr = dist; ptr; ptr = ptr->next)
483  MarkBufferDirty(ptr->buffer);
484  if (BufferIsValid(leftchildbuf))
485  MarkBufferDirty(leftchildbuf);
486 
487  /*
488  * The first page in the chain was a temporary working copy meant to
489  * replace the old page. Copy it over the old page.
490  */
492  dist->page = BufferGetPage(dist->buffer);
493 
494  /*
495  * Write the WAL record.
496  *
497  * If we're building a new index, however, we don't WAL-log changes
498  * yet. The LSN-NSN interlock between parent and child requires that
499  * LSNs never move backwards, so set the LSNs to a value that's
500  * smaller than any real or fake unlogged LSN that might be generated
501  * later. (There can't be any concurrent scans during index build, so
502  * we don't need to be able to detect concurrent splits yet.)
503  */
504  if (is_build)
505  recptr = GistBuildLSN;
506  else
507  {
508  if (RelationNeedsWAL(rel))
509  recptr = gistXLogSplit(is_leaf,
510  dist, oldrlink, oldnsn, leftchildbuf,
511  markfollowright);
512  else
513  recptr = gistGetFakeLSN(rel);
514  }
515 
516  for (ptr = dist; ptr; ptr = ptr->next)
517  PageSetLSN(ptr->page, recptr);
518 
519  /*
520  * Return the new child buffers to the caller.
521  *
522  * If this was a root split, we've already inserted the downlink
523  * pointers, in the form of a new root page. Therefore we can release
524  * all the new buffers, and keep just the root page locked.
525  */
526  if (is_rootsplit)
527  {
528  for (ptr = dist->next; ptr; ptr = ptr->next)
529  UnlockReleaseBuffer(ptr->buffer);
530  }
531  }
532  else
533  {
534  /*
535  * Enough space. We always get here if ntup==0.
536  */
538 
539  /*
540  * Delete old tuple if any, then insert new tuple(s) if any. If
541  * possible, use the fast path of PageIndexTupleOverwrite.
542  */
543  if (OffsetNumberIsValid(oldoffnum))
544  {
545  if (ntup == 1)
546  {
547  /* One-for-one replacement, so use PageIndexTupleOverwrite */
548  if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
549  IndexTupleSize(*itup)))
550  elog(ERROR, "failed to add item to index page in \"%s\"",
552  }
553  else
554  {
555  /* Delete old, then append new tuple(s) to page */
556  PageIndexTupleDelete(page, oldoffnum);
557  gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
558  }
559  }
560  else
561  {
562  /* Just append new tuples at the end of the page */
563  gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
564  }
565 
566  MarkBufferDirty(buffer);
567 
568  if (BufferIsValid(leftchildbuf))
569  MarkBufferDirty(leftchildbuf);
570 
571  if (is_build)
572  recptr = GistBuildLSN;
573  else
574  {
575  if (RelationNeedsWAL(rel))
576  {
577  OffsetNumber ndeloffs = 0,
578  deloffs[1];
579 
580  if (OffsetNumberIsValid(oldoffnum))
581  {
582  deloffs[0] = oldoffnum;
583  ndeloffs = 1;
584  }
585 
586  recptr = gistXLogUpdate(buffer,
587  deloffs, ndeloffs, itup, ntup,
588  leftchildbuf);
589  }
590  else
591  recptr = gistGetFakeLSN(rel);
592  }
593  PageSetLSN(page, recptr);
594 
595  if (newblkno)
596  *newblkno = blkno;
597  }
598 
599  /*
600  * If we inserted the downlink for a child page, set NSN and clear
601  * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
602  * follow the rightlink if and only if they looked at the parent page
603  * before we inserted the downlink.
604  *
605  * Note that we do this *after* writing the WAL record. That means that
606  * the possible full page image in the WAL record does not include these
607  * changes, and they must be replayed even if the page is restored from
608  * the full page image. There's a chicken-and-egg problem: if we updated
609  * the child pages first, we wouldn't know the recptr of the WAL record
610  * we're about to write.
611  */
612  if (BufferIsValid(leftchildbuf))
613  {
614  Page leftpg = BufferGetPage(leftchildbuf);
615 
616  GistPageSetNSN(leftpg, recptr);
617  GistClearFollowRight(leftpg);
618 
619  PageSetLSN(leftpg, recptr);
620  }
621 
623 
624  return is_split;
625 }
626 
627 /*
628  * Workhouse routine for doing insertion into a GiST index. Note that
629  * this routine assumes it is invoked in a short-lived memory context,
630  * so it does not bother releasing palloc'd allocations.
631  */
632 void
634  GISTSTATE *giststate, Relation heapRel, bool is_build)
635 {
636  ItemId iid;
637  IndexTuple idxtuple;
638  GISTInsertStack firststack;
639  GISTInsertStack *stack;
641  bool xlocked = false;
642 
643  memset(&state, 0, sizeof(GISTInsertState));
644  state.freespace = freespace;
645  state.r = r;
646  state.heapRel = heapRel;
647  state.is_build = is_build;
648 
649  /* Start from the root */
650  firststack.blkno = GIST_ROOT_BLKNO;
651  firststack.lsn = 0;
652  firststack.retry_from_parent = false;
653  firststack.parent = NULL;
654  firststack.downlinkoffnum = InvalidOffsetNumber;
655  state.stack = stack = &firststack;
656 
657  /*
658  * Walk down along the path of smallest penalty, updating the parent
659  * pointers with the key we're inserting as we go. If we crash in the
660  * middle, the tree is consistent, although the possible parent updates
661  * were a waste.
662  */
663  for (;;)
664  {
665  /*
666  * If we split an internal page while descending the tree, we have to
667  * retry at the parent. (Normally, the LSN-NSN interlock below would
668  * also catch this and cause us to retry. But LSNs are not updated
669  * during index build.)
670  */
671  while (stack->retry_from_parent)
672  {
673  if (xlocked)
674  LockBuffer(stack->buffer, GIST_UNLOCK);
675  xlocked = false;
676  ReleaseBuffer(stack->buffer);
677  state.stack = stack = stack->parent;
678  }
679 
680  if (XLogRecPtrIsInvalid(stack->lsn))
681  stack->buffer = ReadBuffer(state.r, stack->blkno);
682 
683  /*
684  * Be optimistic and grab shared lock first. Swap it for an exclusive
685  * lock later if we need to update the page.
686  */
687  if (!xlocked)
688  {
689  LockBuffer(stack->buffer, GIST_SHARE);
690  gistcheckpage(state.r, stack->buffer);
691  }
692 
693  stack->page = (Page) BufferGetPage(stack->buffer);
694  stack->lsn = xlocked ?
695  PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
697 
698  /*
699  * If this page was split but the downlink was never inserted to the
700  * parent because the inserting backend crashed before doing that, fix
701  * that now.
702  */
703  if (GistFollowRight(stack->page))
704  {
705  if (!xlocked)
706  {
707  LockBuffer(stack->buffer, GIST_UNLOCK);
709  xlocked = true;
710  /* someone might've completed the split when we unlocked */
711  if (!GistFollowRight(stack->page))
712  continue;
713  }
714  gistfixsplit(&state, giststate);
715 
716  UnlockReleaseBuffer(stack->buffer);
717  xlocked = false;
718  state.stack = stack = stack->parent;
719  continue;
720  }
721 
722  if ((stack->blkno != GIST_ROOT_BLKNO &&
723  stack->parent->lsn < GistPageGetNSN(stack->page)) ||
724  GistPageIsDeleted(stack->page))
725  {
726  /*
727  * Concurrent split or page deletion detected. There's no
728  * guarantee that the downlink for this page is consistent with
729  * the tuple we're inserting anymore, so go back to parent and
730  * rechoose the best child.
731  */
732  UnlockReleaseBuffer(stack->buffer);
733  xlocked = false;
734  state.stack = stack = stack->parent;
735  continue;
736  }
737 
738  if (!GistPageIsLeaf(stack->page))
739  {
740  /*
741  * This is an internal page so continue to walk down the tree.
742  * Find the child node that has the minimum insertion penalty.
743  */
744  BlockNumber childblkno;
745  IndexTuple newtup;
746  GISTInsertStack *item;
747  OffsetNumber downlinkoffnum;
748 
749  downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
750  iid = PageGetItemId(stack->page, downlinkoffnum);
751  idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
752  childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
753 
754  /*
755  * Check that it's not a leftover invalid tuple from pre-9.1
756  */
757  if (GistTupleIsInvalid(idxtuple))
758  ereport(ERROR,
759  (errmsg("index \"%s\" contains an inner tuple marked as invalid",
761  errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
762  errhint("Please REINDEX it.")));
763 
764  /*
765  * Check that the key representing the target child node is
766  * consistent with the key we're inserting. Update it if it's not.
767  */
768  newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
769  if (newtup)
770  {
771  /*
772  * Swap shared lock for an exclusive one. Beware, the page may
773  * change while we unlock/lock the page...
774  */
775  if (!xlocked)
776  {
777  LockBuffer(stack->buffer, GIST_UNLOCK);
779  xlocked = true;
780  stack->page = (Page) BufferGetPage(stack->buffer);
781 
782  if (PageGetLSN(stack->page) != stack->lsn)
783  {
784  /* the page was changed while we unlocked it, retry */
785  continue;
786  }
787  }
788 
789  /*
790  * Update the tuple.
791  *
792  * We still hold the lock after gistinserttuple(), but it
793  * might have to split the page to make the updated tuple fit.
794  * In that case the updated tuple might migrate to the other
795  * half of the split, so we have to go back to the parent and
796  * descend back to the half that's a better fit for the new
797  * tuple.
798  */
799  if (gistinserttuple(&state, stack, giststate, newtup,
800  downlinkoffnum))
801  {
802  /*
803  * If this was a root split, the root page continues to be
804  * the parent and the updated tuple went to one of the
805  * child pages, so we just need to retry from the root
806  * page.
807  */
808  if (stack->blkno != GIST_ROOT_BLKNO)
809  {
810  UnlockReleaseBuffer(stack->buffer);
811  xlocked = false;
812  state.stack = stack = stack->parent;
813  }
814  continue;
815  }
816  }
817  LockBuffer(stack->buffer, GIST_UNLOCK);
818  xlocked = false;
819 
820  /* descend to the chosen child */
821  item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
822  item->blkno = childblkno;
823  item->parent = stack;
824  item->downlinkoffnum = downlinkoffnum;
825  state.stack = stack = item;
826  }
827  else
828  {
829  /*
830  * Leaf page. Insert the new key. We've already updated all the
831  * parents on the way down, but we might have to split the page if
832  * it doesn't fit. gistinserttuple() will take care of that.
833  */
834 
835  /*
836  * Swap shared lock for an exclusive one. Be careful, the page may
837  * change while we unlock/lock the page...
838  */
839  if (!xlocked)
840  {
841  LockBuffer(stack->buffer, GIST_UNLOCK);
843  xlocked = true;
844  stack->page = (Page) BufferGetPage(stack->buffer);
845  stack->lsn = PageGetLSN(stack->page);
846 
847  if (stack->blkno == GIST_ROOT_BLKNO)
848  {
849  /*
850  * the only page that can become inner instead of leaf is
851  * the root page, so for root we should recheck it
852  */
853  if (!GistPageIsLeaf(stack->page))
854  {
855  /*
856  * very rare situation: during unlock/lock index with
857  * number of pages = 1 was increased
858  */
859  LockBuffer(stack->buffer, GIST_UNLOCK);
860  xlocked = false;
861  continue;
862  }
863 
864  /*
865  * we don't need to check root split, because checking
866  * leaf/inner is enough to recognize split for root
867  */
868  }
869  else if ((GistFollowRight(stack->page) ||
870  stack->parent->lsn < GistPageGetNSN(stack->page)) ||
871  GistPageIsDeleted(stack->page))
872  {
873  /*
874  * The page was split or deleted while we momentarily
875  * unlocked the page. Go back to parent.
876  */
877  UnlockReleaseBuffer(stack->buffer);
878  xlocked = false;
879  state.stack = stack = stack->parent;
880  continue;
881  }
882  }
883 
884  /* now state.stack->(page, buffer and blkno) points to leaf page */
885 
886  gistinserttuple(&state, stack, giststate, itup,
888  LockBuffer(stack->buffer, GIST_UNLOCK);
889 
890  /* Release any pins we might still hold before exiting */
891  for (; stack; stack = stack->parent)
892  ReleaseBuffer(stack->buffer);
893  break;
894  }
895  }
896 }
897 
898 /*
899  * Traverse the tree to find path from root page to specified "child" block.
900  *
901  * returns a new insertion stack, starting from the parent of "child", up
902  * to the root. *downlinkoffnum is set to the offset of the downlink in the
903  * direct parent of child.
904  *
905  * To prevent deadlocks, this should lock only one page at a time.
906  */
907 static GISTInsertStack *
908 gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
909 {
910  Page page;
911  Buffer buffer;
912  OffsetNumber i,
913  maxoff;
914  ItemId iid;
915  IndexTuple idxtuple;
916  List *fifo;
917  GISTInsertStack *top,
918  *ptr;
919  BlockNumber blkno;
920 
921  top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
922  top->blkno = GIST_ROOT_BLKNO;
924 
925  fifo = list_make1(top);
926  while (fifo != NIL)
927  {
928  /* Get next page to visit */
929  top = linitial(fifo);
930  fifo = list_delete_first(fifo);
931 
932  buffer = ReadBuffer(r, top->blkno);
933  LockBuffer(buffer, GIST_SHARE);
934  gistcheckpage(r, buffer);
935  page = (Page) BufferGetPage(buffer);
936 
937  if (GistPageIsLeaf(page))
938  {
939  /*
940  * Because we scan the index top-down, all the rest of the pages
941  * in the queue must be leaf pages as well.
942  */
943  UnlockReleaseBuffer(buffer);
944  break;
945  }
946 
947  /* currently, internal pages are never deleted */
948  Assert(!GistPageIsDeleted(page));
949 
950  top->lsn = BufferGetLSNAtomic(buffer);
951 
952  /*
953  * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
954  * downlink. This should not normally happen..
955  */
956  if (GistFollowRight(page))
957  elog(ERROR, "concurrent GiST page split was incomplete");
958 
959  if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
960  GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
961  {
962  /*
963  * Page was split while we looked elsewhere. We didn't see the
964  * downlink to the right page when we scanned the parent, so add
965  * it to the queue now.
966  *
967  * Put the right page ahead of the queue, so that we visit it
968  * next. That's important, because if this is the lowest internal
969  * level, just above leaves, we might already have queued up some
970  * leaf pages, and we assume that there can't be any non-leaf
971  * pages behind leaf pages.
972  */
973  ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
974  ptr->blkno = GistPageGetOpaque(page)->rightlink;
976  ptr->parent = top->parent;
977 
978  fifo = lcons(ptr, fifo);
979  }
980 
981  maxoff = PageGetMaxOffsetNumber(page);
982 
983  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
984  {
985  iid = PageGetItemId(page, i);
986  idxtuple = (IndexTuple) PageGetItem(page, iid);
987  blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
988  if (blkno == child)
989  {
990  /* Found it! */
991  UnlockReleaseBuffer(buffer);
992  *downlinkoffnum = i;
993  return top;
994  }
995  else
996  {
997  /* Append this child to the list of pages to visit later */
998  ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
999  ptr->blkno = blkno;
1000  ptr->downlinkoffnum = i;
1001  ptr->parent = top;
1002 
1003  fifo = lappend(fifo, ptr);
1004  }
1005  }
1006 
1007  UnlockReleaseBuffer(buffer);
1008  }
1009 
1010  elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
1011  RelationGetRelationName(r), child);
1012  return NULL; /* keep compiler quiet */
1013 }
1014 
1015 /*
1016  * Updates the stack so that child->parent is the correct parent of the
1017  * child. child->parent must be exclusively locked on entry, and will
1018  * remain so at exit, but it might not be the same page anymore.
1019  */
1020 static void
/*
 * NOTE(review): this extraction dropped original line 1021, which (per the
 * definition index at the end of this listing) carried the signature:
 *     gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
 */
1022 {
1023  GISTInsertStack *parent = child->parent;
1024  ItemId iid;
1025  IndexTuple idxtuple;
1026  OffsetNumber maxoff;
1027  GISTInsertStack *ptr;
1028 
1029  gistcheckpage(r, parent->buffer);
1030  parent->page = (Page) BufferGetPage(parent->buffer);
1031  maxoff = PageGetMaxOffsetNumber(parent->page);
1032 
1033  /* Check if the downlink is still where it was before */
1034  if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
1035  {
1036  iid = PageGetItemId(parent->page, child->downlinkoffnum);
1037  idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1038  if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1039  return; /* still there */
1040  }
1041 
1042  /*
1043  * The page has changed since we looked. During normal operation, every
1044  * update of a page changes its LSN, so the LSN we memorized should have
1045  * changed too. During index build, however, we don't WAL-log the changes
1046  * until we have built the index, so the LSN doesn't change. There is no
1047  * concurrent activity during index build, but we might have changed the
1048  * parent ourselves.
1049  */
1050  Assert(parent->lsn != PageGetLSN(parent->page) || is_build);
1051 
1052  /*
1053  * Scan the page to re-find the downlink. If the page was split, it might
1054  * have moved to a different page, so follow the right links until we find
1055  * it.
1056  */
1057  while (true)
1058  {
1059  OffsetNumber i;
1060 
1061  maxoff = PageGetMaxOffsetNumber(parent->page);
1062  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1063  {
1064  iid = PageGetItemId(parent->page, i);
1065  idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1066  if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1067  {
1068  /* yes!!, found */
1069  child->downlinkoffnum = i;
1070  return;
1071  }
1072  }
1073 
1074  parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
/*
 * NOTE(review): original line 1075 is missing from this extraction —
 * presumably it reset parent->downlinkoffnum before moving right; verify
 * against the upstream file.
 */
1076  UnlockReleaseBuffer(parent->buffer);
1077  if (parent->blkno == InvalidBlockNumber)
1078  {
1079  /*
1080  * End of chain and still didn't find parent. It's a very-very
1081  * rare situation when root splitted.
1082  */
1083  break;
1084  }
1085  parent->buffer = ReadBuffer(r, parent->blkno);
1086  LockBuffer(parent->buffer, GIST_EXCLUSIVE);
1087  gistcheckpage(r, parent->buffer);
1088  parent->page = (Page) BufferGetPage(parent->buffer);
1089  }
1090 
1091  /*
1092  * awful!!, we need search tree to find parent ... , but before we should
1093  * release all old parent
1094  */
1095 
1096  ptr = child->parent->parent; /* child->parent already released above */
1097  while (ptr)
1098  {
1099  ReleaseBuffer(ptr->buffer);
1100  ptr = ptr->parent;
1101  }
1102 
1103  /* ok, find new path */
1104  ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
1105 
1106  /* read all buffers as expected by caller */
1107  /* note we don't lock them or gistcheckpage them here! */
1108  while (ptr)
1109  {
1110  ptr->buffer = ReadBuffer(r, ptr->blkno);
1111  ptr->page = (Page) BufferGetPage(ptr->buffer);
1112  ptr = ptr->parent;
1113  }
1114 
1115  /* install new chain of parents to stack */
1116  child->parent = parent;
1117 
1118  /* make recursive call to normal processing */
/*
 * NOTE(review): original line 1119 is missing — presumably it re-acquired the
 * exclusive lock on child->parent->buffer before recursing (the function's
 * contract requires the parent exclusively locked); confirm upstream.
 */
1120  gistFindCorrectParent(r, child, is_build);
1121 }
1122 
1123 /*
1124  * Form a downlink pointer for the page in 'buf'.
 *
 * Unions the keys of all tuples on the page into a single downlink tuple.
 * If the page is empty, falls back to copying the parent's existing downlink
 * (see the comment below).
1125  */
1126 static IndexTuple
/*
 * NOTE(review): original line 1127 is missing — per the definition index the
 * signature is:
 *     gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
 */
1128  GISTInsertStack *stack, bool is_build)
1129 {
1130  Page page = BufferGetPage(buf);
1131  OffsetNumber maxoff;
1132  OffsetNumber offset;
1133  IndexTuple downlink = NULL;
1134 
1135  maxoff = PageGetMaxOffsetNumber(page);
1136  for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
1137  {
1138  IndexTuple ituple = (IndexTuple)
1139  PageGetItem(page, PageGetItemId(page, offset));
1140 
1141  if (downlink == NULL)
1142  downlink = CopyIndexTuple(ituple);
1143  else
1144  {
1145  IndexTuple newdownlink;
1146 
/* Widen the running downlink so it also covers this tuple's key. */
1147  newdownlink = gistgetadjusted(rel, downlink, ituple,
1148  giststate);
1149  if (newdownlink)
1150  downlink = newdownlink;
1151  }
1152  }
1153 
1154  /*
1155  * If the page is completely empty, we can't form a meaningful downlink
1156  * for it. But we have to insert a downlink for the page. Any key will do,
1157  * as long as its consistent with the downlink of parent page, so that we
1158  * can legally insert it to the parent. A minimal one that matches as few
1159  * scans as possible would be best, to keep scans from doing useless work,
1160  * but we don't know how to construct that. So we just use the downlink of
1161  * the original page that was split - that's as far from optimal as it can
1162  * get but will do..
1163  */
1164  if (!downlink)
1165  {
1166  ItemId iid;
1167 
/*
 * NOTE(review): original line 1168 is missing — presumably it locked
 * stack->parent->buffer exclusively (the matching unlock is at line 1173
 * below); confirm upstream.
 */
1169  gistFindCorrectParent(rel, stack, is_build);
1170  iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
1171  downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
1172  downlink = CopyIndexTuple(downlink);
1173  LockBuffer(stack->parent->buffer, GIST_UNLOCK);
1174  }
1175 
/*
 * NOTE(review): original line 1176 is missing — presumably it pointed the
 * downlink's t_tid at this page's block number before marking it valid;
 * confirm upstream.
 */
1177  GistTupleSetValid(downlink);
1178 
1179  return downlink;
1180 }
1181 
1182 
1183 /*
1184  * Complete the incomplete split of state->stack->page.
 *
 * Walks the chain of split pages via their rightlinks, builds a downlink
 * for each, then inserts all the downlinks into the parent at once.
1185  */
1186 static void
/*
 * NOTE(review): original line 1187 is missing — per the definition index the
 * signature is:
 *     gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
 */
1188 {
1189  GISTInsertStack *stack = state->stack;
1190  Buffer buf;
1191  Page page;
1192  List *splitinfo = NIL;
1193 
1194  ereport(LOG,
1195  (errmsg("fixing incomplete split in index \"%s\", block %u",
1196  RelationGetRelationName(state->r), stack->blkno)));
1197 
1198  Assert(GistFollowRight(stack->page));
/*
 * NOTE(review): original line 1199 is missing — presumably a second Assert on
 * the stack state; confirm upstream.
 */
1200 
1201  buf = stack->buffer;
1202 
1203  /*
1204  * Read the chain of split pages, following the rightlinks. Construct a
1205  * downlink tuple for each page.
1206  */
1207  for (;;)
1208  {
/*
 * NOTE(review): original line 1209 is missing — it must have declared and
 * allocated 'si' (a GISTPageSplitInfo *), which is assigned to below;
 * confirm upstream.
 */
1210  IndexTuple downlink;
1211 
1212  page = BufferGetPage(buf);
1213 
1214  /* Form the new downlink tuples to insert to parent */
1215  downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);
1216 
1217  si->buf = buf;
1218  si->downlink = downlink;
1219 
1220  splitinfo = lappend(splitinfo, si);
1221 
1222  if (GistFollowRight(page))
1223  {
1224  /* lock next page */
1225  buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
/*
 * NOTE(review): original line 1226 is missing — presumably it acquired an
 * exclusive lock on the buffer just read; confirm upstream.
 */
1227  }
1228  else
1229  break;
1230  }
1231 
1232  /* Insert the downlinks */
1233  gistfinishsplit(state, stack, giststate, splitinfo, false);
1234 }
1235 
1236 /*
1237  * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1238  * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
1239  * 'stack' represents the path from the root to the page being updated.
1240  *
1241  * The caller must hold an exclusive lock on stack->buffer. The lock is still
1242  * held on return, but the page might not contain the inserted tuple if the
1243  * page was split. The function returns true if the page was split, false
1244  * otherwise.
 *
 * Convenience wrapper: delegates to the multi-tuple workhorse
 * gistinserttuples() with a single tuple and no child buffers.
1245  */
1246 static bool
/*
 * NOTE(review): original line 1247 is missing — per the definition index the
 * signature is:
 *     gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
 */
1248  GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
1249 {
1250  return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
1251  InvalidBuffer, InvalidBuffer, false, false);
1252 }
1253 
1254 /* ----------------
1255  * An extended workhorse version of gistinserttuple(). This version allows
1256  * inserting multiple tuples, or replacing a single tuple with multiple tuples.
1257  * This is used to recursively update the downlinks in the parent when a page
1258  * is split.
1259  *
1260  * If leftchild and rightchild are valid, we're inserting/replacing the
1261  * downlink for rightchild, and leftchild is its left sibling. We clear the
1262  * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1263  * insertion of the downlink.
1264  *
1265  * To avoid holding locks for longer than necessary, when recursing up the
1266  * tree to update the parents, the locking is a bit peculiar here. On entry,
1267  * the caller must hold an exclusive lock on stack->buffer, as well as
1268  * leftchild and rightchild if given. On return:
1269  *
1270  * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1271  * always kept pinned, however.
1272  * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1273  * is kept pinned.
1274  * - Lock and pin on 'rightchild' are always released.
1275  *
1276  * Returns 'true' if the page had to be split. Note that if the page was
1277  * split, the inserted/updated tuples might've been inserted to a right
1278  * sibling of stack->buffer instead of stack->buffer itself.
1279  */
1280 static bool
/*
 * NOTE(review): original lines 1281 and 1284 are missing — per the definition
 * index the full signature is:
 *     gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
 *                      GISTSTATE *giststate,
 *                      IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
 *                      Buffer leftchild, Buffer rightchild,
 *                      bool unlockbuf, bool unlockleftchild)
 */
1282  GISTSTATE *giststate,
1283  IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
1285  bool unlockbuf, bool unlockleftchild)
1286 {
1287  List *splitinfo;
1288  bool is_split;
1289 
1290  /*
1291  * Check for any rw conflicts (in serializable isolation level) just
1292  * before we intend to modify the page
1293  */
/*
 * NOTE(review): original line 1294 is missing — per the comment above it was
 * presumably the serializable-conflict check call itself; confirm upstream.
 */
1295 
1296  /* Insert the tuple(s) to the page, splitting the page if necessary */
1297  is_split = gistplacetopage(state->r, state->freespace, giststate,
1298  stack->buffer,
1299  tuples, ntup,
1300  oldoffnum, NULL,
1301  leftchild,
1302  &splitinfo,
1303  true,
1304  state->heapRel,
1305  state->is_build);
1306 
1307  /*
1308  * Before recursing up in case the page was split, release locks on the
1309  * child pages. We don't need to keep them locked when updating the
1310  * parent.
1311  */
/*
 * NOTE(review): original lines 1312-1313 and 1315 are missing — per the
 * comment above and the function's locking contract they presumably released
 * rightchild entirely and unlocked leftchild when requested; confirm
 * upstream.
 */
1314  if (BufferIsValid(leftchild) && unlockleftchild)
1316 
1317  /*
1318  * If we had to split, insert/update the downlinks in the parent. If the
1319  * caller requested us to release the lock on stack->buffer, tell
1320  * gistfinishsplit() to do that as soon as it's safe to do so. If we
1321  * didn't have to split, release it ourselves.
1322  */
1323  if (splitinfo)
1324  gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
1325  else if (unlockbuf)
1326  LockBuffer(stack->buffer, GIST_UNLOCK);
1327 
1328  return is_split;
1329 }
1330 
1331 /*
1332  * Finish an incomplete split by inserting/updating the downlinks in parent
1333  * page. 'splitinfo' contains all the child pages involved in the split,
1334  * from left-to-right.
1335  *
1336  * On entry, the caller must hold a lock on stack->buffer and all the child
1337  * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1338  * released on return. The child pages are always unlocked and unpinned.
1339  */
1340 static void
/*
 * NOTE(review): original line 1341 is missing — per the definition index the
 * signature is:
 *     gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
 */
1342  GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
1343 {
1344  GISTPageSplitInfo *right;
1345  GISTPageSplitInfo *left;
1346  IndexTuple tuples[2];
1347 
1348  /* A split always contains at least two halves */
1349  Assert(list_length(splitinfo) >= 2);
1350 
1351  /*
1352  * We need to insert downlinks for each new page, and update the downlink
1353  * for the original (leftmost) page in the split. Begin at the rightmost
1354  * page, inserting one downlink at a time until there's only two pages
1355  * left. Finally insert the downlink for the last new page and update the
1356  * downlink for the original page as one operation.
1357  */
/*
 * NOTE(review): original line 1358 is missing — presumably it locked the
 * parent buffer exclusively before the insertions below; confirm upstream.
 */
1359 
1360  /*
1361  * Insert downlinks for the siblings from right to left, until there are
1362  * only two siblings left.
1363  */
1364  for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
1365  {
1366  right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
1367  left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
1368 
1369  gistFindCorrectParent(state->r, stack, state->is_build);
1370  if (gistinserttuples(state, stack->parent, giststate,
1371  &right->downlink, 1,
/*
 * NOTE(review): original line 1372 is missing — presumably the 'oldoffnum'
 * argument (an insert, not a replace, for these new siblings); confirm
 * upstream.
 */
1373  left->buf, right->buf, false, false))
1374  {
1375  /*
1376  * If the parent page was split, the existing downlink might have
1377  * moved.
1378  */
/*
 * NOTE(review): original line 1379 is missing — per the comment above it
 * presumably invalidated stack->downlinkoffnum so the downlink is re-found;
 * confirm upstream.
 */
1380  }
1381  /* gistinserttuples() released the lock on right->buf. */
1382  }
1383 
1384  right = (GISTPageSplitInfo *) lsecond(splitinfo);
1385  left = (GISTPageSplitInfo *) linitial(splitinfo);
1386 
1387  /*
1388  * Finally insert downlink for the remaining right page and update the
1389  * downlink for the original page to not contain the tuples that were
1390  * moved to the new pages.
1391  */
1392  tuples[0] = left->downlink;
1393  tuples[1] = right->downlink;
1394  gistFindCorrectParent(state->r, stack, state->is_build);
1395  (void) gistinserttuples(state, stack->parent, giststate,
1396  tuples, 2,
1397  stack->downlinkoffnum,
1398  left->buf, right->buf,
1399  true, /* Unlock parent */
1400  unlockbuf /* Unlock stack->buffer if caller
1401  * wants that */
1402  );
1403 
1404  /*
1405  * The downlink might have moved when we updated it. Even if the page
1406  * wasn't split, because gistinserttuples() implements updating the old
1407  * tuple by removing and re-inserting it!
1408  */
/*
 * NOTE(review): original line 1409 is missing — per the comment above it
 * presumably invalidated stack->downlinkoffnum; confirm upstream.
 */
1410 
1411  Assert(left->buf == stack->buffer);
1412 
1413  /*
1414  * If we split the page because we had to adjust the downlink on an
1415  * internal page, while descending the tree for inserting a new tuple,
1416  * then this might no longer be the correct page for the new tuple. The
1417  * downlink to this page might not cover the new tuple anymore, it might
1418  * need to go to the newly-created right sibling instead. Tell the caller
1419  * to walk back up the stack, to re-check at the parent which page to
1420  * insert to.
1421  *
1422  * Normally, the LSN-NSN interlock during the tree descend would also
1423  * detect that a concurrent split happened (by ourselves), and cause us to
1424  * retry at the parent. But that mechanism doesn't work during index
1425  * build, because we don't do WAL-logging, and don't update LSNs, during
1426  * index build.
1427  */
1428  stack->retry_from_parent = true;
1429 }
1430 
1431 /*
1432  * gistSplit -- split a page in the tree and fill struct
1433  * used for XLOG and real writes buffers. Function is recursive, ie
1434  * it will split page until keys will fit in every page.
 *
 * Returns a linked list of SplitedPageLayout, one entry per resulting page,
 * with the left half's pages at the head of the list.
1435  */
/*
 * NOTE(review): original lines 1436-1437 are missing — per the definition
 * index the signature is:
 *     SplitedPageLayout *
 *     gistSplit(Relation r,
 */
1438  Page page,
1439  IndexTuple *itup, /* contains compressed entry */
1440  int len,
1441  GISTSTATE *giststate)
1442 {
1443  IndexTuple *lvectup,
1444  *rvectup;
1445  GistSplitVector v;
1446  int i;
1447  SplitedPageLayout *res = NULL;
1448 
1449  /* this should never recurse very deeply, but better safe than sorry */
/*
 * NOTE(review): original line 1450 is missing — per the comment above it was
 * presumably a stack-depth guard call; confirm upstream.
 */
1451 
1452  /* there's no point in splitting an empty page */
1453  Assert(len > 0);
1454 
1455  /*
1456  * If a single tuple doesn't fit on a page, no amount of splitting will
1457  * help.
1458  */
1459  if (len == 1)
1460  ereport(ERROR,
1461  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1462  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
1463  IndexTupleSize(itup[0]), GiSTPageSize,
/*
 * NOTE(review): original line 1464 is missing — presumably the index-name
 * argument closing the ereport; confirm upstream.
 */
1465 
/* Start with all-NULL key attributes; gistSplitByKey fills them in. */
1466  memset(v.spl_lisnull, true,
1467  sizeof(bool) * giststate->nonLeafTupdesc->natts);
1468  memset(v.spl_risnull, true,
1469  sizeof(bool) * giststate->nonLeafTupdesc->natts);
1470  gistSplitByKey(r, page, itup, len, giststate, &v, 0);
1471 
1472  /* form left and right vector */
1473  lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1474  rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1475 
1476  for (i = 0; i < v.splitVector.spl_nleft; i++)
1477  lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
1478 
1479  for (i = 0; i < v.splitVector.spl_nright; i++)
1480  rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
1481 
1482  /* finalize splitting (may need another split) */
1483  if (!gistfitpage(rvectup, v.splitVector.spl_nright))
1484  {
/* Right half still doesn't fit on one page: recurse to split it further. */
1485  res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
1486  }
1487  else
1488  {
1489  ROTATEDIST(res);
1490  res->block.num = v.splitVector.spl_nright;
1491  res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
1492  res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
1493  }
1494 
1495  if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
1496  {
1497  SplitedPageLayout *resptr,
1498  *subres;
1499 
1500  resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
1501 
1502  /* install on list's tail */
1503  while (resptr->next)
1504  resptr = resptr->next;
1505 
1506  resptr->next = res;
1507  res = subres;
1508  }
1509  else
1510  {
1511  ROTATEDIST(res);
1512  res->block.num = v.splitVector.spl_nleft;
1513  res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
1514  res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
1515  }
1516 
1517  return res;
1518 }
1519 
1520 /*
1521  * Create a GISTSTATE and fill it with information about the index
 *
 * Allocates the GISTSTATE in its own memory context (giststate->scanCxt),
 * so the whole thing can be freed with a single MemoryContextDelete
 * (see freeGISTstate).
1522  */
1523 GISTSTATE *
/*
 * NOTE(review): original line 1524 is missing — per the definition index the
 * signature is:
 *     initGISTstate(Relation index)
 *
 * Also note: throughout this function the extraction dropped every line that
 * consisted only of cross-referenced identifiers — in particular the
 * index_getprocinfo(index, i + 1, GIST_*_PROC) argument lines of each
 * fmgr_info_copy call, and the index_getprocid(...) conditions of each
 * optional-proc check. Confirm those against the upstream file.
 */
1525 {
1526  GISTSTATE *giststate;
1527  MemoryContext scanCxt;
1528  MemoryContext oldCxt;
1529  int i;
1530 
1531  /* safety check to protect fixed-size arrays in GISTSTATE */
1532  if (index->rd_att->natts > INDEX_MAX_KEYS)
1533  elog(ERROR, "numberOfAttributes %d > %d",
1534  index->rd_att->natts, INDEX_MAX_KEYS);
1535 
1536  /* Create the memory context that will hold the GISTSTATE */
/*
 * NOTE(review): original lines 1537 and 1539 are missing — presumably the
 * AllocSetContextCreate call assigning scanCxt, with the context name below
 * as its middle argument; confirm upstream.
 */
1538  "GiST scan context",
1540  oldCxt = MemoryContextSwitchTo(scanCxt);
1541 
1542  /* Create and fill in the GISTSTATE */
1543  giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
1544 
1545  giststate->scanCxt = scanCxt;
1546  giststate->tempCxt = scanCxt; /* caller must change this if needed */
1547  giststate->leafTupdesc = index->rd_att;
1548 
1549  /*
1550  * The truncated tupdesc for non-leaf index tuples, which doesn't contain
1551  * the INCLUDE attributes.
1552  *
1553  * It is used to form tuples during tuple adjustment and page split.
1554  * B-tree creates shortened tuple descriptor for every truncated tuple,
1555  * because it is doing this less often: it does not have to form truncated
1556  * tuples during page split. Also, B-tree is not adjusting tuples on
1557  * internal pages the way GiST does.
1558  */
1559  giststate->nonLeafTupdesc = CreateTupleDescCopyConstr(index->rd_att);
1560  giststate->nonLeafTupdesc->natts =
/*
 * NOTE(review): original line 1561 is missing — presumably the
 * key-attribute-count expression completing this assignment; confirm
 * upstream.
 */
1562 
/*
 * NOTE(review): original line 1563 is missing — presumably the for-loop
 * header iterating i over the key attributes (the INCLUDE-attribute loop at
 * line 1632 below continues from the same i); confirm upstream.
 */
1564  {
1565  fmgr_info_copy(&(giststate->consistentFn[i]),
1567  scanCxt);
1568  fmgr_info_copy(&(giststate->unionFn[i]),
1570  scanCxt);
1571 
1572  /* opclasses are not required to provide a Compress method */
1574  fmgr_info_copy(&(giststate->compressFn[i]),
1576  scanCxt);
1577  else
1578  giststate->compressFn[i].fn_oid = InvalidOid;
1579 
1580  /* opclasses are not required to provide a Decompress method */
1582  fmgr_info_copy(&(giststate->decompressFn[i]),
1584  scanCxt);
1585  else
1586  giststate->decompressFn[i].fn_oid = InvalidOid;
1587 
1588  fmgr_info_copy(&(giststate->penaltyFn[i]),
1590  scanCxt);
1591  fmgr_info_copy(&(giststate->picksplitFn[i]),
1593  scanCxt);
1594  fmgr_info_copy(&(giststate->equalFn[i]),
1596  scanCxt);
1597 
1598  /* opclasses are not required to provide a Distance method */
1600  fmgr_info_copy(&(giststate->distanceFn[i]),
1602  scanCxt);
1603  else
1604  giststate->distanceFn[i].fn_oid = InvalidOid;
1605 
1606  /* opclasses are not required to provide a Fetch method */
1608  fmgr_info_copy(&(giststate->fetchFn[i]),
1610  scanCxt);
1611  else
1612  giststate->fetchFn[i].fn_oid = InvalidOid;
1613 
1614  /*
1615  * If the index column has a specified collation, we should honor that
1616  * while doing comparisons. However, we may have a collatable storage
1617  * type for a noncollatable indexed data type. If there's no index
1618  * collation then specify default collation in case the support
1619  * functions need collation. This is harmless if the support
1620  * functions don't care about collation, so we just do it
1621  * unconditionally. (We could alternatively call get_typcollation,
1622  * but that seems like expensive overkill --- there aren't going to be
1623  * any cases where a GiST storage type has a nondefault collation.)
1624  */
1625  if (OidIsValid(index->rd_indcollation[i]))
1626  giststate->supportCollation[i] = index->rd_indcollation[i];
1627  else
1628  giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
1629  }
1630 
1631  /* No opclass information for INCLUDE attributes */
1632  for (; i < index->rd_att->natts; i++)
1633  {
1634  giststate->consistentFn[i].fn_oid = InvalidOid;
1635  giststate->unionFn[i].fn_oid = InvalidOid;
1636  giststate->compressFn[i].fn_oid = InvalidOid;
1637  giststate->decompressFn[i].fn_oid = InvalidOid;
1638  giststate->penaltyFn[i].fn_oid = InvalidOid;
1639  giststate->picksplitFn[i].fn_oid = InvalidOid;
1640  giststate->equalFn[i].fn_oid = InvalidOid;
1641  giststate->distanceFn[i].fn_oid = InvalidOid;
1642  giststate->fetchFn[i].fn_oid = InvalidOid;
1643  giststate->supportCollation[i] = InvalidOid;
1644  }
1645 
1646  MemoryContextSwitchTo(oldCxt);
1647 
1648  return giststate;
1649 }
1650 
/*
 * Release a GISTSTATE built by initGISTstate, including everything palloc'd
 * inside its scanCxt.
 */
1651 void
/*
 * NOTE(review): original line 1652 is missing — per the definition index the
 * signature is:
 *     freeGISTstate(GISTSTATE *giststate)
 */
1653 {
1654  /* It's sufficient to delete the scanCxt */
1655  MemoryContextDelete(giststate->scanCxt);
1656 }
1657 
1658 /*
1659  * gistprunepage() -- try to remove LP_DEAD items from the given page.
1660  * Function assumes that buffer is exclusively locked.
1661  */
1662 static void
1663 gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
1664 {
/*
 * NOTE(review): original line 1665 is missing — it must have declared the
 * 'deletable' OffsetNumber array (sized by MaxIndexTuplesPerPage, presumably)
 * that is filled below; confirm upstream.
 */
1666  int ndeletable = 0;
1667  OffsetNumber offnum,
1668  maxoff;
1669 
1670  Assert(GistPageIsLeaf(page));
1671 
1672  /*
1673  * Scan over all items to see which ones need to be deleted according to
1674  * LP_DEAD flags.
1675  */
1676  maxoff = PageGetMaxOffsetNumber(page);
1677  for (offnum = FirstOffsetNumber;
1678  offnum <= maxoff;
1679  offnum = OffsetNumberNext(offnum))
1680  {
1681  ItemId itemId = PageGetItemId(page, offnum);
1682 
1683  if (ItemIdIsDead(itemId))
1684  deletable[ndeletable++] = offnum;
1685  }
1686 
1687  if (ndeletable > 0)
1688  {
1689  TransactionId snapshotConflictHorizon = InvalidTransactionId;
1690 
/*
 * NOTE(review): original line 1691 is missing — presumably the condition
 * guarding the horizon computation (standby-info / WAL applicability);
 * confirm upstream.
 */
1692  snapshotConflictHorizon =
1693  index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
1694  deletable, ndeletable);
1695 
/*
 * NOTE(review): original line 1696 is missing — presumably the start of the
 * critical section matched by END_CRIT_SECTION() at line 1726 below; confirm
 * upstream.
 */
1697 
1698  PageIndexMultiDelete(page, deletable, ndeletable);
1699 
1700  /*
1701  * Mark the page as not containing any LP_DEAD items. This is not
1702  * certainly true (there might be some that have recently been marked,
1703  * but weren't included in our target-item list), but it will almost
1704  * always be true and it doesn't seem worth an additional page scan to
1705  * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
1706  */
/*
 * NOTE(review): original line 1707 is missing — per the comment above it
 * presumably cleared the page's F_HAS_GARBAGE hint bit; confirm upstream.
 */
1708 
1709  MarkBufferDirty(buffer);
1710 
1711  /* XLOG stuff */
1712  if (RelationNeedsWAL(rel))
1713  {
1714  XLogRecPtr recptr;
1715 
1716  recptr = gistXLogDelete(buffer,
1717  deletable, ndeletable,
1718  snapshotConflictHorizon,
1719  heapRel);
1720 
1721  PageSetLSN(page, recptr);
1722  }
1723  else
/* No WAL for unlogged/temp relations: use a fake LSN so NSN interlock works. */
1724  PageSetLSN(page, gistGetFakeLSN(rel));
1725 
1726  END_CRIT_SECTION();
1727  }
1728 
1729  /*
1730  * Note: if we didn't find any LP_DEAD items, then the page's
1731  * F_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
1732  * separate write to clear it, however. We will clear it when we split
1733  * the page.
1734  */
1735 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static Datum values[MAXATTR]
Definition: bootstrap.c:156
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3290
Buffer ExtendBufferedRel(BufferManagerRelation bmr, ForkNumber forkNum, BufferAccessStrategy strategy, uint32 flags)
Definition: bufmgr.c:812
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4480
XLogRecPtr BufferGetLSNAtomic(Buffer buffer)
Definition: bufmgr.c:3551
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4497
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2111
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4715
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:708
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
@ EB_SKIP_EXTENSION_LOCK
Definition: bufmgr.h:73
@ EB_LOCK_FIRST
Definition: bufmgr.h:85
#define BMR_REL(p_rel)
Definition: bufmgr.h:106
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
void PageRestoreTempPage(Page tempPage, Page oldPage)
Definition: bufpage.c:424
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1161
Page PageGetTempPageCopySpecial(Page page)
Definition: bufpage.c:402
bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize)
Definition: bufpage.c:1405
void PageIndexTupleDelete(Page page, OffsetNumber offnum)
Definition: bufpage.c:1052
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap)
Definition: bufpage.h:468
uint32 TransactionId
Definition: c.h:641
#define OidIsValid(objectId)
Definition: c.h:764
size_t Size
Definition: c.h:594
int errdetail(const char *fmt,...)
Definition: elog.c:1202
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define LOG
Definition: elog.h:31
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:580
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define rightchild(x)
Definition: fsmpage.c:30
#define leftchild(x)
Definition: fsmpage.c:29
TransactionId index_compute_xid_horizon_for_tuples(Relation irel, Relation hrel, Buffer ibuf, OffsetNumber *itemnos, int nitems)
Definition: genam.c:294
IndexUniqueCheck
Definition: genam.h:116
void gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate, Relation heapRel, bool is_build)
Definition: gist.c:633
static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
Definition: gist.c:1187
static void gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
Definition: gist.c:1663
bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, Buffer buffer, IndexTuple *itup, int ntup, OffsetNumber oldoffnum, BlockNumber *newblkno, Buffer leftchildbuf, List **splitinfo, bool markfollowright, Relation heapRel, bool is_build)
Definition: gist.c:224
bool gistinsert(Relation r, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: gist.c:158
SplitedPageLayout * gistSplit(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate)
Definition: gist.c:1437
GISTSTATE * initGISTstate(Relation index)
Definition: gist.c:1524
void gistbuildempty(Relation index)
Definition: gist.c:132
static GISTInsertStack * gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
Definition: gist.c:908
static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, IndexTuple *tuples, int ntup, OffsetNumber oldoffnum, Buffer leftchild, Buffer rightchild, bool unlockbuf, bool unlockleftchild)
Definition: gist.c:1281
MemoryContext createTempGistContext(void)
Definition: gist.c:121
void freeGISTstate(GISTSTATE *giststate)
Definition: gist.c:1652
static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
Definition: gist.c:1247
#define ROTATEDIST(d)
Definition: gist.c:46
static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack, GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
Definition: gist.c:1341
static void gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
Definition: gist.c:1021
static IndexTuple gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate, GISTInsertStack *stack, bool is_build)
Definition: gist.c:1127
Datum gisthandler(PG_FUNCTION_ARGS)
Definition: gist.c:60
#define GIST_DECOMPRESS_PROC
Definition: gist.h:33
#define GIST_PICKSPLIT_PROC
Definition: gist.h:35
#define GistMarkFollowRight(page)
Definition: gist.h:181
#define F_LEAF
Definition: gist.h:46
#define GIST_CONSISTENT_PROC
Definition: gist.h:30
#define GistClearFollowRight(page)
Definition: gist.h:182
#define GIST_UNION_PROC
Definition: gist.h:31
#define GIST_FETCH_PROC
Definition: gist.h:38
#define GIST_COMPRESS_PROC
Definition: gist.h:32
#define GISTNProcs
Definition: gist.h:41
#define GistClearPageHasGarbage(page)
Definition: gist.h:178
#define GIST_PENALTY_PROC
Definition: gist.h:34
#define GistPageIsLeaf(page)
Definition: gist.h:167
#define GistFollowRight(page)
Definition: gist.h:180
#define GIST_OPTIONS_PROC
Definition: gist.h:39
#define GIST_DISTANCE_PROC
Definition: gist.h:37
#define GistPageSetNSN(page, val)
Definition: gist.h:185
#define GistPageIsDeleted(page)
Definition: gist.h:170
#define GistPageGetOpaque(page)
Definition: gist.h:165
#define GIST_EQUAL_PROC
Definition: gist.h:36
#define GistPageHasGarbage(page)
Definition: gist.h:176
#define GistPageGetNSN(page)
Definition: gist.h:184
XLogRecPtr GistNSN
Definition: gist.h:60
#define GistBuildLSN
Definition: gist.h:67
#define GIST_MAX_SPLIT_PAGES
Definition: gist_private.h:39
#define GistTupleSetValid(itup)
Definition: gist_private.h:289
#define GIST_UNLOCK
Definition: gist_private.h:44
#define GIST_ROOT_BLKNO
Definition: gist_private.h:262
#define GIST_EXCLUSIVE
Definition: gist_private.h:43
#define GiSTPageSize
Definition: gist_private.h:476
#define GistTupleIsInvalid(itup)
Definition: gist_private.h:288
#define GIST_SHARE
Definition: gist_private.h:42
IndexBuildResult * gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
Definition: gistbuild.c:183
bool gistgettuple(IndexScanDesc scan, ScanDirection dir)
Definition: gistget.c:613
int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
Definition: gistget.c:744
bool gistcanreturn(Relation index, int attno)
Definition: gistget.c:794
IndexScanDesc gistbeginscan(Relation r, int nkeys, int norderbys)
Definition: gistscan.c:74
void gistendscan(IndexScanDesc scan)
Definition: gistscan.c:349
void gistrescan(IndexScanDesc scan, ScanKey key, int nkeys, ScanKey orderbys, int norderbys)
Definition: gistscan.c:127
void gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate, GistSplitVector *v, int attno)
Definition: gistsplit.c:623
IndexTuple gistFormTuple(GISTSTATE *giststate, Relation r, Datum *attdata, bool *isnull, bool isleaf)
Definition: gistutil.c:575
Buffer gistNewBuffer(Relation r, Relation heaprel)
Definition: gistutil.c:824
bool gistproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull)
Definition: gistutil.c:933
bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
Definition: gistutil.c:59
void gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
Definition: gistutil.c:34
IndexTuple * gistextractpage(Page page, int *len)
Definition: gistutil.c:95
bool gistfitpage(IndexTuple *itvec, int len)
Definition: gistutil.c:79
IndexTuple gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
Definition: gistutil.c:316
OffsetNumber gistchoose(Relation r, Page p, IndexTuple it, GISTSTATE *giststate)
Definition: gistutil.c:374
XLogRecPtr gistGetFakeLSN(Relation rel)
Definition: gistutil.c:1016
IndexTuple * gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
Definition: gistutil.c:114
bytea * gistoptions(Datum reloptions, bool validate)
Definition: gistutil.c:912
void GISTInitBuffer(Buffer b, uint32 f)
Definition: gistutil.c:773
IndexTupleData * gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
Definition: gistutil.c:127
void gistcheckpage(Relation rel, Buffer buf)
Definition: gistutil.c:785
IndexBulkDeleteResult * gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: gistvacuum.c:59
IndexBulkDeleteResult * gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: gistvacuum.c:75
void gistadjustmembers(Oid opfamilyoid, Oid opclassoid, List *operators, List *functions)
Definition: gistvalidate.c:291
bool gistvalidate(Oid opclassoid)
Definition: gistvalidate.c:34
XLogRecPtr gistXLogDelete(Buffer buffer, OffsetNumber *todelete, int ntodelete, TransactionId snapshotConflictHorizon, Relation heaprel)
Definition: gistxlog.c:672
XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, IndexTuple *itup, int ituplen, Buffer leftchildbuf)
Definition: gistxlog.c:631
XLogRecPtr gistXLogSplit(bool page_is_leaf, SplitedPageLayout *dist, BlockNumber origrlink, GistNSN orignsn, Buffer leftchildbuf, bool markfollowright)
Definition: gistxlog.c:497
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:811
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:777
IndexTuple CopyIndexTuple(IndexTuple source)
Definition: indextuple.c:547
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Pointer Item
Definition: item.h:17
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
static void ItemPointerSetBlockNumber(ItemPointerData *pointer, BlockNumber blockNumber)
Definition: itemptr.h:147
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
IndexTupleData * IndexTuple
Definition: itup.h:53
#define IndexTupleSize(itup)
Definition: itup.h:70
#define MaxIndexTuplesPerPage
Definition: itup.h:165
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:338
List * list_delete_first(List *list)
Definition: list.c:942
List * lcons(void *datum, List *list)
Definition: list.c:494
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:330
void * palloc0(Size size)
Definition: mcxt.c:1257
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:403
void * palloc(Size size)
Definition: mcxt.c:1226
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
#define makeNode(_type_)
Definition: nodes.h:176
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
#define INDEX_MAX_KEYS
const void size_t len
const void * data
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
#define linitial(l)
Definition: pg_list.h:178
#define lsecond(l)
Definition: pg_list.h:183
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
static char * buf
Definition: pg_test_fsync.c:67
void check_stack_depth(void)
Definition: postgres.c:3523
uintptr_t Datum
Definition: postgres.h:64
#define InvalidOid
Definition: postgres_ext.h:36
void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition: predicate.c:3078
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4270
#define RelationGetRelationName(relation)
Definition: rel.h:538
#define RelationNeedsWAL(relation)
Definition: rel.h:629
#define IndexRelationGetNumberOfKeyAttributes(relation)
Definition: rel.h:523
@ INIT_FORKNUM
Definition: relpath.h:53
void gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, Cost *indexStartupCost, Cost *indexTotalCost, Selectivity *indexSelectivity, double *indexCorrelation, double *indexPages)
Definition: selfuncs.c:7015
Oid fn_oid
Definition: fmgr.h:59
BlockNumber blkno
Definition: gist_private.h:210
OffsetNumber downlinkoffnum
Definition: gist_private.h:228
struct GISTInsertStack * parent
Definition: gist_private.h:231
IndexTuple downlink
Definition: gist_private.h:422
FmgrInfo fetchFn[INDEX_MAX_KEYS]
Definition: gist_private.h:94
TupleDesc leafTupdesc
Definition: gist_private.h:80
TupleDesc nonLeafTupdesc
Definition: gist_private.h:81
FmgrInfo penaltyFn[INDEX_MAX_KEYS]
Definition: gist_private.h:90
MemoryContext tempCxt
Definition: gist_private.h:78
Oid supportCollation[INDEX_MAX_KEYS]
Definition: gist_private.h:97
FmgrInfo distanceFn[INDEX_MAX_KEYS]
Definition: gist_private.h:93
FmgrInfo consistentFn[INDEX_MAX_KEYS]
Definition: gist_private.h:86
MemoryContext scanCxt
Definition: gist_private.h:77
FmgrInfo decompressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:89
FmgrInfo compressFn[INDEX_MAX_KEYS]
Definition: gist_private.h:88
FmgrInfo equalFn[INDEX_MAX_KEYS]
Definition: gist_private.h:92
FmgrInfo unionFn[INDEX_MAX_KEYS]
Definition: gist_private.h:87
FmgrInfo picksplitFn[INDEX_MAX_KEYS]
Definition: gist_private.h:91
int spl_nleft
Definition: gist.h:141
OffsetNumber * spl_right
Definition: gist.h:145
int spl_nright
Definition: gist.h:146
OffsetNumber * spl_left
Definition: gist.h:140
GIST_SPLITVEC splitVector
Definition: gist_private.h:237
Datum spl_lattr[INDEX_MAX_KEYS]
Definition: gist_private.h:239
bool spl_lisnull[INDEX_MAX_KEYS]
Definition: gist_private.h:241
Datum spl_rattr[INDEX_MAX_KEYS]
Definition: gist_private.h:243
bool spl_risnull[INDEX_MAX_KEYS]
Definition: gist_private.h:245
ambuildphasename_function ambuildphasename
Definition: amapi.h:270
ambuildempty_function ambuildempty
Definition: amapi.h:262
amvacuumcleanup_function amvacuumcleanup
Definition: amapi.h:265
bool amclusterable
Definition: amapi.h:238
amoptions_function amoptions
Definition: amapi.h:268
amestimateparallelscan_function amestimateparallelscan
Definition: amapi.h:282
amrestrpos_function amrestrpos
Definition: amapi.h:279
aminsert_function aminsert
Definition: amapi.h:263
amendscan_function amendscan
Definition: amapi.h:277
uint16 amoptsprocnum
Definition: amapi.h:218
amparallelrescan_function amparallelrescan
Definition: amapi.h:284
Oid amkeytype
Definition: amapi.h:252
bool ampredlocks
Definition: amapi.h:240
uint16 amsupport
Definition: amapi.h:216
amcostestimate_function amcostestimate
Definition: amapi.h:267
bool amcanorderbyop
Definition: amapi.h:222
amadjustmembers_function amadjustmembers
Definition: amapi.h:272
ambuild_function ambuild
Definition: amapi.h:261
bool amstorage
Definition: amapi.h:236
uint16 amstrategies
Definition: amapi.h:214
bool amoptionalkey
Definition: amapi.h:230
amgettuple_function amgettuple
Definition: amapi.h:275
amcanreturn_function amcanreturn
Definition: amapi.h:266
bool amcanunique
Definition: amapi.h:226
amgetbitmap_function amgetbitmap
Definition: amapi.h:276
amproperty_function amproperty
Definition: amapi.h:269
ambulkdelete_function ambulkdelete
Definition: amapi.h:264
bool amsearcharray
Definition: amapi.h:232
bool amsummarizing
Definition: amapi.h:248
amvalidate_function amvalidate
Definition: amapi.h:271
ammarkpos_function ammarkpos
Definition: amapi.h:278
bool amcanmulticol
Definition: amapi.h:228
bool amusemaintenanceworkmem
Definition: amapi.h:246
ambeginscan_function ambeginscan
Definition: amapi.h:273
bool amcanparallel
Definition: amapi.h:242
amrescan_function amrescan
Definition: amapi.h:274
bool amcanorder
Definition: amapi.h:220
aminitparallelscan_function aminitparallelscan
Definition: amapi.h:283
uint8 amparallelvacuumoptions
Definition: amapi.h:250
bool amcanbackward
Definition: amapi.h:224
bool amcaninclude
Definition: amapi.h:244
bool amsearchnulls
Definition: amapi.h:234
void * ii_AmCache
Definition: execnodes.h:201
MemoryContext ii_Context
Definition: execnodes.h:202
ItemPointerData t_tid
Definition: itup.h:37
Definition: pg_list.h:54
gistxlogPage block
Definition: gist_private.h:193
IndexTupleData * list
Definition: gist_private.h:194
struct SplitedPageLayout * next
Definition: gist_private.h:200
BlockNumber blkno
Definition: gist_private.h:186
Definition: type.h:95
Definition: regguts.h:323
#define InvalidTransactionId
Definition: transam.h:31
TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc)
Definition: tupdesc.c:151
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:47
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:54
#define XLogStandbyInfoActive()
Definition: xlog.h:118
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1225
void XLogEnsureRecordSpace(int max_block_id, int ndatas)
Definition: xloginsert.c:176