PostgreSQL Source Code  git master
ginbtree.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ginbtree.c
4  * page utilities routines for the postgres inverted index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/gin/ginbtree.c
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "access/gin_private.h"
18 #include "access/ginxlog.h"
19 #include "access/xloginsert.h"
20 #include "miscadmin.h"
21 #include "storage/predicate.h"
22 #include "utils/injection_point.h"
23 #include "utils/memutils.h"
24 #include "utils/rel.h"
25 
26 static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
27 static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
28  void *insertdata, BlockNumber updateblkno,
29  Buffer childbuf, GinStatsData *buildStats);
30 static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
31  bool freestack, GinStatsData *buildStats);
32 static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack,
33  GinStatsData *buildStats, int access);
34 
35 /*
36  * Lock buffer by needed method for search.
37  */
38 int
39 ginTraverseLock(Buffer buffer, bool searchMode)
40 {
41  Page page;
42  int access = GIN_SHARE;
43 
44  LockBuffer(buffer, GIN_SHARE);
45  page = BufferGetPage(buffer);
46  if (GinPageIsLeaf(page))
47  {
48  if (searchMode == false)
49  {
50  /* we should relock our page */
51  LockBuffer(buffer, GIN_UNLOCK);
52  LockBuffer(buffer, GIN_EXCLUSIVE);
53 
54  /* But root can become non-leaf during relock */
55  if (!GinPageIsLeaf(page))
56  {
57  /* restore old lock type (very rare) */
58  LockBuffer(buffer, GIN_UNLOCK);
59  LockBuffer(buffer, GIN_SHARE);
60  }
61  else
63  }
64  }
65 
66  return access;
67 }
68 
69 /*
70  * Descend the tree to the leaf page that contains or would contain the key
71  * we're searching for. The key should already be filled in 'btree', in
72  * tree-type specific manner. If btree->fullScan is true, descends to the
73  * leftmost leaf page.
74  *
75  * If 'searchmode' is false, on return stack->buffer is exclusively locked,
76  * and the stack represents the full path to the root. Otherwise stack->buffer
77  * is share-locked, and stack->parent is NULL.
78  *
79  * If 'rootConflictCheck' is true, tree root is checked for serialization
80  * conflict.
81  */
83 ginFindLeafPage(GinBtree btree, bool searchMode,
84  bool rootConflictCheck)
85 {
86  GinBtreeStack *stack;
87 
88  stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
89  stack->blkno = btree->rootBlkno;
90  stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
91  stack->parent = NULL;
92  stack->predictNumber = 1;
93 
94  if (rootConflictCheck)
95  CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno);
96 
97  for (;;)
98  {
99  Page page;
100  BlockNumber child;
101  int access;
102 
103  stack->off = InvalidOffsetNumber;
104 
105  page = BufferGetPage(stack->buffer);
106 
107  access = ginTraverseLock(stack->buffer, searchMode);
108 
109  /*
110  * If we're going to modify the tree, finish any incomplete splits we
111  * encounter on the way.
112  */
113  if (!searchMode && GinPageIsIncompleteSplit(page))
114  ginFinishOldSplit(btree, stack, NULL, access);
115 
116  /*
117  * ok, page is correctly locked, we should check to move right ..,
118  * root never has a right link, so small optimization
119  */
120  while (btree->fullScan == false && stack->blkno != btree->rootBlkno &&
121  btree->isMoveRight(btree, page))
122  {
123  BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
124 
125  if (rightlink == InvalidBlockNumber)
126  /* rightmost page */
127  break;
128 
129  stack->buffer = ginStepRight(stack->buffer, btree->index, access);
130  stack->blkno = rightlink;
131  page = BufferGetPage(stack->buffer);
132 
133  if (!searchMode && GinPageIsIncompleteSplit(page))
134  ginFinishOldSplit(btree, stack, NULL, access);
135  }
136 
137  if (GinPageIsLeaf(page)) /* we found, return locked page */
138  return stack;
139 
140  /* now we have correct buffer, try to find child */
141  child = btree->findChildPage(btree, stack);
142 
143  LockBuffer(stack->buffer, GIN_UNLOCK);
144  Assert(child != InvalidBlockNumber);
145  Assert(stack->blkno != child);
146 
147  if (searchMode)
148  {
149  /* in search mode we may forget path to leaf */
150  stack->blkno = child;
151  stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
152  }
153  else
154  {
155  GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
156 
157  ptr->parent = stack;
158  stack = ptr;
159  stack->blkno = child;
160  stack->buffer = ReadBuffer(btree->index, stack->blkno);
161  stack->predictNumber = 1;
162  }
163  }
164 }
165 
166 /*
167  * Step right from current page.
168  *
169  * The next page is locked first, before releasing the current page. This is
170  * crucial to prevent concurrent VACUUM from deleting a page that we are about
171  * to step to. (The lock-coupling isn't strictly necessary when we are
172  * traversing the tree to find an insert location, because page deletion grabs
173  * a cleanup lock on the root to prevent any concurrent inserts. See Page
174  * deletion section in the README. But there's no harm in doing it always.)
175  */
176 Buffer
177 ginStepRight(Buffer buffer, Relation index, int lockmode)
178 {
179  Buffer nextbuffer;
180  Page page = BufferGetPage(buffer);
181  bool isLeaf = GinPageIsLeaf(page);
182  bool isData = GinPageIsData(page);
183  BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
184 
185  nextbuffer = ReadBuffer(index, blkno);
186  LockBuffer(nextbuffer, lockmode);
187  UnlockReleaseBuffer(buffer);
188 
189  /* Sanity check that the page we stepped to is of similar kind. */
190  page = BufferGetPage(nextbuffer);
191  if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page))
192  elog(ERROR, "right sibling of GIN page is of different type");
193 
194  return nextbuffer;
195 }
196 
197 void
199 {
200  while (stack)
201  {
202  GinBtreeStack *tmp = stack->parent;
203 
204  if (stack->buffer != InvalidBuffer)
205  ReleaseBuffer(stack->buffer);
206 
207  pfree(stack);
208  stack = tmp;
209  }
210 }
211 
212 /*
213  * Try to find parent for current stack position. Returns correct parent and
214  * child's offset in stack->parent. The root page is never released, to
215  * prevent conflict with vacuum process.
216  */
217 static void
219 {
220  Page page;
221  Buffer buffer;
222  BlockNumber blkno,
223  leftmostBlkno;
224  OffsetNumber offset;
226  GinBtreeStack *ptr;
227 
228  /*
229  * Unwind the stack all the way up to the root, leaving only the root
230  * item.
231  *
232  * Be careful not to release the pin on the root page! The pin on root
233  * page is required to lock out concurrent vacuums on the tree.
234  */
235  root = stack->parent;
236  while (root->parent)
237  {
238  ReleaseBuffer(root->buffer);
239  root = root->parent;
240  }
241 
242  Assert(root->blkno == btree->rootBlkno);
243  Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno);
244  root->off = InvalidOffsetNumber;
245 
246  blkno = root->blkno;
247  buffer = root->buffer;
248 
249  ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
250 
251  for (;;)
252  {
253  LockBuffer(buffer, GIN_EXCLUSIVE);
254  page = BufferGetPage(buffer);
255  if (GinPageIsLeaf(page))
256  elog(ERROR, "Lost path");
257 
258  if (GinPageIsIncompleteSplit(page))
259  {
260  Assert(blkno != btree->rootBlkno);
261  ptr->blkno = blkno;
262  ptr->buffer = buffer;
263 
264  /*
265  * parent may be wrong, but if so, the ginFinishSplit call will
266  * recurse to call ginFindParents again to fix it.
267  */
268  ptr->parent = root;
269  ptr->off = InvalidOffsetNumber;
270 
271  ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
272  }
273 
274  leftmostBlkno = btree->getLeftMostChild(btree, page);
275 
276  while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
277  {
278  blkno = GinPageGetOpaque(page)->rightlink;
279  if (blkno == InvalidBlockNumber)
280  {
281  /* Link not present in this level */
282  LockBuffer(buffer, GIN_UNLOCK);
283  /* Do not release pin on the root buffer */
284  if (buffer != root->buffer)
285  ReleaseBuffer(buffer);
286  break;
287  }
288  buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
289  page = BufferGetPage(buffer);
290 
291  /* finish any incomplete splits, as above */
292  if (GinPageIsIncompleteSplit(page))
293  {
294  Assert(blkno != btree->rootBlkno);
295  ptr->blkno = blkno;
296  ptr->buffer = buffer;
297  ptr->parent = root;
298  ptr->off = InvalidOffsetNumber;
299 
300  ginFinishOldSplit(btree, ptr, NULL, GIN_EXCLUSIVE);
301  }
302  }
303 
304  if (blkno != InvalidBlockNumber)
305  {
306  ptr->blkno = blkno;
307  ptr->buffer = buffer;
308  ptr->parent = root; /* it may be wrong, but in next call we will
309  * correct */
310  ptr->off = offset;
311  stack->parent = ptr;
312  return;
313  }
314 
315  /* Descend down to next level */
316  blkno = leftmostBlkno;
317  buffer = ReadBuffer(btree->index, blkno);
318  }
319 }
320 
321 /*
322  * Insert a new item to a page.
323  *
324  * Returns true if the insertion was finished. On false, the page was split and
325  * the parent needs to be updated. (A root split returns true as it doesn't
326  * need any further action by the caller to complete.)
327  *
328  * When inserting a downlink to an internal page, 'childbuf' contains the
329  * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
330  * atomically with the insert. Also, the existing item at offset stack->off
331  * in the target page is updated to point to updateblkno.
332  *
333  * stack->buffer is locked on entry, and is kept locked.
334  * Likewise for childbuf, if given.
335  */
336 static bool
338  void *insertdata, BlockNumber updateblkno,
339  Buffer childbuf, GinStatsData *buildStats)
340 {
341  Page page = BufferGetPage(stack->buffer);
342  bool result;
343  GinPlaceToPageRC rc;
344  uint16 xlflags = 0;
345  Page childpage = NULL;
346  Page newlpage = NULL,
347  newrpage = NULL;
348  void *ptp_workspace = NULL;
349  MemoryContext tmpCxt;
350  MemoryContext oldCxt;
351 
352  /*
353  * We do all the work of this function and its subfunctions in a temporary
354  * memory context. This avoids leakages and simplifies APIs, since some
355  * subfunctions allocate storage that has to survive until we've finished
356  * the WAL insertion.
357  */
359  "ginPlaceToPage temporary context",
361  oldCxt = MemoryContextSwitchTo(tmpCxt);
362 
363  if (GinPageIsData(page))
364  xlflags |= GIN_INSERT_ISDATA;
365  if (GinPageIsLeaf(page))
366  {
367  xlflags |= GIN_INSERT_ISLEAF;
368  Assert(!BufferIsValid(childbuf));
369  Assert(updateblkno == InvalidBlockNumber);
370  }
371  else
372  {
373  Assert(BufferIsValid(childbuf));
374  Assert(updateblkno != InvalidBlockNumber);
375  childpage = BufferGetPage(childbuf);
376  }
377 
378  /*
379  * See if the incoming tuple will fit on the page. beginPlaceToPage will
380  * decide if the page needs to be split, and will compute the split
381  * contents if so. See comments for beginPlaceToPage and execPlaceToPage
382  * functions for more details of the API here.
383  */
384  rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
385  insertdata, updateblkno,
386  &ptp_workspace,
387  &newlpage, &newrpage);
388 
389  if (rc == GPTP_NO_WORK)
390  {
391  /* Nothing to do */
392  result = true;
393  }
394  else if (rc == GPTP_INSERT)
395  {
396  /* It will fit, perform the insertion */
398 
399  if (RelationNeedsWAL(btree->index) && !btree->isBuild)
400  XLogBeginInsert();
401 
402  /*
403  * Perform the page update, dirty and register stack->buffer, and
404  * register any extra WAL data.
405  */
406  btree->execPlaceToPage(btree, stack->buffer, stack,
407  insertdata, updateblkno, ptp_workspace);
408 
409  /* An insert to an internal page finishes the split of the child. */
410  if (BufferIsValid(childbuf))
411  {
412  GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
413  MarkBufferDirty(childbuf);
414  if (RelationNeedsWAL(btree->index) && !btree->isBuild)
415  XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
416  }
417 
418  if (RelationNeedsWAL(btree->index) && !btree->isBuild)
419  {
420  XLogRecPtr recptr;
421  ginxlogInsert xlrec;
422  BlockIdData childblknos[2];
423 
424  xlrec.flags = xlflags;
425 
426  XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
427 
428  /*
429  * Log information about child if this was an insertion of a
430  * downlink.
431  */
432  if (BufferIsValid(childbuf))
433  {
434  BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
435  BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
436  XLogRegisterData((char *) childblknos,
437  sizeof(BlockIdData) * 2);
438  }
439 
440  recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
441  PageSetLSN(page, recptr);
442  if (BufferIsValid(childbuf))
443  PageSetLSN(childpage, recptr);
444  }
445 
447 
448  /* Insertion is complete. */
449  result = true;
450  }
451  else if (rc == GPTP_SPLIT)
452  {
453  /*
454  * Didn't fit, need to split. The split has been computed in newlpage
455  * and newrpage, which are pointers to palloc'd pages, not associated
456  * with buffers. stack->buffer is not touched yet.
457  */
458  Buffer rbuffer;
459  BlockNumber savedRightLink;
461  Buffer lbuffer = InvalidBuffer;
462  Page newrootpg = NULL;
463 
464  /* Get a new index page to become the right page */
465  rbuffer = GinNewBuffer(btree->index);
466 
467  /* During index build, count the new page */
468  if (buildStats)
469  {
470  if (btree->isData)
471  buildStats->nDataPages++;
472  else
473  buildStats->nEntryPages++;
474  }
475 
476  savedRightLink = GinPageGetOpaque(page)->rightlink;
477 
478  /* Begin setting up WAL record */
479  data.locator = btree->index->rd_locator;
480  data.flags = xlflags;
481  if (BufferIsValid(childbuf))
482  {
483  data.leftChildBlkno = BufferGetBlockNumber(childbuf);
484  data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
485  }
486  else
487  data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
488 
489  if (stack->parent == NULL)
490  {
491  /*
492  * splitting the root, so we need to allocate new left page and
493  * place pointers to left and right page on root page.
494  */
495  lbuffer = GinNewBuffer(btree->index);
496 
497  /* During index build, count the new left page */
498  if (buildStats)
499  {
500  if (btree->isData)
501  buildStats->nDataPages++;
502  else
503  buildStats->nEntryPages++;
504  }
505 
506  data.rrlink = InvalidBlockNumber;
507  data.flags |= GIN_SPLIT_ROOT;
508 
509  GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
510  GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
511 
512  /*
513  * Construct a new root page containing downlinks to the new left
514  * and right pages. (Do this in a temporary copy rather than
515  * overwriting the original page directly, since we're not in the
516  * critical section yet.)
517  */
518  newrootpg = PageGetTempPage(newrpage);
519  GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
520 
521  btree->fillRoot(btree, newrootpg,
522  BufferGetBlockNumber(lbuffer), newlpage,
523  BufferGetBlockNumber(rbuffer), newrpage);
524 
525  if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
526  {
527 
530  BufferGetBlockNumber(lbuffer));
531 
534  BufferGetBlockNumber(rbuffer));
535  }
536  }
537  else
538  {
539  /* splitting a non-root page */
540  data.rrlink = savedRightLink;
541 
542  GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
543  GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
544  GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
545 
546  if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
547  {
548 
551  BufferGetBlockNumber(rbuffer));
552  }
553  }
554 
555  /*
556  * OK, we have the new contents of the left page in a temporary copy
557  * now (newlpage), and likewise for the new contents of the
558  * newly-allocated right block. The original page is still unchanged.
559  *
560  * If this is a root split, we also have a temporary page containing
561  * the new contents of the root.
562  */
563 
565 
566  MarkBufferDirty(rbuffer);
567  MarkBufferDirty(stack->buffer);
568 
569  /*
570  * Restore the temporary copies over the real buffers.
571  */
572  if (stack->parent == NULL)
573  {
574  /* Splitting the root, three pages to update */
575  MarkBufferDirty(lbuffer);
576  memcpy(page, newrootpg, BLCKSZ);
577  memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
578  memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
579  }
580  else
581  {
582  /* Normal split, only two pages to update */
583  memcpy(page, newlpage, BLCKSZ);
584  memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
585  }
586 
587  /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
588  if (BufferIsValid(childbuf))
589  {
590  GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
591  MarkBufferDirty(childbuf);
592  }
593 
594  /* write WAL record */
595  if (RelationNeedsWAL(btree->index) && !btree->isBuild)
596  {
597  XLogRecPtr recptr;
598 
599  XLogBeginInsert();
600 
601  /*
602  * We just take full page images of all the split pages. Splits
603  * are uncommon enough that it's not worth complicating the code
604  * to be more efficient.
605  */
606  if (stack->parent == NULL)
607  {
611  }
612  else
613  {
616  }
617  if (BufferIsValid(childbuf))
618  XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);
619 
620  XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
621 
622  recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
623 
624  PageSetLSN(page, recptr);
625  PageSetLSN(BufferGetPage(rbuffer), recptr);
626  if (stack->parent == NULL)
627  PageSetLSN(BufferGetPage(lbuffer), recptr);
628  if (BufferIsValid(childbuf))
629  PageSetLSN(childpage, recptr);
630  }
632 
633  /*
634  * We can release the locks/pins on the new pages now, but keep
635  * stack->buffer locked. childbuf doesn't get unlocked either.
636  */
637  UnlockReleaseBuffer(rbuffer);
638  if (stack->parent == NULL)
639  UnlockReleaseBuffer(lbuffer);
640 
641  /*
642  * If we split the root, we're done. Otherwise the split is not
643  * complete until the downlink for the new page has been inserted to
644  * the parent.
645  */
646  result = (stack->parent == NULL);
647  }
648  else
649  {
650  elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc);
651  result = false; /* keep compiler quiet */
652  }
653 
654  /* Clean up temp context */
655  MemoryContextSwitchTo(oldCxt);
656  MemoryContextDelete(tmpCxt);
657 
658  return result;
659 }
660 
661 /*
662  * Finish a split by inserting the downlink for the new page to parent.
663  *
664  * On entry, stack->buffer is exclusively locked.
665  *
666  * If freestack is true, all the buffers are released and unlocked as we
667  * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
668  * locked, and stack is unmodified, except for possibly moving right to find
669  * the correct parent of page.
670  */
671 static void
672 ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
673  GinStatsData *buildStats)
674 {
675  Page page;
676  bool done;
677  bool first = true;
678 
679  /* this loop crawls up the stack until the insertion is complete */
680  do
681  {
682  GinBtreeStack *parent = stack->parent;
683  void *insertdata;
684  BlockNumber updateblkno;
685 
686 #ifdef USE_INJECTION_POINTS
687  if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
688  INJECTION_POINT("gin-leave-leaf-split-incomplete");
689  else
690  INJECTION_POINT("gin-leave-internal-split-incomplete");
691 #endif
692 
693  /* search parent to lock */
694  LockBuffer(parent->buffer, GIN_EXCLUSIVE);
695 
696  /*
697  * If the parent page was incompletely split, finish that split first,
698  * then continue with the current one.
699  *
700  * Note: we have to finish *all* incomplete splits we encounter, even
701  * if we have to move right. Otherwise we might choose as the target a
702  * page that has no downlink in the parent, and splitting it further
703  * would fail.
704  */
706  ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
707 
708  /* move right if it's needed */
709  page = BufferGetPage(parent->buffer);
710  while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
711  {
712  if (GinPageRightMost(page))
713  {
714  /*
715  * rightmost page, but we don't find parent, we should use
716  * plain search...
717  */
718  LockBuffer(parent->buffer, GIN_UNLOCK);
719  ginFindParents(btree, stack);
720  parent = stack->parent;
721  Assert(parent != NULL);
722  break;
723  }
724 
725  parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
726  parent->blkno = BufferGetBlockNumber(parent->buffer);
727  page = BufferGetPage(parent->buffer);
728 
730  ginFinishOldSplit(btree, parent, buildStats, GIN_EXCLUSIVE);
731  }
732 
733  /* insert the downlink */
734  insertdata = btree->prepareDownlink(btree, stack->buffer);
735  updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
736  done = ginPlaceToPage(btree, parent,
737  insertdata, updateblkno,
738  stack->buffer, buildStats);
739  pfree(insertdata);
740 
741  /*
742  * If the caller requested to free the stack, unlock and release the
743  * child buffer now. Otherwise keep it pinned and locked, but if we
744  * have to recurse up the tree, we can unlock the upper pages, only
745  * keeping the page at the bottom of the stack locked.
746  */
747  if (!first || freestack)
748  LockBuffer(stack->buffer, GIN_UNLOCK);
749  if (freestack)
750  {
751  ReleaseBuffer(stack->buffer);
752  pfree(stack);
753  }
754  stack = parent;
755 
756  first = false;
757  } while (!done);
758 
759  /* unlock the parent */
760  LockBuffer(stack->buffer, GIN_UNLOCK);
761 
762  if (freestack)
763  freeGinBtreeStack(stack);
764 }
765 
766 /*
767  * An entry point to ginFinishSplit() that is used when we stumble upon an
768  * existing incompletely split page in the tree, as opposed to completing a
769  * split that we just made outselves. The difference is that stack->buffer may
770  * be merely share-locked on entry, and will be upgraded to exclusive mode.
771  *
772  * Note: Upgrading the lock momentarily releases it. Doing that in a scan
773  * would not be OK, because a concurrent VACUUM might delete the page while
774  * we're not holding the lock. It's OK in an insert, though, because VACUUM
775  * has a different mechanism that prevents it from running concurrently with
776  * inserts. (Namely, it holds a cleanup lock on the root.)
777  */
778 static void
780 {
781  INJECTION_POINT("gin-finish-incomplete-split");
782  elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
783  stack->blkno, RelationGetRelationName(btree->index));
784 
785  if (access == GIN_SHARE)
786  {
787  LockBuffer(stack->buffer, GIN_UNLOCK);
789 
791  {
792  /*
793  * Someone else already completed the split while we were not
794  * holding the lock.
795  */
796  return;
797  }
798  }
799 
800  ginFinishSplit(btree, stack, false, buildStats);
801 }
802 
803 /*
804  * Insert a value to tree described by stack.
805  *
806  * The value to be inserted is given in 'insertdata'. Its format depends
807  * on whether this is an entry or data tree, ginInsertValue just passes it
808  * through to the tree-specific callback function.
809  *
810  * During an index build, buildStats is non-null and the counters it contains
811  * are incremented as needed.
812  *
813  * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
814  */
815 void
816 ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
817  GinStatsData *buildStats)
818 {
819  bool done;
820 
821  /* If the leaf page was incompletely split, finish the split first */
823  ginFinishOldSplit(btree, stack, buildStats, GIN_EXCLUSIVE);
824 
825  done = ginPlaceToPage(btree, stack,
826  insertdata, InvalidBlockNumber,
827  InvalidBuffer, buildStats);
828  if (done)
829  {
830  LockBuffer(stack->buffer, GIN_UNLOCK);
831  freeGinBtreeStack(stack);
832  }
833  else
834  ginFinishSplit(btree, stack, true, buildStats);
835 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static void BlockIdSet(BlockIdData *blockId, BlockNumber blockNumber)
Definition: block.h:81
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3377
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:2252
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4560
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4577
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2189
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4795
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:734
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
Page PageGetTempPage(Page page)
Definition: bufpage.c:365
Pointer Page
Definition: bufpage.h:78
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
unsigned short uint16
Definition: c.h:492
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define GIN_UNLOCK
Definition: gin_private.h:49
GinPlaceToPageRC
Definition: gin_private.h:144
@ GPTP_INSERT
Definition: gin_private.h:146
@ GPTP_SPLIT
Definition: gin_private.h:147
@ GPTP_NO_WORK
Definition: gin_private.h:145
#define GIN_EXCLUSIVE
Definition: gin_private.h:51
#define GIN_SHARE
Definition: gin_private.h:50
#define GIN_COMPRESSED
Definition: ginblock.h:48
#define GinPageGetOpaque(page)
Definition: ginblock.h:110
#define GIN_LEAF
Definition: ginblock.h:42
#define GinPageIsData(page)
Definition: ginblock.h:115
#define GinPageRightMost(page)
Definition: ginblock.h:129
#define GIN_INCOMPLETE_SPLIT
Definition: ginblock.h:47
#define GinPageIsLeaf(page)
Definition: ginblock.h:112
#define GinPageIsIncompleteSplit(page)
Definition: ginblock.h:127
int ginTraverseLock(Buffer buffer, bool searchMode)
Definition: ginbtree.c:39
void freeGinBtreeStack(GinBtreeStack *stack)
Definition: ginbtree.c:198
void ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata, GinStatsData *buildStats)
Definition: ginbtree.c:816
static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, void *insertdata, BlockNumber updateblkno, Buffer childbuf, GinStatsData *buildStats)
Definition: ginbtree.c:337
static void ginFindParents(GinBtree btree, GinBtreeStack *stack)
Definition: ginbtree.c:218
static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack, GinStatsData *buildStats)
Definition: ginbtree.c:672
static void ginFinishOldSplit(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats, int access)
Definition: ginbtree.c:779
Buffer ginStepRight(Buffer buffer, Relation index, int lockmode)
Definition: ginbtree.c:177
GinBtreeStack * ginFindLeafPage(GinBtree btree, bool searchMode, bool rootConflictCheck)
Definition: ginbtree.c:83
void GinInitPage(Page page, uint32 f, Size pageSize)
Definition: ginutil.c:338
Buffer GinNewBuffer(Relation index)
Definition: ginutil.c:300
#define GIN_INSERT_ISDATA
Definition: ginxlog.h:124
#define GIN_INSERT_ISLEAF
Definition: ginxlog.h:125
#define GIN_SPLIT_ROOT
Definition: ginxlog.h:126
#define XLOG_GIN_INSERT
Definition: ginxlog.h:35
#define XLOG_GIN_SPLIT
Definition: ginxlog.h:109
#define INJECTION_POINT(name)
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1508
MemoryContext CurrentMemoryContext
Definition: mcxt.c:131
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:442
void * palloc(Size size)
Definition: mcxt.c:1304
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:153
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
const void * data
void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno)
Definition: predicate.c:3124
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4316
short access
Definition: preproc-type.c:36
MemoryContextSwitchTo(old_ctx)
tree ctl root
Definition: radixtree.h:1840
#define RelationGetRelationName(relation)
Definition: rel.h:541
#define RelationNeedsWAL(relation)
Definition: rel.h:630
BlockNumber(* findChildPage)(GinBtree, GinBtreeStack *)
Definition: gin_private.h:153
void(* execPlaceToPage)(GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void *)
Definition: gin_private.h:161
BlockNumber(* getLeftMostChild)(GinBtree, Page)
Definition: gin_private.h:154
bool(* isMoveRight)(GinBtree, Page)
Definition: gin_private.h:155
GinPlaceToPageRC(* beginPlaceToPage)(GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void **, Page *, Page *)
Definition: gin_private.h:160
void *(* prepareDownlink)(GinBtree, Buffer)
Definition: gin_private.h:162
OffsetNumber(* findChildPtr)(GinBtree, Page, BlockNumber, OffsetNumber)
Definition: gin_private.h:159
Relation index
Definition: gin_private.h:167
void(* fillRoot)(GinBtree, Page, BlockNumber, Page, BlockNumber, Page)
Definition: gin_private.h:163
BlockNumber rootBlkno
Definition: gin_private.h:168
OffsetNumber off
Definition: gin_private.h:133
uint32 predictNumber
Definition: gin_private.h:136
struct GinBtreeStack * parent
Definition: gin_private.h:137
BlockNumber blkno
Definition: gin_private.h:131
BlockNumber nDataPages
Definition: gin.h:47
BlockNumber nEntryPages
Definition: gin.h:46
RelFileLocator rd_locator
Definition: rel.h:57
uint16 flags
Definition: ginxlog.h:39
Definition: type.h:95
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34
#define REGBUF_FORCE_IMAGE
Definition: xloginsert.h:31