PostgreSQL Source Code  git master
spgvacuum.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * spgvacuum.c
4  * vacuum for SP-GiST
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/spgist/spgvacuum.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/genam.h"
19 #include "access/spgist_private.h"
20 #include "access/spgxlog.h"
21 #include "access/transam.h"
22 #include "access/xloginsert.h"
23 #include "commands/vacuum.h"
24 #include "miscadmin.h"
25 #include "storage/bufmgr.h"
26 #include "storage/indexfsm.h"
27 #include "storage/lmgr.h"
28 #include "utils/snapmgr.h"
29 
30 
31 /* Entry in pending-list of TIDs we need to revisit */
32 typedef struct spgVacPendingItem
33 {
34  ItemPointerData tid; /* redirection target to visit */
35  bool done; /* have we dealt with this? */
36  struct spgVacPendingItem *next; /* list link */
38 
39 /* Local state for vacuum operations */
40 typedef struct spgBulkDeleteState
41 {
42  /* Parameters passed in to spgvacuumscan */
47 
48  /* Additional working state */
49  SpGistState spgstate; /* for SPGiST operations that need one */
50  spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
51  TransactionId myXmin; /* for detecting newly-added redirects */
52  BlockNumber lastFilledBlock; /* last non-deletable block */
54 
55 
56 /*
57  * Add TID to pendingList, but only if not already present.
58  *
59  * Note that new items are always appended at the end of the list; this
60  * ensures that scans of the list don't miss items added during the scan.
61  */
62 static void
64 {
65  spgVacPendingItem *pitem;
66  spgVacPendingItem **listLink;
67 
68  /* search the list for pre-existing entry */
69  listLink = &bds->pendingList;
70  while (*listLink != NULL)
71  {
72  pitem = *listLink;
73  if (ItemPointerEquals(tid, &pitem->tid))
74  return; /* already in list, do nothing */
75  listLink = &pitem->next;
76  }
77  /* not there, so append new entry */
78  pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
79  pitem->tid = *tid;
80  pitem->done = false;
81  pitem->next = NULL;
82  *listLink = pitem;
83 }
84 
85 /*
86  * Clear pendingList
87  */
88 static void
90 {
91  spgVacPendingItem *pitem;
92  spgVacPendingItem *nitem;
93 
94  for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
95  {
96  nitem = pitem->next;
97  /* All items in list should have been dealt with */
98  Assert(pitem->done);
99  pfree(pitem);
100  }
101  bds->pendingList = NULL;
102 }
103 
104 /*
105  * Vacuum a regular (non-root) leaf page
106  *
107  * We must delete tuples that are targeted for deletion by the VACUUM,
108  * but not move any tuples that are referenced by outside links; we assume
109  * those are the ones that are heads of chains.
110  *
111  * If we find a REDIRECT that was made by a concurrently-running transaction,
112  * we must add its target TID to pendingList. (We don't try to visit the
113  * target immediately, first because we don't want VACUUM locking more than
114  * one buffer at a time, and second because the duplicate-filtering logic
115  * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
116  * loop in the face of continuous concurrent insertions.)
117  *
118  * If forPending is true, we are examining the page as a consequence of
119  * chasing a redirect link, not as part of the normal sequential scan.
120  * We still vacuum the page normally, but we don't increment the stats
121  * about live tuples; else we'd double-count those tuples, since the page
122  * has been or will be visited in the sequential scan as well.
123  */
124 static void
126  bool forPending)
127 {
128  Page page = BufferGetPage(buffer);
129  spgxlogVacuumLeaf xlrec;
131  OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
136  OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
137  bool deletable[MaxIndexTuplesPerPage + 1];
138  int nDeletable;
139  OffsetNumber i,
140  max = PageGetMaxOffsetNumber(page);
141 
142  memset(predecessor, 0, sizeof(predecessor));
143  memset(deletable, 0, sizeof(deletable));
144  nDeletable = 0;
145 
146  /* Scan page, identify tuples to delete, accumulate stats */
147  for (i = FirstOffsetNumber; i <= max; i++)
148  {
149  SpGistLeafTuple lt;
150 
151  lt = (SpGistLeafTuple) PageGetItem(page,
152  PageGetItemId(page, i));
153  if (lt->tupstate == SPGIST_LIVE)
154  {
156 
157  if (bds->callback(&lt->heapPtr, bds->callback_state))
158  {
159  bds->stats->tuples_removed += 1;
160  deletable[i] = true;
161  nDeletable++;
162  }
163  else
164  {
165  if (!forPending)
166  bds->stats->num_index_tuples += 1;
167  }
168 
169  /* Form predecessor map, too */
171  {
172  /* paranoia about corrupted chain links */
174  SGLT_GET_NEXTOFFSET(lt) > max ||
175  predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
176  elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
177  BufferGetBlockNumber(buffer),
179  predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
180  }
181  }
182  else if (lt->tupstate == SPGIST_REDIRECT)
183  {
185 
188 
189  /*
190  * Add target TID to pending list if the redirection could have
191  * happened since VACUUM started.
192  *
193  * Note: we could make a tighter test by seeing if the xid is
194  * "running" according to the active snapshot; but snapmgr.c
195  * doesn't currently export a suitable API, and it's not entirely
196  * clear that a tighter test is worth the cycles anyway.
197  */
198  if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
199  spgAddPendingTID(bds, &dt->pointer);
200  }
201  else
202  {
204  }
205  }
206 
207  if (nDeletable == 0)
208  return; /* nothing more to do */
209 
210  /*----------
211  * Figure out exactly what we have to do. We do this separately from
212  * actually modifying the page, mainly so that we have a representation
213  * that can be dumped into WAL and then the replay code can do exactly
214  * the same thing. The output of this step consists of six arrays
215  * describing four kinds of operations, to be performed in this order:
216  *
217  * toDead[]: tuple numbers to be replaced with DEAD tuples
218  * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
219  * moveSrc[]: tuple numbers that need to be relocated to another offset
220  * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
221  * moveDest[]: new locations for moveSrc tuples
222  * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
223  * chainDest[]: new values of nextOffset for chainSrc members
224  *
225  * It's easiest to figure out what we have to do by processing tuple
226  * chains, so we iterate over all the tuples (not just the deletable
227  * ones!) to identify chain heads, then chase down each chain and make
228  * work item entries for deletable tuples within the chain.
229  *----------
230  */
231  xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
232 
233  for (i = FirstOffsetNumber; i <= max; i++)
234  {
235  SpGistLeafTuple head;
236  bool interveningDeletable;
237  OffsetNumber prevLive;
238  OffsetNumber j;
239 
240  head = (SpGistLeafTuple) PageGetItem(page,
241  PageGetItemId(page, i));
242  if (head->tupstate != SPGIST_LIVE)
243  continue; /* can't be a chain member */
244  if (predecessor[i] != 0)
245  continue; /* not a chain head */
246 
247  /* initialize ... */
248  interveningDeletable = false;
249  prevLive = deletable[i] ? InvalidOffsetNumber : i;
250 
251  /* scan down the chain ... */
252  j = SGLT_GET_NEXTOFFSET(head);
253  while (j != InvalidOffsetNumber)
254  {
255  SpGistLeafTuple lt;
256 
257  lt = (SpGistLeafTuple) PageGetItem(page,
258  PageGetItemId(page, j));
259  if (lt->tupstate != SPGIST_LIVE)
260  {
261  /* all tuples in chain should be live */
262  elog(ERROR, "unexpected SPGiST tuple state: %d",
263  lt->tupstate);
264  }
265 
266  if (deletable[j])
267  {
268  /* This tuple should be replaced by a placeholder */
269  toPlaceholder[xlrec.nPlaceholder] = j;
270  xlrec.nPlaceholder++;
271  /* previous live tuple's chain link will need an update */
272  interveningDeletable = true;
273  }
274  else if (prevLive == InvalidOffsetNumber)
275  {
276  /*
277  * This is the first live tuple in the chain. It has to move
278  * to the head position.
279  */
280  moveSrc[xlrec.nMove] = j;
281  moveDest[xlrec.nMove] = i;
282  xlrec.nMove++;
283  /* Chain updates will be applied after the move */
284  prevLive = i;
285  interveningDeletable = false;
286  }
287  else
288  {
289  /*
290  * Second or later live tuple. Arrange to re-chain it to the
291  * previous live one, if there was a gap.
292  */
293  if (interveningDeletable)
294  {
295  chainSrc[xlrec.nChain] = prevLive;
296  chainDest[xlrec.nChain] = j;
297  xlrec.nChain++;
298  }
299  prevLive = j;
300  interveningDeletable = false;
301  }
302 
303  j = SGLT_GET_NEXTOFFSET(lt);
304  }
305 
306  if (prevLive == InvalidOffsetNumber)
307  {
308  /* The chain is entirely removable, so we need a DEAD tuple */
309  toDead[xlrec.nDead] = i;
310  xlrec.nDead++;
311  }
312  else if (interveningDeletable)
313  {
314  /* One or more deletions at end of chain, so close it off */
315  chainSrc[xlrec.nChain] = prevLive;
316  chainDest[xlrec.nChain] = InvalidOffsetNumber;
317  xlrec.nChain++;
318  }
319  }
320 
321  /* sanity check ... */
322  if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
323  elog(ERROR, "inconsistent counts of deletable tuples");
324 
325  /* Do the updates */
327 
328  spgPageIndexMultiDelete(&bds->spgstate, page,
329  toDead, xlrec.nDead,
332 
333  spgPageIndexMultiDelete(&bds->spgstate, page,
334  toPlaceholder, xlrec.nPlaceholder,
337 
338  /*
339  * We implement the move step by swapping the line pointers of the source
340  * and target tuples, then replacing the newly-source tuples with
341  * placeholders. This is perhaps unduly friendly with the page data
342  * representation, but it's fast and doesn't risk page overflow when a
343  * tuple to be relocated is large.
344  */
345  for (i = 0; i < xlrec.nMove; i++)
346  {
347  ItemId idSrc = PageGetItemId(page, moveSrc[i]);
348  ItemId idDest = PageGetItemId(page, moveDest[i]);
349  ItemIdData tmp;
350 
351  tmp = *idSrc;
352  *idSrc = *idDest;
353  *idDest = tmp;
354  }
355 
356  spgPageIndexMultiDelete(&bds->spgstate, page,
357  moveSrc, xlrec.nMove,
360 
361  for (i = 0; i < xlrec.nChain; i++)
362  {
363  SpGistLeafTuple lt;
364 
365  lt = (SpGistLeafTuple) PageGetItem(page,
366  PageGetItemId(page, chainSrc[i]));
367  Assert(lt->tupstate == SPGIST_LIVE);
368  SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
369  }
370 
371  MarkBufferDirty(buffer);
372 
373  if (RelationNeedsWAL(index))
374  {
375  XLogRecPtr recptr;
376 
377  XLogBeginInsert();
378 
379  STORE_STATE(&bds->spgstate, xlrec.stateSrc);
380 
381  XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
382  /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
383  XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
384  XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
385  XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
386  XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
387  XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
388  XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
389 
391 
392  recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
393 
394  PageSetLSN(page, recptr);
395  }
396 
398 }
399 
400 /*
401  * Vacuum a root page when it is also a leaf
402  *
403  * On the root, we just delete any dead leaf tuples; no fancy business
404  */
405 static void
407 {
408  Page page = BufferGetPage(buffer);
409  spgxlogVacuumRoot xlrec;
411  OffsetNumber i,
412  max = PageGetMaxOffsetNumber(page);
413 
414  xlrec.nDelete = 0;
415 
416  /* Scan page, identify tuples to delete, accumulate stats */
417  for (i = FirstOffsetNumber; i <= max; i++)
418  {
419  SpGistLeafTuple lt;
420 
421  lt = (SpGistLeafTuple) PageGetItem(page,
422  PageGetItemId(page, i));
423  if (lt->tupstate == SPGIST_LIVE)
424  {
426 
427  if (bds->callback(&lt->heapPtr, bds->callback_state))
428  {
429  bds->stats->tuples_removed += 1;
430  toDelete[xlrec.nDelete] = i;
431  xlrec.nDelete++;
432  }
433  else
434  {
435  bds->stats->num_index_tuples += 1;
436  }
437  }
438  else
439  {
440  /* all tuples on root should be live */
441  elog(ERROR, "unexpected SPGiST tuple state: %d",
442  lt->tupstate);
443  }
444  }
445 
446  if (xlrec.nDelete == 0)
447  return; /* nothing more to do */
448 
449  /* Do the update */
451 
452  /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
453  PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
454 
455  MarkBufferDirty(buffer);
456 
457  if (RelationNeedsWAL(index))
458  {
459  XLogRecPtr recptr;
460 
461  XLogBeginInsert();
462 
463  /* Prepare WAL record */
464  STORE_STATE(&bds->spgstate, xlrec.stateSrc);
465 
466  XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
467  /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
468  XLogRegisterData((char *) toDelete,
469  sizeof(OffsetNumber) * xlrec.nDelete);
470 
472 
473  recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
474 
475  PageSetLSN(page, recptr);
476  }
477 
479 }
480 
481 /*
482  * Clean up redirect and placeholder tuples on the given page
483  *
484  * Redirect tuples can be marked placeholder once they're old enough.
485  * Placeholder tuples can be removed if it won't change the offsets of
486  * non-placeholder ones.
487  *
488  * Unlike the routines above, this works on both leaf and inner pages.
489  */
490 static void
492 {
493  Page page = BufferGetPage(buffer);
494  SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
495  OffsetNumber i,
496  max = PageGetMaxOffsetNumber(page),
497  firstPlaceholder = InvalidOffsetNumber;
498  bool hasNonPlaceholder = false;
499  bool hasUpdate = false;
500  OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
502  spgxlogVacuumRedirect xlrec;
503  GlobalVisState *vistest;
504 
506  xlrec.nToPlaceholder = 0;
508 
509  vistest = GlobalVisTestFor(heaprel);
510 
512 
513  /*
514  * Scan backwards to convert old redirection tuples to placeholder tuples,
515  * and identify location of last non-placeholder tuple while at it.
516  */
517  for (i = max;
518  i >= FirstOffsetNumber &&
519  (opaque->nRedirection > 0 || !hasNonPlaceholder);
520  i--)
521  {
522  SpGistDeadTuple dt;
523 
524  dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
525 
526  if (dt->tupstate == SPGIST_REDIRECT &&
527  GlobalVisTestIsRemovableXid(vistest, dt->xid))
528  {
530  Assert(opaque->nRedirection > 0);
531  opaque->nRedirection--;
532  opaque->nPlaceholder++;
533 
534  /* remember newest XID among the removed redirects */
537  xlrec.snapshotConflictHorizon = dt->xid;
538 
540 
541  itemToPlaceholder[xlrec.nToPlaceholder] = i;
542  xlrec.nToPlaceholder++;
543 
544  hasUpdate = true;
545  }
546 
547  if (dt->tupstate == SPGIST_PLACEHOLDER)
548  {
549  if (!hasNonPlaceholder)
550  firstPlaceholder = i;
551  }
552  else
553  {
554  hasNonPlaceholder = true;
555  }
556  }
557 
558  /*
559  * Any placeholder tuples at the end of page can safely be removed. We
560  * can't remove ones before the last non-placeholder, though, because we
561  * can't alter the offset numbers of non-placeholder tuples.
562  */
563  if (firstPlaceholder != InvalidOffsetNumber)
564  {
565  /*
566  * We do not store this array to rdata because it's easy to recreate.
567  */
568  for (i = firstPlaceholder; i <= max; i++)
569  itemnos[i - firstPlaceholder] = i;
570 
571  i = max - firstPlaceholder + 1;
572  Assert(opaque->nPlaceholder >= i);
573  opaque->nPlaceholder -= i;
574 
575  /* The array is surely sorted, so can use PageIndexMultiDelete */
576  PageIndexMultiDelete(page, itemnos, i);
577 
578  hasUpdate = true;
579  }
580 
581  xlrec.firstPlaceholder = firstPlaceholder;
582 
583  if (hasUpdate)
584  MarkBufferDirty(buffer);
585 
586  if (hasUpdate && RelationNeedsWAL(index))
587  {
588  XLogRecPtr recptr;
589 
590  XLogBeginInsert();
591 
593  XLogRegisterData((char *) itemToPlaceholder,
594  sizeof(OffsetNumber) * xlrec.nToPlaceholder);
595 
597 
598  recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
599 
600  PageSetLSN(page, recptr);
601  }
602 
604 }
605 
606 /*
607  * Process one page during a bulkdelete scan
608  */
609 static void
611 {
612  Relation index = bds->info->index;
613  Buffer buffer;
614  Page page;
615 
616  /* call vacuum_delay_point while not holding any buffer lock */
618 
619  buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
620  RBM_NORMAL, bds->info->strategy);
622  page = (Page) BufferGetPage(buffer);
623 
624  if (PageIsNew(page))
625  {
626  /*
627  * We found an all-zero page, which could happen if the database
628  * crashed just after extending the file. Recycle it.
629  */
630  }
631  else if (PageIsEmpty(page))
632  {
633  /* nothing to do */
634  }
635  else if (SpGistPageIsLeaf(page))
636  {
637  if (SpGistBlockIsRoot(blkno))
638  {
639  vacuumLeafRoot(bds, index, buffer);
640  /* no need for vacuumRedirectAndPlaceholder */
641  }
642  else
643  {
644  vacuumLeafPage(bds, index, buffer, false);
646  }
647  }
648  else
649  {
650  /* inner page */
652  }
653 
654  /*
655  * The root pages must never be deleted, nor marked as available in FSM,
656  * because we don't want them ever returned by a search for a place to put
657  * a new tuple. Otherwise, check for empty page, and make sure the FSM
658  * knows about it.
659  */
660  if (!SpGistBlockIsRoot(blkno))
661  {
662  if (PageIsNew(page) || PageIsEmpty(page))
663  {
664  RecordFreeIndexPage(index, blkno);
665  bds->stats->pages_deleted++;
666  }
667  else
668  {
669  SpGistSetLastUsedPage(index, buffer);
670  bds->lastFilledBlock = blkno;
671  }
672  }
673 
674  UnlockReleaseBuffer(buffer);
675 }
676 
677 /*
678  * Process the pending-TID list between pages of the main scan
679  */
680 static void
682 {
683  Relation index = bds->info->index;
684  spgVacPendingItem *pitem;
685  spgVacPendingItem *nitem;
686  BlockNumber blkno;
687  Buffer buffer;
688  Page page;
689 
690  for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
691  {
692  if (pitem->done)
693  continue; /* ignore already-done items */
694 
695  /* call vacuum_delay_point while not holding any buffer lock */
697 
698  /* examine the referenced page */
699  blkno = ItemPointerGetBlockNumber(&pitem->tid);
700  buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
701  RBM_NORMAL, bds->info->strategy);
703  page = (Page) BufferGetPage(buffer);
704 
705  if (PageIsNew(page) || SpGistPageIsDeleted(page))
706  {
707  /* Probably shouldn't happen, but ignore it */
708  }
709  else if (SpGistPageIsLeaf(page))
710  {
711  if (SpGistBlockIsRoot(blkno))
712  {
713  /* this should definitely not happen */
714  elog(ERROR, "redirection leads to root page of index \"%s\"",
716  }
717 
718  /* deal with any deletable tuples */
719  vacuumLeafPage(bds, index, buffer, true);
720  /* might as well do this while we are here */
722 
723  SpGistSetLastUsedPage(index, buffer);
724 
725  /*
726  * We can mark as done not only this item, but any later ones
727  * pointing at the same page, since we vacuumed the whole page.
728  */
729  pitem->done = true;
730  for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
731  {
732  if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
733  nitem->done = true;
734  }
735  }
736  else
737  {
738  /*
739  * On an inner page, visit the referenced inner tuple and add all
740  * its downlinks to the pending list. We might have pending items
741  * for more than one inner tuple on the same page (in fact this is
742  * pretty likely given the way space allocation works), so get
743  * them all while we are here.
744  */
745  for (nitem = pitem; nitem != NULL; nitem = nitem->next)
746  {
747  if (nitem->done)
748  continue;
749  if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
750  {
751  OffsetNumber offset;
752  SpGistInnerTuple innerTuple;
753 
754  offset = ItemPointerGetOffsetNumber(&nitem->tid);
755  innerTuple = (SpGistInnerTuple) PageGetItem(page,
756  PageGetItemId(page, offset));
757  if (innerTuple->tupstate == SPGIST_LIVE)
758  {
759  SpGistNodeTuple node;
760  int i;
761 
762  SGITITERATE(innerTuple, i, node)
763  {
764  if (ItemPointerIsValid(&node->t_tid))
765  spgAddPendingTID(bds, &node->t_tid);
766  }
767  }
768  else if (innerTuple->tupstate == SPGIST_REDIRECT)
769  {
770  /* transfer attention to redirect point */
771  spgAddPendingTID(bds,
772  &((SpGistDeadTuple) innerTuple)->pointer);
773  }
774  else
775  elog(ERROR, "unexpected SPGiST tuple state: %d",
776  innerTuple->tupstate);
777 
778  nitem->done = true;
779  }
780  }
781  }
782 
783  UnlockReleaseBuffer(buffer);
784  }
785 
786  spgClearPendingList(bds);
787 }
788 
789 /*
790  * Perform a bulkdelete scan
791  */
792 static void
794 {
795  Relation index = bds->info->index;
796  bool needLock;
797  BlockNumber num_pages,
798  blkno;
799 
800  /* Finish setting up spgBulkDeleteState */
802  bds->pendingList = NULL;
803  bds->myXmin = GetActiveSnapshot()->xmin;
805 
806  /*
807  * Reset counts that will be incremented during the scan; needed in case
808  * of multiple scans during a single VACUUM command
809  */
810  bds->stats->estimated_count = false;
811  bds->stats->num_index_tuples = 0;
812  bds->stats->pages_deleted = 0;
813 
814  /* We can skip locking for new or temp relations */
815  needLock = !RELATION_IS_LOCAL(index);
816 
817  /*
818  * The outer loop iterates over all index pages except the metapage, in
819  * physical order (we hope the kernel will cooperate in providing
820  * read-ahead for speed). It is critical that we visit all leaf pages,
821  * including ones added after we start the scan, else we might fail to
822  * delete some deletable tuples. See more extensive comments about this
823  * in btvacuumscan().
824  */
825  blkno = SPGIST_METAPAGE_BLKNO + 1;
826  for (;;)
827  {
828  /* Get the current relation length */
829  if (needLock)
831  num_pages = RelationGetNumberOfBlocks(index);
832  if (needLock)
834 
835  /* Quit if we've scanned the whole relation */
836  if (blkno >= num_pages)
837  break;
838  /* Iterate over pages, then loop back to recheck length */
839  for (; blkno < num_pages; blkno++)
840  {
841  spgvacuumpage(bds, blkno);
842  /* empty the pending-list after each page */
843  if (bds->pendingList != NULL)
844  spgprocesspending(bds);
845  }
846  }
847 
848  /* Propagate local lastUsedPages cache to metablock */
850 
851  /*
852  * If we found any empty pages (and recorded them in the FSM), then
853  * forcibly update the upper-level FSM pages to ensure that searchers can
854  * find them. It's possible that the pages were also found during
855  * previous scans and so this is a waste of time, but it's cheap enough
856  * relative to scanning the index that it shouldn't matter much, and
857  * making sure that free pages are available sooner not later seems
858  * worthwhile.
859  *
860  * Note that if no empty pages exist, we don't bother vacuuming the FSM at
861  * all.
862  */
863  if (bds->stats->pages_deleted > 0)
865 
866  /*
867  * Truncate index if possible
868  *
869  * XXX disabled because it's unsafe due to possible concurrent inserts.
870  * We'd have to rescan the pages to make sure they're still empty, and it
871  * doesn't seem worth it. Note that btree doesn't do this either.
872  *
873  * Another reason not to truncate is that it could invalidate the cached
874  * pages-with-freespace pointers in the metapage and other backends'
875  * relation caches, that is leave them pointing to nonexistent pages.
876  * Adding RelationGetNumberOfBlocks calls to protect the places that use
877  * those pointers would be unduly expensive.
878  */
879 #ifdef NOT_USED
880  if (num_pages > bds->lastFilledBlock + 1)
881  {
882  BlockNumber lastBlock = num_pages - 1;
883 
884  num_pages = bds->lastFilledBlock + 1;
885  RelationTruncate(index, num_pages);
886  bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
887  bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
888  }
889 #endif
890 
891  /* Report final stats */
892  bds->stats->num_pages = num_pages;
894  bds->stats->pages_free = bds->stats->pages_deleted;
895 }
896 
897 /*
898  * Bulk deletion of all index entries pointing to a set of heap tuples.
899  * The set of target tuples is specified via a callback routine that tells
900  * whether any given heap tuple (identified by ItemPointer) is being deleted.
901  *
902  * Result: a palloc'd struct containing statistical info for VACUUM displays.
903  */
906  IndexBulkDeleteCallback callback, void *callback_state)
907 {
908  spgBulkDeleteState bds;
909 
910  /* allocate stats if first time through, else re-use existing struct */
911  if (stats == NULL)
913  bds.info = info;
914  bds.stats = stats;
915  bds.callback = callback;
916  bds.callback_state = callback_state;
917 
918  spgvacuumscan(&bds);
919 
920  return stats;
921 }
922 
923 /* Dummy callback to delete no tuples during spgvacuumcleanup */
924 static bool
926 {
927  return false;
928 }
929 
930 /*
931  * Post-VACUUM cleanup.
932  *
933  * Result: a palloc'd struct containing statistical info for VACUUM displays.
934  */
937 {
938  spgBulkDeleteState bds;
939 
940  /* No-op in ANALYZE ONLY mode */
941  if (info->analyze_only)
942  return stats;
943 
944  /*
945  * We don't need to scan the index if there was a preceding bulkdelete
946  * pass. Otherwise, make a pass that won't delete any live tuples, but
947  * might still accomplish useful stuff with redirect/placeholder cleanup
948  * and/or FSM housekeeping, and in any case will provide stats.
949  */
950  if (stats == NULL)
951  {
953  bds.info = info;
954  bds.stats = stats;
955  bds.callback = dummy_callback;
956  bds.callback_state = NULL;
957 
958  spgvacuumscan(&bds);
959  }
960 
961  /*
962  * It's quite possible for us to be fooled by concurrent tuple moves into
963  * double-counting some index tuples, so disbelieve any total that exceeds
964  * the underlying heap's count ... if we know that accurately. Otherwise
965  * this might just make matters worse.
966  */
967  if (!info->estimated_count)
968  {
969  if (stats->num_index_tuples > info->num_heap_tuples)
970  stats->num_index_tuples = info->num_heap_tuples;
971  }
972 
973  return stats;
974 }
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
int Buffer
Definition: buf.h:23
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3377
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4577
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2189
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4795
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:781
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:229
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:159
@ RBM_NORMAL
Definition: bufmgr.h:44
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1161
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
uint32 TransactionId
Definition: c.h:639
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:87
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
int j
Definition: isn.c:74
int i
Definition: isn.c:73
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
static void ItemPointerSetInvalid(ItemPointerData *pointer)
Definition: itemptr.h:184
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
#define MaxIndexTuplesPerPage
Definition: itup.h:165
Assert(fmt[strlen(fmt) - 1] !='\n')
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:430
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:480
#define ExclusiveLock
Definition: lockdefs.h:42
void pfree(void *pointer)
Definition: mcxt.c:1508
void * palloc0(Size size)
Definition: mcxt.c:1334
void * palloc(Size size)
Definition: mcxt.c:1304
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
#define InvalidOffsetNumber
Definition: off.h:26
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4248
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4091
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:648
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:684
#define RelationNeedsWAL(relation)
Definition: rel.h:628
@ MAIN_FORKNUM
Definition: relpath.h:50
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:770
void spgPageIndexMultiDelete(SpGistState *state, Page page, OffsetNumber *itemnos, int nitems, int firststate, int reststate, BlockNumber blkno, OffsetNumber offnum)
Definition: spgdoinsert.c:131
SpGistDeadTupleData * SpGistDeadTuple
#define SPGIST_REDIRECT
SpGistInnerTupleData * SpGistInnerTuple
#define SPGIST_LIVE
#define SGLT_GET_NEXTOFFSET(spgLeafTuple)
#define SPGIST_PLACEHOLDER
#define SGITITERATE(x, i, nt)
#define SPGIST_DEAD
#define SpGistPageIsLeaf(page)
#define SPGIST_METAPAGE_BLKNO
#define SpGistPageIsDeleted(page)
#define SGLT_SET_NEXTOFFSET(spgLeafTuple, offsetNumber)
#define SpGistBlockIsRoot(blkno)
struct SpGistLeafTupleData * SpGistLeafTuple
#define STORE_STATE(s, d)
#define SpGistPageGetOpaque(page)
#define SPGIST_LAST_FIXED_BLKNO
void initSpGistState(SpGistState *state, Relation index)
Definition: spgutils.c:340
void SpGistUpdateMetaPage(Relation index)
Definition: spgutils.c:431
void SpGistSetLastUsedPage(Relation index, Buffer buffer)
Definition: spgutils.c:654
struct spgBulkDeleteState spgBulkDeleteState
static void vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
Definition: spgvacuum.c:491
static bool dummy_callback(ItemPointer itemptr, void *state)
Definition: spgvacuum.c:925
struct spgVacPendingItem spgVacPendingItem
IndexBulkDeleteResult * spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: spgvacuum.c:936
static void vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer, bool forPending)
Definition: spgvacuum.c:125
static void spgvacuumscan(spgBulkDeleteState *bds)
Definition: spgvacuum.c:793
static void spgprocesspending(spgBulkDeleteState *bds)
Definition: spgvacuum.c:681
IndexBulkDeleteResult * spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: spgvacuum.c:905
static void vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
Definition: spgvacuum.c:406
static void spgClearPendingList(spgBulkDeleteState *bds)
Definition: spgvacuum.c:89
static void spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
Definition: spgvacuum.c:610
static void spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
Definition: spgvacuum.c:63
#define XLOG_SPGIST_VACUUM_ROOT
Definition: spgxlog.h:28
#define SizeOfSpgxlogVacuumLeaf
Definition: spgxlog.h:223
#define SizeOfSpgxlogVacuumRedirect
Definition: spgxlog.h:250
#define XLOG_SPGIST_VACUUM_LEAF
Definition: spgxlog.h:27
#define XLOG_SPGIST_VACUUM_REDIRECT
Definition: spgxlog.h:29
#define SizeOfSpgxlogVacuumRoot
Definition: spgxlog.h:236
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:288
bool estimated_count
Definition: genam.h:78
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_newly_deleted
Definition: genam.h:81
BlockNumber pages_free
Definition: genam.h:83
BlockNumber num_pages
Definition: genam.h:77
double tuples_removed
Definition: genam.h:80
double num_index_tuples
Definition: genam.h:79
ItemPointerData t_tid
Definition: itup.h:37
Relation index
Definition: genam.h:46
double num_heap_tuples
Definition: genam.h:52
bool analyze_only
Definition: genam.h:48
BufferAccessStrategy strategy
Definition: genam.h:53
Relation heaprel
Definition: genam.h:47
bool estimated_count
Definition: genam.h:50
TransactionId xmin
Definition: snapshot.h:157
unsigned int tupstate
ItemPointerData pointer
unsigned int tupstate
ItemPointerData heapPtr
Definition: type.h:95
IndexBulkDeleteResult * stats
Definition: spgvacuum.c:44
IndexBulkDeleteCallback callback
Definition: spgvacuum.c:45
spgVacPendingItem * pendingList
Definition: spgvacuum.c:50
SpGistState spgstate
Definition: spgvacuum.c:49
void * callback_state
Definition: spgvacuum.c:46
TransactionId myXmin
Definition: spgvacuum.c:51
BlockNumber lastFilledBlock
Definition: spgvacuum.c:52
IndexVacuumInfo * info
Definition: spgvacuum.c:43
ItemPointerData tid
Definition: spgvacuum.c:34
struct spgVacPendingItem * next
Definition: spgvacuum.c:36
spgxlogState stateSrc
Definition: spgxlog.h:208
uint16 nPlaceholder
Definition: spgxlog.h:204
OffsetNumber firstPlaceholder
Definition: spgxlog.h:241
TransactionId snapshotConflictHorizon
Definition: spgxlog.h:242
spgxlogState stateSrc
Definition: spgxlog.h:230
uint16 nDelete
Definition: spgxlog.h:228
Definition: regguts.h:323
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void vacuum_delay_point(void)
Definition: vacuum.c:2337
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34