PostgreSQL Source Code  git master
spgvacuum.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * spgvacuum.c
4  * vacuum for SP-GiST
5  *
6  *
7  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  * src/backend/access/spgist/spgvacuum.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/genam.h"
19 #include "access/spgist_private.h"
20 #include "access/spgxlog.h"
21 #include "access/transam.h"
22 #include "access/xloginsert.h"
23 #include "catalog/storage_xlog.h"
24 #include "commands/vacuum.h"
25 #include "miscadmin.h"
26 #include "storage/bufmgr.h"
27 #include "storage/indexfsm.h"
28 #include "storage/lmgr.h"
29 #include "utils/snapmgr.h"
30 
31 
32 /* Entry in pending-list of TIDs we need to revisit */
33 typedef struct spgVacPendingItem
34 {
35  ItemPointerData tid; /* redirection target to visit */
36  bool done; /* have we dealt with this? */
37  struct spgVacPendingItem *next; /* list link */
39 
40 /* Local state for vacuum operations */
41 typedef struct spgBulkDeleteState
42 {
43  /* Parameters passed in to spgvacuumscan */
48 
49  /* Additional working state */
50  SpGistState spgstate; /* for SPGiST operations that need one */
51  spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
52  TransactionId myXmin; /* for detecting newly-added redirects */
53  BlockNumber lastFilledBlock; /* last non-deletable block */
55 
56 
57 /*
58  * Add TID to pendingList, but only if not already present.
59  *
60  * Note that new items are always appended at the end of the list; this
61  * ensures that scans of the list don't miss items added during the scan.
62  */
63 static void
65 {
66  spgVacPendingItem *pitem;
67  spgVacPendingItem **listLink;
68 
69  /* search the list for pre-existing entry */
70  listLink = &bds->pendingList;
71  while (*listLink != NULL)
72  {
73  pitem = *listLink;
74  if (ItemPointerEquals(tid, &pitem->tid))
75  return; /* already in list, do nothing */
76  listLink = &pitem->next;
77  }
78  /* not there, so append new entry */
79  pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
80  pitem->tid = *tid;
81  pitem->done = false;
82  pitem->next = NULL;
83  *listLink = pitem;
84 }
85 
86 /*
87  * Clear pendingList
88  */
89 static void
91 {
92  spgVacPendingItem *pitem;
93  spgVacPendingItem *nitem;
94 
95  for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
96  {
97  nitem = pitem->next;
98  /* All items in list should have been dealt with */
99  Assert(pitem->done);
100  pfree(pitem);
101  }
102  bds->pendingList = NULL;
103 }
104 
105 /*
106  * Vacuum a regular (non-root) leaf page
107  *
108  * We must delete tuples that are targeted for deletion by the VACUUM,
109  * but not move any tuples that are referenced by outside links; we assume
110  * those are the ones that are heads of chains.
111  *
112  * If we find a REDIRECT that was made by a concurrently-running transaction,
113  * we must add its target TID to pendingList. (We don't try to visit the
114  * target immediately, first because we don't want VACUUM locking more than
115  * one buffer at a time, and second because the duplicate-filtering logic
116  * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
117  * loop in the face of continuous concurrent insertions.)
118  *
119  * If forPending is true, we are examining the page as a consequence of
120  * chasing a redirect link, not as part of the normal sequential scan.
121  * We still vacuum the page normally, but we don't increment the stats
122  * about live tuples; else we'd double-count those tuples, since the page
123  * has been or will be visited in the sequential scan as well.
124  */
125 static void
127  bool forPending)
128 {
129  Page page = BufferGetPage(buffer);
130  spgxlogVacuumLeaf xlrec;
132  OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
137  OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
138  bool deletable[MaxIndexTuplesPerPage + 1];
139  int nDeletable;
140  OffsetNumber i,
141  max = PageGetMaxOffsetNumber(page);
142 
143  memset(predecessor, 0, sizeof(predecessor));
144  memset(deletable, 0, sizeof(deletable));
145  nDeletable = 0;
146 
147  /* Scan page, identify tuples to delete, accumulate stats */
148  for (i = FirstOffsetNumber; i <= max; i++)
149  {
150  SpGistLeafTuple lt;
151 
152  lt = (SpGistLeafTuple) PageGetItem(page,
153  PageGetItemId(page, i));
154  if (lt->tupstate == SPGIST_LIVE)
155  {
157 
158  if (bds->callback(&lt->heapPtr, bds->callback_state))
159  {
160  bds->stats->tuples_removed += 1;
161  deletable[i] = true;
162  nDeletable++;
163  }
164  else
165  {
166  if (!forPending)
167  bds->stats->num_index_tuples += 1;
168  }
169 
170  /* Form predecessor map, too */
171  if (lt->nextOffset != InvalidOffsetNumber)
172  {
173  /* paranoia about corrupted chain links */
174  if (lt->nextOffset < FirstOffsetNumber ||
175  lt->nextOffset > max ||
176  predecessor[lt->nextOffset] != InvalidOffsetNumber)
177  elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
178  BufferGetBlockNumber(buffer),
179  RelationGetRelationName(index));
180  predecessor[lt->nextOffset] = i;
181  }
182  }
183  else if (lt->tupstate == SPGIST_REDIRECT)
184  {
186 
189 
190  /*
191  * Add target TID to pending list if the redirection could have
192  * happened since VACUUM started.
193  *
194  * Note: we could make a tighter test by seeing if the xid is
195  * "running" according to the active snapshot; but snapmgr.c
196  * doesn't currently export a suitable API, and it's not entirely
197  * clear that a tighter test is worth the cycles anyway.
198  */
199  if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
200  spgAddPendingTID(bds, &dt->pointer);
201  }
202  else
203  {
205  }
206  }
207 
208  if (nDeletable == 0)
209  return; /* nothing more to do */
210 
211  /*----------
212  * Figure out exactly what we have to do. We do this separately from
213  * actually modifying the page, mainly so that we have a representation
214  * that can be dumped into WAL and then the replay code can do exactly
215  * the same thing. The output of this step consists of six arrays
216  * describing four kinds of operations, to be performed in this order:
217  *
218  * toDead[]: tuple numbers to be replaced with DEAD tuples
219  * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
220  * moveSrc[]: tuple numbers that need to be relocated to another offset
221  * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
222  * moveDest[]: new locations for moveSrc tuples
223  * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
224  * chainDest[]: new values of nextOffset for chainSrc members
225  *
226  * It's easiest to figure out what we have to do by processing tuple
227  * chains, so we iterate over all the tuples (not just the deletable
228  * ones!) to identify chain heads, then chase down each chain and make
229  * work item entries for deletable tuples within the chain.
230  *----------
231  */
232  xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
233 
234  for (i = FirstOffsetNumber; i <= max; i++)
235  {
236  SpGistLeafTuple head;
237  bool interveningDeletable;
238  OffsetNumber prevLive;
239  OffsetNumber j;
240 
241  head = (SpGistLeafTuple) PageGetItem(page,
242  PageGetItemId(page, i));
243  if (head->tupstate != SPGIST_LIVE)
244  continue; /* can't be a chain member */
245  if (predecessor[i] != 0)
246  continue; /* not a chain head */
247 
248  /* initialize ... */
249  interveningDeletable = false;
250  prevLive = deletable[i] ? InvalidOffsetNumber : i;
251 
252  /* scan down the chain ... */
253  j = head->nextOffset;
254  while (j != InvalidOffsetNumber)
255  {
256  SpGistLeafTuple lt;
257 
258  lt = (SpGistLeafTuple) PageGetItem(page,
259  PageGetItemId(page, j));
260  if (lt->tupstate != SPGIST_LIVE)
261  {
262  /* all tuples in chain should be live */
263  elog(ERROR, "unexpected SPGiST tuple state: %d",
264  lt->tupstate);
265  }
266 
267  if (deletable[j])
268  {
269  /* This tuple should be replaced by a placeholder */
270  toPlaceholder[xlrec.nPlaceholder] = j;
271  xlrec.nPlaceholder++;
272  /* previous live tuple's chain link will need an update */
273  interveningDeletable = true;
274  }
275  else if (prevLive == InvalidOffsetNumber)
276  {
277  /*
278  * This is the first live tuple in the chain. It has to move
279  * to the head position.
280  */
281  moveSrc[xlrec.nMove] = j;
282  moveDest[xlrec.nMove] = i;
283  xlrec.nMove++;
284  /* Chain updates will be applied after the move */
285  prevLive = i;
286  interveningDeletable = false;
287  }
288  else
289  {
290  /*
291  * Second or later live tuple. Arrange to re-chain it to the
292  * previous live one, if there was a gap.
293  */
294  if (interveningDeletable)
295  {
296  chainSrc[xlrec.nChain] = prevLive;
297  chainDest[xlrec.nChain] = j;
298  xlrec.nChain++;
299  }
300  prevLive = j;
301  interveningDeletable = false;
302  }
303 
304  j = lt->nextOffset;
305  }
306 
307  if (prevLive == InvalidOffsetNumber)
308  {
309  /* The chain is entirely removable, so we need a DEAD tuple */
310  toDead[xlrec.nDead] = i;
311  xlrec.nDead++;
312  }
313  else if (interveningDeletable)
314  {
315  /* One or more deletions at end of chain, so close it off */
316  chainSrc[xlrec.nChain] = prevLive;
317  chainDest[xlrec.nChain] = InvalidOffsetNumber;
318  xlrec.nChain++;
319  }
320  }
321 
322  /* sanity check ... */
323  if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
324  elog(ERROR, "inconsistent counts of deletable tuples");
325 
326  /* Do the updates */
328 
329  spgPageIndexMultiDelete(&bds->spgstate, page,
330  toDead, xlrec.nDead,
333 
334  spgPageIndexMultiDelete(&bds->spgstate, page,
335  toPlaceholder, xlrec.nPlaceholder,
338 
339  /*
340  * We implement the move step by swapping the line pointers of the source
341  * and target tuples, then replacing the newly-source tuples with
342  * placeholders. This is perhaps unduly friendly with the page data
343  * representation, but it's fast and doesn't risk page overflow when a
344  * tuple to be relocated is large.
345  */
346  for (i = 0; i < xlrec.nMove; i++)
347  {
348  ItemId idSrc = PageGetItemId(page, moveSrc[i]);
349  ItemId idDest = PageGetItemId(page, moveDest[i]);
350  ItemIdData tmp;
351 
352  tmp = *idSrc;
353  *idSrc = *idDest;
354  *idDest = tmp;
355  }
356 
357  spgPageIndexMultiDelete(&bds->spgstate, page,
358  moveSrc, xlrec.nMove,
361 
362  for (i = 0; i < xlrec.nChain; i++)
363  {
364  SpGistLeafTuple lt;
365 
366  lt = (SpGistLeafTuple) PageGetItem(page,
367  PageGetItemId(page, chainSrc[i]));
368  Assert(lt->tupstate == SPGIST_LIVE);
369  lt->nextOffset = chainDest[i];
370  }
371 
372  MarkBufferDirty(buffer);
373 
374  if (RelationNeedsWAL(index))
375  {
376  XLogRecPtr recptr;
377 
378  XLogBeginInsert();
379 
380  STORE_STATE(&bds->spgstate, xlrec.stateSrc);
381 
382  XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
383  /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
384  XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
385  XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
386  XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
387  XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
388  XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
389  XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
390 
392 
393  recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
394 
395  PageSetLSN(page, recptr);
396  }
397 
399 }
400 
401 /*
402  * Vacuum a root page when it is also a leaf
403  *
404  * On the root, we just delete any dead leaf tuples; no fancy business
405  */
406 static void
408 {
409  Page page = BufferGetPage(buffer);
410  spgxlogVacuumRoot xlrec;
412  OffsetNumber i,
413  max = PageGetMaxOffsetNumber(page);
414 
415  xlrec.nDelete = 0;
416 
417  /* Scan page, identify tuples to delete, accumulate stats */
418  for (i = FirstOffsetNumber; i <= max; i++)
419  {
420  SpGistLeafTuple lt;
421 
422  lt = (SpGistLeafTuple) PageGetItem(page,
423  PageGetItemId(page, i));
424  if (lt->tupstate == SPGIST_LIVE)
425  {
427 
428  if (bds->callback(&lt->heapPtr, bds->callback_state))
429  {
430  bds->stats->tuples_removed += 1;
431  toDelete[xlrec.nDelete] = i;
432  xlrec.nDelete++;
433  }
434  else
435  {
436  bds->stats->num_index_tuples += 1;
437  }
438  }
439  else
440  {
441  /* all tuples on root should be live */
442  elog(ERROR, "unexpected SPGiST tuple state: %d",
443  lt->tupstate);
444  }
445  }
446 
447  if (xlrec.nDelete == 0)
448  return; /* nothing more to do */
449 
450  /* Do the update */
452 
453  /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
454  PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
455 
456  MarkBufferDirty(buffer);
457 
458  if (RelationNeedsWAL(index))
459  {
460  XLogRecPtr recptr;
461 
462  XLogBeginInsert();
463 
464  /* Prepare WAL record */
465  STORE_STATE(&bds->spgstate, xlrec.stateSrc);
466 
467  XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
468  /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
469  XLogRegisterData((char *) toDelete,
470  sizeof(OffsetNumber) * xlrec.nDelete);
471 
473 
474  recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
475 
476  PageSetLSN(page, recptr);
477  }
478 
480 }
481 
482 /*
483  * Clean up redirect and placeholder tuples on the given page
484  *
485  * Redirect tuples can be marked placeholder once they're old enough.
486  * Placeholder tuples can be removed if it won't change the offsets of
487  * non-placeholder ones.
488  *
489  * Unlike the routines above, this works on both leaf and inner pages.
490  */
491 static void
493 {
494  Page page = BufferGetPage(buffer);
495  SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
496  OffsetNumber i,
497  max = PageGetMaxOffsetNumber(page),
498  firstPlaceholder = InvalidOffsetNumber;
499  bool hasNonPlaceholder = false;
500  bool hasUpdate = false;
501  OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
503  spgxlogVacuumRedirect xlrec;
504  GlobalVisState *vistest;
505 
506  xlrec.nToPlaceholder = 0;
508 
509  /* XXX: providing heap relation would allow more pruning */
510  vistest = GlobalVisTestFor(NULL);
511 
513 
514  /*
515  * Scan backwards to convert old redirection tuples to placeholder tuples,
516  * and identify location of last non-placeholder tuple while at it.
517  */
518  for (i = max;
519  i >= FirstOffsetNumber &&
520  (opaque->nRedirection > 0 || !hasNonPlaceholder);
521  i--)
522  {
523  SpGistDeadTuple dt;
524 
525  dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
526 
527  if (dt->tupstate == SPGIST_REDIRECT &&
528  GlobalVisTestIsRemovableXid(vistest, dt->xid))
529  {
531  Assert(opaque->nRedirection > 0);
532  opaque->nRedirection--;
533  opaque->nPlaceholder++;
534 
535  /* remember newest XID among the removed redirects */
538  xlrec.newestRedirectXid = dt->xid;
539 
541 
542  itemToPlaceholder[xlrec.nToPlaceholder] = i;
543  xlrec.nToPlaceholder++;
544 
545  hasUpdate = true;
546  }
547 
548  if (dt->tupstate == SPGIST_PLACEHOLDER)
549  {
550  if (!hasNonPlaceholder)
551  firstPlaceholder = i;
552  }
553  else
554  {
555  hasNonPlaceholder = true;
556  }
557  }
558 
559  /*
560  * Any placeholder tuples at the end of page can safely be removed. We
561  * can't remove ones before the last non-placeholder, though, because we
562  * can't alter the offset numbers of non-placeholder tuples.
563  */
564  if (firstPlaceholder != InvalidOffsetNumber)
565  {
566  /*
567  * We do not store this array to rdata because it's easy to recreate.
568  */
569  for (i = firstPlaceholder; i <= max; i++)
570  itemnos[i - firstPlaceholder] = i;
571 
572  i = max - firstPlaceholder + 1;
573  Assert(opaque->nPlaceholder >= i);
574  opaque->nPlaceholder -= i;
575 
576  /* The array is surely sorted, so can use PageIndexMultiDelete */
577  PageIndexMultiDelete(page, itemnos, i);
578 
579  hasUpdate = true;
580  }
581 
582  xlrec.firstPlaceholder = firstPlaceholder;
583 
584  if (hasUpdate)
585  MarkBufferDirty(buffer);
586 
587  if (hasUpdate && RelationNeedsWAL(index))
588  {
589  XLogRecPtr recptr;
590 
591  XLogBeginInsert();
592 
594  XLogRegisterData((char *) itemToPlaceholder,
595  sizeof(OffsetNumber) * xlrec.nToPlaceholder);
596 
598 
599  recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
600 
601  PageSetLSN(page, recptr);
602  }
603 
605 }
606 
607 /*
608  * Process one page during a bulkdelete scan
609  */
610 static void
612 {
613  Relation index = bds->info->index;
614  Buffer buffer;
615  Page page;
616 
617  /* call vacuum_delay_point while not holding any buffer lock */
619 
620  buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
621  RBM_NORMAL, bds->info->strategy);
623  page = (Page) BufferGetPage(buffer);
624 
625  if (PageIsNew(page))
626  {
627  /*
628  * We found an all-zero page, which could happen if the database
629  * crashed just after extending the file. Recycle it.
630  */
631  }
632  else if (PageIsEmpty(page))
633  {
634  /* nothing to do */
635  }
636  else if (SpGistPageIsLeaf(page))
637  {
638  if (SpGistBlockIsRoot(blkno))
639  {
640  vacuumLeafRoot(bds, index, buffer);
641  /* no need for vacuumRedirectAndPlaceholder */
642  }
643  else
644  {
645  vacuumLeafPage(bds, index, buffer, false);
646  vacuumRedirectAndPlaceholder(index, buffer);
647  }
648  }
649  else
650  {
651  /* inner page */
652  vacuumRedirectAndPlaceholder(index, buffer);
653  }
654 
655  /*
656  * The root pages must never be deleted, nor marked as available in FSM,
657  * because we don't want them ever returned by a search for a place to put
658  * a new tuple. Otherwise, check for empty page, and make sure the FSM
659  * knows about it.
660  */
661  if (!SpGistBlockIsRoot(blkno))
662  {
663  if (PageIsNew(page) || PageIsEmpty(page))
664  {
665  RecordFreeIndexPage(index, blkno);
666  bds->stats->pages_deleted++;
667  }
668  else
669  {
670  SpGistSetLastUsedPage(index, buffer);
671  bds->lastFilledBlock = blkno;
672  }
673  }
674 
675  UnlockReleaseBuffer(buffer);
676 }
677 
678 /*
679  * Process the pending-TID list between pages of the main scan
680  */
681 static void
683 {
684  Relation index = bds->info->index;
685  spgVacPendingItem *pitem;
686  spgVacPendingItem *nitem;
687  BlockNumber blkno;
688  Buffer buffer;
689  Page page;
690 
691  for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
692  {
693  if (pitem->done)
694  continue; /* ignore already-done items */
695 
696  /* call vacuum_delay_point while not holding any buffer lock */
698 
699  /* examine the referenced page */
700  blkno = ItemPointerGetBlockNumber(&pitem->tid);
701  buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
702  RBM_NORMAL, bds->info->strategy);
704  page = (Page) BufferGetPage(buffer);
705 
706  if (PageIsNew(page) || SpGistPageIsDeleted(page))
707  {
708  /* Probably shouldn't happen, but ignore it */
709  }
710  else if (SpGistPageIsLeaf(page))
711  {
712  if (SpGistBlockIsRoot(blkno))
713  {
714  /* this should definitely not happen */
715  elog(ERROR, "redirection leads to root page of index \"%s\"",
716  RelationGetRelationName(index));
717  }
718 
719  /* deal with any deletable tuples */
720  vacuumLeafPage(bds, index, buffer, true);
721  /* might as well do this while we are here */
722  vacuumRedirectAndPlaceholder(index, buffer);
723 
724  SpGistSetLastUsedPage(index, buffer);
725 
726  /*
727  * We can mark as done not only this item, but any later ones
728  * pointing at the same page, since we vacuumed the whole page.
729  */
730  pitem->done = true;
731  for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
732  {
733  if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
734  nitem->done = true;
735  }
736  }
737  else
738  {
739  /*
740  * On an inner page, visit the referenced inner tuple and add all
741  * its downlinks to the pending list. We might have pending items
742  * for more than one inner tuple on the same page (in fact this is
743  * pretty likely given the way space allocation works), so get
744  * them all while we are here.
745  */
746  for (nitem = pitem; nitem != NULL; nitem = nitem->next)
747  {
748  if (nitem->done)
749  continue;
750  if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
751  {
752  OffsetNumber offset;
753  SpGistInnerTuple innerTuple;
754 
755  offset = ItemPointerGetOffsetNumber(&nitem->tid);
756  innerTuple = (SpGistInnerTuple) PageGetItem(page,
757  PageGetItemId(page, offset));
758  if (innerTuple->tupstate == SPGIST_LIVE)
759  {
760  SpGistNodeTuple node;
761  int i;
762 
763  SGITITERATE(innerTuple, i, node)
764  {
765  if (ItemPointerIsValid(&node->t_tid))
766  spgAddPendingTID(bds, &node->t_tid);
767  }
768  }
769  else if (innerTuple->tupstate == SPGIST_REDIRECT)
770  {
771  /* transfer attention to redirect point */
772  spgAddPendingTID(bds,
773  &((SpGistDeadTuple) innerTuple)->pointer);
774  }
775  else
776  elog(ERROR, "unexpected SPGiST tuple state: %d",
777  innerTuple->tupstate);
778 
779  nitem->done = true;
780  }
781  }
782  }
783 
784  UnlockReleaseBuffer(buffer);
785  }
786 
787  spgClearPendingList(bds);
788 }
789 
790 /*
791  * Perform a bulkdelete scan
792  */
793 static void
795 {
796  Relation index = bds->info->index;
797  bool needLock;
798  BlockNumber num_pages,
799  blkno;
800 
801  /* Finish setting up spgBulkDeleteState */
802  initSpGistState(&bds->spgstate, index);
803  bds->pendingList = NULL;
804  bds->myXmin = GetActiveSnapshot()->xmin;
806 
807  /*
808  * Reset counts that will be incremented during the scan; needed in case
809  * of multiple scans during a single VACUUM command
810  */
811  bds->stats->estimated_count = false;
812  bds->stats->num_index_tuples = 0;
813  bds->stats->pages_deleted = 0;
814 
815  /* We can skip locking for new or temp relations */
816  needLock = !RELATION_IS_LOCAL(index);
817 
818  /*
819  * The outer loop iterates over all index pages except the metapage, in
820  * physical order (we hope the kernel will cooperate in providing
821  * read-ahead for speed). It is critical that we visit all leaf pages,
822  * including ones added after we start the scan, else we might fail to
823  * delete some deletable tuples. See more extensive comments about this
824  * in btvacuumscan().
825  */
826  blkno = SPGIST_METAPAGE_BLKNO + 1;
827  for (;;)
828  {
829  /* Get the current relation length */
830  if (needLock)
832  num_pages = RelationGetNumberOfBlocks(index);
833  if (needLock)
835 
836  /* Quit if we've scanned the whole relation */
837  if (blkno >= num_pages)
838  break;
839  /* Iterate over pages, then loop back to recheck length */
840  for (; blkno < num_pages; blkno++)
841  {
842  spgvacuumpage(bds, blkno);
843  /* empty the pending-list after each page */
844  if (bds->pendingList != NULL)
845  spgprocesspending(bds);
846  }
847  }
848 
849  /* Propagate local lastUsedPages cache to metablock */
850  SpGistUpdateMetaPage(index);
851 
852  /*
853  * If we found any empty pages (and recorded them in the FSM), then
854  * forcibly update the upper-level FSM pages to ensure that searchers can
855  * find them. It's possible that the pages were also found during
856  * previous scans and so this is a waste of time, but it's cheap enough
857  * relative to scanning the index that it shouldn't matter much, and
858  * making sure that free pages are available sooner not later seems
859  * worthwhile.
860  *
861  * Note that if no empty pages exist, we don't bother vacuuming the FSM at
862  * all.
863  */
864  if (bds->stats->pages_deleted > 0)
866 
867  /*
868  * Truncate index if possible
869  *
870  * XXX disabled because it's unsafe due to possible concurrent inserts.
871  * We'd have to rescan the pages to make sure they're still empty, and it
872  * doesn't seem worth it. Note that btree doesn't do this either.
873  *
874  * Another reason not to truncate is that it could invalidate the cached
875  * pages-with-freespace pointers in the metapage and other backends'
876  * relation caches, that is leave them pointing to nonexistent pages.
877  * Adding RelationGetNumberOfBlocks calls to protect the places that use
878  * those pointers would be unduly expensive.
879  */
880 #ifdef NOT_USED
881  if (num_pages > bds->lastFilledBlock + 1)
882  {
883  BlockNumber lastBlock = num_pages - 1;
884 
885  num_pages = bds->lastFilledBlock + 1;
886  RelationTruncate(index, num_pages);
887  bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
888  bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
889  }
890 #endif
891 
892  /* Report final stats */
893  bds->stats->num_pages = num_pages;
894  bds->stats->pages_free = bds->stats->pages_deleted;
895 }
896 
897 /*
898  * Bulk deletion of all index entries pointing to a set of heap tuples.
899  * The set of target tuples is specified via a callback routine that tells
900  * whether any given heap tuple (identified by ItemPointer) is being deleted.
901  *
902  * Result: a palloc'd struct containing statistical info for VACUUM displays.
903  */
906  IndexBulkDeleteCallback callback, void *callback_state)
907 {
908  spgBulkDeleteState bds;
909 
910  /* allocate stats if first time through, else re-use existing struct */
911  if (stats == NULL)
913  bds.info = info;
914  bds.stats = stats;
915  bds.callback = callback;
916  bds.callback_state = callback_state;
917 
918  spgvacuumscan(&bds);
919 
920  return stats;
921 }
922 
923 /* Dummy callback to delete no tuples during spgvacuumcleanup */
924 static bool
926 {
927  return false;
928 }
929 
930 /*
931  * Post-VACUUM cleanup.
932  *
933  * Result: a palloc'd struct containing statistical info for VACUUM displays.
934  */
937 {
938  spgBulkDeleteState bds;
939 
940  /* No-op in ANALYZE ONLY mode */
941  if (info->analyze_only)
942  return stats;
943 
944  /*
945  * We don't need to scan the index if there was a preceding bulkdelete
946  * pass. Otherwise, make a pass that won't delete any live tuples, but
947  * might still accomplish useful stuff with redirect/placeholder cleanup
948  * and/or FSM housekeeping, and in any case will provide stats.
949  */
950  if (stats == NULL)
951  {
953  bds.info = info;
954  bds.stats = stats;
955  bds.callback = dummy_callback;
956  bds.callback_state = NULL;
957 
958  spgvacuumscan(&bds);
959  }
960 
961  /*
962  * It's quite possible for us to be fooled by concurrent tuple moves into
963  * double-counting some index tuples, so disbelieve any total that exceeds
964  * the underlying heap's count ... if we know that accurately. Otherwise
965  * this might just make matters worse.
966  */
967  if (!info->estimated_count)
968  {
969  if (stats->num_index_tuples > info->num_heap_tuples)
970  stats->num_index_tuples = info->num_heap_tuples;
971  }
972 
973  return stats;
974 }
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
void SpGistUpdateMetaPage(Relation index)
Definition: spgutils.c:282
#define SPGIST_DEAD
#define XLOG_SPGIST_VACUUM_REDIRECT
Definition: spgxlog.h:29
SpGistInnerTupleData * SpGistInnerTuple
#define PageIsEmpty(page)
Definition: bufpage.h:222
#define SpGistPageIsLeaf(page)
double tuples_removed
Definition: genam.h:78
BlockNumber lastFilledBlock
Definition: spgvacuum.c:53
static void vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
Definition: spgvacuum.c:407
uint32 TransactionId
Definition: c.h:521
#define SPGIST_REDIRECT
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
#define ExclusiveLock
Definition: lockdefs.h:44
IndexVacuumInfo * info
Definition: spgvacuum.c:44
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:220
#define SPGIST_LAST_FIXED_BLKNO
IndexBulkDeleteResult * spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: spgvacuum.c:936
#define SGITITERATE(x, i, nt)
#define RELATION_IS_LOCAL(relation)
Definition: rel.h:583
bool analyze_only
Definition: genam.h:47
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:653
ItemPointerData t_tid
Definition: itup.h:37
#define SPGIST_PLACEHOLDER
OffsetNumber firstPlaceholder
Definition: spgxlog.h:241
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BufferAccessStrategy strategy
Definition: genam.h:52
void SpGistSetLastUsedPage(Relation index, Buffer buffer)
Definition: spgutils.c:505
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:349
Snapshot GetActiveSnapshot(void)
Definition: snapmgr.c:786
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
Relation index
Definition: genam.h:46
uint16 nDelete
Definition: spgxlog.h:228
uint32 BlockNumber
Definition: block.h:31
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:98
static void spgClearPendingList(spgBulkDeleteState *bds)
Definition: spgvacuum.c:90
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4028
uint16 OffsetNumber
Definition: off.h:24
Definition: type.h:89
TransactionId newestRedirectXid
Definition: spgxlog.h:242
static void spgvacuumscan(spgBulkDeleteState *bds)
Definition: spgvacuum.c:794
static void spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
Definition: spgvacuum.c:611
#define XLOG_SPGIST_VACUUM_ROOT
Definition: spgxlog.h:28
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3866
void pfree(void *pointer)
Definition: mcxt.c:1057
void spgPageIndexMultiDelete(SpGistState *state, Page page, OffsetNumber *itemnos, int nitems, int firststate, int reststate, BlockNumber blkno, OffsetNumber offnum)
Definition: spgdoinsert.c:131
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3534
ItemPointerData pointer
#define ERROR
Definition: elog.h:43
IndexBulkDeleteResult * spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: spgvacuum.c:905
BlockNumber num_pages
Definition: genam.h:74
BlockNumber pages_free
Definition: genam.h:80
spgxlogState stateSrc
Definition: spgxlog.h:208
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
#define FirstOffsetNumber
Definition: off.h:27
#define REGBUF_STANDARD
Definition: xloginsert.h:35
#define SPGIST_METAPAGE_BLKNO
IndexBulkDeleteCallback callback
Definition: spgvacuum.c:46
void initSpGistState(SpGistState *state, Relation index)
Definition: spgutils.c:187
SpGistDeadTupleData * SpGistDeadTuple
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:490
#define SizeOfSpgxlogVacuumRedirect
Definition: spgxlog.h:248
struct spgVacPendingItem * next
Definition: spgvacuum.c:37
TransactionId xmin
Definition: snapshot.h:157
struct spgVacPendingItem spgVacPendingItem
BlockNumber pages_deleted
Definition: genam.h:79
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
BlockNumber pages_removed
Definition: genam.h:75
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
static void vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer, bool forPending)
Definition: spgvacuum.c:126
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void LockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:402
void XLogRegisterData(char *data, int len)
Definition: xloginsert.c:330
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:422
unsigned int tupstate
void * palloc0(Size size)
Definition: mcxt.c:981
unsigned int tupstate
void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:452
TransactionId myXmin
Definition: spgvacuum.c:52
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3750
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
#define STORE_STATE(s, d)
OffsetNumber nextOffset
#define InvalidOffsetNumber
Definition: off.h:26
static bool dummy_callback(ItemPointer itemptr, void *state)
Definition: spgvacuum.c:925
#define SizeOfSpgxlogVacuumLeaf
Definition: spgxlog.h:223
double num_heap_tuples
Definition: genam.h:51
void * callback_state
Definition: spgvacuum.c:47
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:746
uint16 nPlaceholder
Definition: spgxlog.h:204
spgVacPendingItem * pendingList
Definition: spgvacuum.c:51
Definition: regguts.h:298
void IndexFreeSpaceMapVacuum(Relation rel)
Definition: indexfsm.c:71
#define SpGistPageIsDeleted(page)
IndexBulkDeleteResult * stats
Definition: spgvacuum.c:45
spgxlogState stateSrc
Definition: spgxlog.h:230
void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Definition: bufpage.c:1036
#define SpGistBlockIsRoot(blkno)
#define SPGIST_LIVE
#define InvalidBlockNumber
Definition: block.h:33
OffsetNumber nextOffset
#define XLOG_SPGIST_VACUUM_LEAF
Definition: spgxlog.h:27
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
#define RelationNeedsWAL(relation)
Definition: rel.h:562
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:29
static void spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
Definition: spgvacuum.c:64
#define SpGistPageGetOpaque(page)
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2661
#define ItemPointerSetInvalid(pointer)
Definition: itemptr.h:172
#define PageIsNew(page)
Definition: bufpage.h:229
#define MaxIndexTuplesPerPage
Definition: itup.h:145
void * palloc(Size size)
Definition: mcxt.c:950
SpGistState spgstate
Definition: spgvacuum.c:50
#define elog(elevel,...)
Definition: elog.h:214
int i
#define SizeOfSpgxlogVacuumRoot
Definition: spgxlog.h:236
ItemPointerData tid
Definition: spgvacuum.c:35
static void vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
Definition: spgvacuum.c:492
ItemPointerData heapPtr
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
#define TransactionIdIsValid(xid)
Definition: transam.h:41
void vacuum_delay_point(void)
Definition: vacuum.c:2051
SpGistLeafTupleData * SpGistLeafTuple
void XLogBeginInsert(void)
Definition: xloginsert.c:123
void RecordFreeIndexPage(Relation rel, BlockNumber freeBlock)
Definition: indexfsm.c:52
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
struct spgBulkDeleteState spgBulkDeleteState
double num_index_tuples
Definition: genam.h:77
int Buffer
Definition: buf.h:23
bool estimated_count
Definition: genam.h:76
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
static void spgprocesspending(spgBulkDeleteState *bds)
Definition: spgvacuum.c:682
Pointer Page
Definition: bufpage.h:78
bool(* IndexBulkDeleteCallback)(ItemPointer itemptr, void *state)
Definition: genam.h:84
bool estimated_count
Definition: genam.h:49
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:277