1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  *
7  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
8  * TIDs. We want to ensure we can vacuum even the very largest relations with
9  * finite memory space usage. To do that, we set upper bounds on the number of
10  * tuples we will keep track of at once.
11  *
12  * We are willing to use at most maintenance_work_mem (or perhaps
13  * autovacuum_work_mem) memory space to keep track of dead tuples. We
14  * initially allocate an array of TIDs of that size, with an upper limit that
15  * depends on table size (this limit ensures we don't allocate a huge area
16  * uselessly for vacuuming small tables). If the array threatens to overflow,
17  * we suspend the heap scan phase and perform a pass of index cleanup and page
18  * compaction, then resume the heap scan with an empty TID array.
19  *
20  * If we're processing a table with no indexes, we can just vacuum each page
21  * as we go; there's no need to save up multiple tuples to minimize the number
22  * of index scans performed. So we don't use maintenance_work_mem memory for
23  * the TID array, just enough to hold as many heap tuples as fit on one page.
24  *
25  * Lazy vacuum supports parallel execution with parallel worker processes. In
26  * a parallel vacuum, we perform both index vacuum and index cleanup with
27  * parallel worker processes. Individual indexes are processed by one vacuum
28  * process. At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
29  * the parallel context and initialize the DSM segment that contains shared
30  * information as well as the memory space for storing dead tuples. When
31  * starting either index vacuum or index cleanup, we launch parallel worker
32  * processes. Once all indexes are processed the parallel worker processes
33  * exit. After that, the leader process re-initializes the parallel context
34  * so that it can use the same DSM for multiple passes of index vacuum and
 35  * for performing index cleanup. Updating the index statistics requires
 36  * updating a system catalog, and since such updates are not allowed while
 37  * in parallel mode, we update the index statistics only after exiting
 38  * parallel mode.
39  *
40  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
41  * Portions Copyright (c) 1994, Regents of the University of California
42  *
43  *
44  * IDENTIFICATION
45  * src/backend/access/heap/vacuumlazy.c
46  *
47  *-------------------------------------------------------------------------
48  */
49 #include "postgres.h"
50 
51 #include <math.h>
52 
53 #include "access/amapi.h"
54 #include "access/genam.h"
55 #include "access/heapam.h"
56 #include "access/heapam_xlog.h"
57 #include "access/htup_details.h"
58 #include "access/multixact.h"
59 #include "access/parallel.h"
60 #include "access/transam.h"
61 #include "access/visibilitymap.h"
62 #include "access/xact.h"
63 #include "access/xlog.h"
64 #include "catalog/index.h"
65 #include "catalog/storage.h"
66 #include "commands/dbcommands.h"
67 #include "commands/progress.h"
68 #include "commands/vacuum.h"
69 #include "executor/instrument.h"
70 #include "miscadmin.h"
71 #include "optimizer/paths.h"
72 #include "pgstat.h"
73 #include "portability/instr_time.h"
74 #include "postmaster/autovacuum.h"
75 #include "storage/bufmgr.h"
76 #include "storage/freespace.h"
77 #include "storage/lmgr.h"
78 #include "tcop/tcopprot.h"
79 #include "utils/lsyscache.h"
80 #include "utils/memutils.h"
81 #include "utils/pg_rusage.h"
82 #include "utils/timestamp.h"
83 
84 
85 /*
86  * Space/time tradeoff parameters: do these need to be user-tunable?
87  *
88  * To consider truncating the relation, we want there to be at least
89  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
90  * is less) potentially-freeable pages.
91  */
92 #define REL_TRUNCATE_MINIMUM 1000
93 #define REL_TRUNCATE_FRACTION 16
94 
95 /*
96  * Timing parameters for truncate locking heuristics.
97  *
98  * These were not exposed as user tunable GUC values because it didn't seem
99  * that the potential for improvement was great enough to merit the cost of
100  * supporting them.
101  */
102 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
103 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
104 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
105 
106 /*
107  * Threshold that controls whether we bypass index vacuuming and heap
108  * vacuuming as an optimization
109  */
110 #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
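/*
 * Worked example (added note): with BYPASS_THRESHOLD_PAGES = 0.02, a table
 * of 100,000 pages may have LP_DEAD items on up to ~2,000 pages and still
 * be considered for the bypass; lazy_vacuum() also applies further
 * conditions (e.g. a cap on the number of collected dead item TIDs) before
 * actually skipping index vacuuming.
 */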
111 
112 /*
113  * Perform a failsafe check every 4GB during the heap scan, approximately
114  */
115 #define FAILSAFE_EVERY_PAGES \
116  ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
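/*
 * Added note: with the default BLCKSZ of 8192 this evaluates to
 * 4 GB / 8 kB = 524288 heap blocks between failsafe checks.
 */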
117 
118 /*
119  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
120  * (it won't be exact because we only vacuum FSM after processing a heap page
121  * that has some removable tuples). When there are indexes, this is ignored,
122  * and we vacuum FSM after each index/heap cleaning pass.
123  */
124 #define VACUUM_FSM_EVERY_PAGES \
125  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
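/*
 * Added note: with the default BLCKSZ of 8192 this evaluates to
 * 8 GB / 8 kB = 1048576 heap blocks between FSM vacuuming passes.
 */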
126 
127 /*
128  * Guesstimation of number of dead tuples per page. This is used to
129  * provide an upper limit to memory allocated when vacuuming small
130  * tables.
131  */
132 #define LAZY_ALLOC_TUPLES MaxHeapTuplesPerPage
133 
134 /*
135  * Before we consider skipping a page that's marked as clean in
136  * visibility map, we must've seen at least this many clean pages.
137  */
138 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
139 
140 /*
141  * Size of the prefetch window for lazy vacuum backwards truncation scan.
142  * Needs to be a power of 2.
143  */
144 #define PREFETCH_SIZE ((BlockNumber) 32)
145 
146 /*
147  * DSM keys for parallel vacuum. Unlike other parallel execution code, since
148  * we don't need to worry about DSM keys conflicting with plan_node_id we can
149  * use small integers.
150  */
151 #define PARALLEL_VACUUM_KEY_SHARED 1
152 #define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
153 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
154 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
155 #define PARALLEL_VACUUM_KEY_WAL_USAGE 5
156 
157 /*
158  * Macro to check if we are in a parallel vacuum. If true, we are in the
159  * parallel mode and the DSM segment is initialized.
160  */
161 #define ParallelVacuumIsActive(vacrel) ((vacrel)->lps != NULL)
162 
163 /* Phases of vacuum during which we report error context. */
164 typedef enum
165 {
 166  VACUUM_ERRCB_PHASE_UNKNOWN,
 167  VACUUM_ERRCB_PHASE_SCAN_HEAP,
 168  VACUUM_ERRCB_PHASE_VACUUM_INDEX,
 169  VACUUM_ERRCB_PHASE_VACUUM_HEAP,
 170  VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
 171  VACUUM_ERRCB_PHASE_TRUNCATE
 172 } VacErrPhase;
173 
174 /*
175  * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
176  * This is allocated in the DSM segment in parallel mode and in local memory
177  * in non-parallel mode.
178  */
179 typedef struct LVDeadTuples
180 {
181  int max_tuples; /* # slots allocated in array */
182  int num_tuples; /* current # of entries */
183  /* List of TIDs of tuples we intend to delete */
184  /* NB: this list is ordered by TID address */
 185  ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER]; /* array of
 186  * ItemPointerData */
187 } LVDeadTuples;
188 
189 /* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
190 #define SizeOfDeadTuples(cnt) \
191  add_size(offsetof(LVDeadTuples, itemptrs), \
192  mul_size(sizeof(ItemPointerData), cnt))
193 #define MAXDEADTUPLES(max_size) \
194  (((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
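/*
 * Worked example (added note): sizeof(ItemPointerData) is 6 bytes, so with
 * maintenance_work_mem set to 64 MB, MAXDEADTUPLES(64 * 1024 * 1024) comes
 * to roughly 11.1 million TIDs; a heap scan can therefore accumulate about
 * 11 million dead item pointers before an index vacuuming pass is forced.
 */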
195 
196 /*
197  * Shared information among parallel workers. So this is allocated in the DSM
198  * segment.
199  */
200 typedef struct LVShared
201 {
202  /*
203  * Target table relid and log level. These fields are not modified during
204  * the lazy vacuum.
205  */
 206  Oid relid;
 207  int elevel;
208 
209  /*
210  * An indication for vacuum workers to perform either index vacuum or
211  * index cleanup. first_time is true only if for_cleanup is true and
 212  * bulk-deletion has not been performed yet.
213  */
 214  bool for_cleanup;
 215  bool first_time;
 216 
217  /*
218  * Fields for both index vacuum and cleanup.
219  *
220  * reltuples is the total number of input heap tuples. We set either old
221  * live tuples in the index vacuum case or the new live tuples in the
222  * index cleanup case.
223  *
224  * estimated_count is true if reltuples is an estimated value. (Note that
225  * reltuples could be -1 in this case, indicating we have no idea.)
226  */
227  double reltuples;
 228  bool estimated_count;
 229 
230  /*
231  * In single process lazy vacuum we could consume more memory during index
232  * vacuuming or cleanup apart from the memory for heap scanning. In
233  * parallel vacuum, since individual vacuum workers can consume memory
234  * equal to maintenance_work_mem, the new maintenance_work_mem for each
235  * worker is set such that the parallel operation doesn't consume more
236  * memory than single process lazy vacuum.
237  */
 238  int maintenance_work_mem_worker;
 239 
240  /*
241  * Shared vacuum cost balance. During parallel vacuum,
242  * VacuumSharedCostBalance points to this value and it accumulates the
243  * balance of each parallel vacuum worker.
244  */
 245  pg_atomic_uint32 cost_balance;
 246 
247  /*
248  * Number of active parallel workers. This is used for computing the
249  * minimum threshold of the vacuum cost balance before a worker sleeps for
250  * cost-based delay.
251  */
 252  pg_atomic_uint32 active_nworkers;
 253 
254  /*
255  * Variables to control parallel vacuum. We have a bitmap to indicate
256  * which index has stats in shared memory. The set bit in the map
257  * indicates that the particular index supports a parallel vacuum.
258  */
259  pg_atomic_uint32 idx; /* counter for vacuuming and clean up */
260  uint32 offset; /* sizeof header incl. bitmap */
261  bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]; /* bit map of NULLs */
262 
263  /* Shared index statistics data follows at end of struct */
264 } LVShared;
265 
266 #define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
267 #define GetSharedIndStats(s) \
268  ((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
269 #define IndStatsIsNull(s, i) \
270  (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
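/*
 * Worked example (added note): for index number 10, IndStatsIsNull() tests
 * bit (10 & 0x07) = 2 of bitmap[10 >> 3] = bitmap[1]; a clear bit means
 * that index has no stats slot in the shared memory area.
 */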
271 
272 /*
273  * Struct for an index bulk-deletion statistic used for parallel vacuum. This
274  * is allocated in the DSM segment.
275  */
276 typedef struct LVSharedIndStats
277 {
278  bool updated; /* are the stats updated? */
 279  IndexBulkDeleteResult istat;
 280 } LVSharedIndStats;
 281 
282 /* Struct for maintaining a parallel vacuum state. */
283 typedef struct LVParallelState
284 {
 285  ParallelContext *pcxt;
 286 
 287  /* Shared information among parallel vacuum workers */
 288  LVShared *lvshared;
 289 
 290  /* Points to buffer usage area in DSM */
 291  BufferUsage *buffer_usage;
 292 
 293  /* Points to WAL usage area in DSM */
 294  WalUsage *wal_usage;
 295 
 296  /*
 297  * The number of indexes that support parallel index bulk-deletion and
 298  * parallel index cleanup respectively.
 299  */
 300  int nindexes_parallel_bulkdel;
 301  int nindexes_parallel_cleanup;
 302  int nindexes_parallel_condcleanup;
 303 } LVParallelState;
 304 
305 typedef struct LVRelState
306 {
 307  /* Target heap relation and its indexes */
 308  Relation rel;
 309  Relation *indrels;
 310  int nindexes;
 311 
 312  /* Wraparound failsafe has been triggered? */
 313  bool failsafe_active;
 314  /* Consider index vacuuming bypass optimization? */
 315  bool consider_bypass_optimization;
 316 
 317  /* Doing index vacuuming, index cleanup, rel truncation? */
 318  bool do_index_vacuuming;
 319  bool do_index_cleanup;
 320  bool do_rel_truncate;
 321 
 322  /* Buffer access strategy and parallel state */
 323  BufferAccessStrategy bstrategy;
 324  LVParallelState *lps;
 325 
 326  /* rel's initial relfrozenxid and relminmxid */
 327  TransactionId relfrozenxid;
 328  MultiXactId relminmxid;
 329  double old_live_tuples; /* previous value of pg_class.reltuples */
 330 
 331  /* VACUUM operation's cutoff for pruning */
 332  TransactionId OldestXmin;
 333  /* VACUUM operation's cutoff for freezing XIDs and MultiXactIds */
 334  TransactionId FreezeLimit;
 335  MultiXactId MultiXactCutoff;
 336 
 337  /* Error reporting state */
 338  char *relnamespace;
 339  char *relname;
 340  char *indname;
 341  BlockNumber blkno; /* used only for heap operations */
 342  OffsetNumber offnum; /* used only for heap operations */
 343  VacErrPhase phase;
 344 
345  /*
346  * State managed by lazy_scan_heap() follows
347  */
348  LVDeadTuples *dead_tuples; /* items to vacuum from indexes */
349  BlockNumber rel_pages; /* total number of pages */
350  BlockNumber scanned_pages; /* number of pages we examined */
351  BlockNumber pinskipped_pages; /* # of pages skipped due to a pin */
352  BlockNumber frozenskipped_pages; /* # of frozen pages we skipped */
353  BlockNumber tupcount_pages; /* pages whose tuples we counted */
 354  BlockNumber pages_removed; /* pages removed by truncation */
355  BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
356  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
357 
358  /* Statistics output by us, for table */
359  double new_rel_tuples; /* new estimated total # of tuples */
360  double new_live_tuples; /* new estimated total # of live tuples */
361  /* Statistics output by index AMs */
 362  IndexBulkDeleteResult **indstats;
 363 
364  /* Instrumentation counters */
 365  int num_index_scans;
 366  int64 tuples_deleted; /* # deleted from table */
367  int64 lpdead_items; /* # deleted from indexes */
368  int64 new_dead_tuples; /* new estimated total # of dead items in
369  * table */
370  int64 num_tuples; /* total number of nonremovable tuples */
371  int64 live_tuples; /* live tuples (reltuples estimate) */
372 } LVRelState;
373 
374 /*
375  * State returned by lazy_scan_prune()
376  */
377 typedef struct LVPagePruneState
378 {
379  bool hastup; /* Page is truncatable? */
380  bool has_lpdead_items; /* includes existing LP_DEAD items */
381 
382  /*
383  * State describes the proper VM bit states to set for the page following
384  * pruning and freezing. all_visible implies !has_lpdead_items, but don't
385  * trust all_frozen result unless all_visible is also set to true.
386  */
387  bool all_visible; /* Every item visible to all? */
388  bool all_frozen; /* provided all_visible is also true */
389  TransactionId visibility_cutoff_xid; /* For recovery conflicts */
 390 } LVPagePruneState;
 391 
392 /* Struct for saving and restoring vacuum error information. */
393 typedef struct LVSavedErrInfo
394 {
 395  BlockNumber blkno; /* used only for heap operations */
 396  OffsetNumber offnum; /* used only for heap operations */
 397  VacErrPhase phase;
 398 } LVSavedErrInfo;
 399 
400 /* elevel controls whole VACUUM's verbosity */
401 static int elevel = -1;
402 
403 
404 /* non-export function prototypes */
405 static void lazy_scan_heap(LVRelState *vacrel, VacuumParams *params,
406  bool aggressive);
407 static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
408  BlockNumber blkno, Page page,
409  GlobalVisState *vistest,
410  LVPagePruneState *prunestate);
411 static void lazy_vacuum(LVRelState *vacrel);
412 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
413 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
414 static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
415  Buffer buffer, int tupindex, Buffer *vmbuffer);
416 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup,
417  LVRelState *vacrel);
418 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
 419 static void do_parallel_lazy_vacuum_all_indexes(LVRelState *vacrel);
 420 static void do_parallel_lazy_cleanup_all_indexes(LVRelState *vacrel);
 421 static void do_parallel_vacuum_or_cleanup(LVRelState *vacrel, int nworkers);
422 static void do_parallel_processing(LVRelState *vacrel,
423  LVShared *lvshared);
 424 static void do_serial_processing_for_unsafe_indexes(LVRelState *vacrel,
 425  LVShared *lvshared);
 426 static IndexBulkDeleteResult *parallel_process_one_index(Relation indrel,
 427  IndexBulkDeleteResult *istat,
428  LVShared *lvshared,
429  LVSharedIndStats *shared_indstats,
430  LVRelState *vacrel);
431 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
 432 static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
 433  IndexBulkDeleteResult *istat,
434  double reltuples,
435  LVRelState *vacrel);
 436 static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
 437  IndexBulkDeleteResult *istat,
438  double reltuples,
439  bool estimated_count,
440  LVRelState *vacrel);
441 static bool should_attempt_truncation(LVRelState *vacrel);
442 static void lazy_truncate_heap(LVRelState *vacrel);
 443 static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
 444  bool *lock_waiter_detected);
445 static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
446 static void lazy_space_alloc(LVRelState *vacrel, int nworkers,
447  BlockNumber relblocks);
448 static void lazy_space_free(LVRelState *vacrel);
449 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
450 static int vac_cmp_itemptr(const void *left, const void *right);
451 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
452  TransactionId *visibility_cutoff_xid, bool *all_frozen);
 453 static int compute_parallel_vacuum_workers(LVRelState *vacrel,
 454  int nrequested,
455  bool *can_parallel_vacuum);
456 static void update_index_statistics(LVRelState *vacrel);
 457 static LVParallelState *begin_parallel_vacuum(LVRelState *vacrel,
 458  BlockNumber nblocks,
459  int nrequested);
460 static void end_parallel_vacuum(LVRelState *vacrel);
461 static LVSharedIndStats *parallel_stats_for_idx(LVShared *lvshared, int getidx);
462 static bool parallel_processing_is_safe(Relation indrel, LVShared *lvshared);
463 static void vacuum_error_callback(void *arg);
464 static void update_vacuum_error_info(LVRelState *vacrel,
465  LVSavedErrInfo *saved_vacrel,
466  int phase, BlockNumber blkno,
467  OffsetNumber offnum);
468 static void restore_vacuum_error_info(LVRelState *vacrel,
469  const LVSavedErrInfo *saved_vacrel);
470 
471 
472 /*
473  * heap_vacuum_rel() -- perform VACUUM for one heap relation
474  *
475  * This routine vacuums a single heap, cleans out its indexes, and
476  * updates its relpages and reltuples statistics.
477  *
478  * At entry, we have already established a transaction and opened
479  * and locked the relation.
480  */
481 void
 482 heap_vacuum_rel(Relation rel, VacuumParams *params,
 483  BufferAccessStrategy bstrategy)
484 {
485  LVRelState *vacrel;
486  PGRUsage ru0;
487  TimestampTz starttime = 0;
488  WalUsage walusage_start = pgWalUsage;
489  WalUsage walusage = {0, 0, 0};
490  long secs;
491  int usecs;
492  double read_rate,
493  write_rate;
494  bool aggressive; /* should we scan all unfrozen pages? */
495  bool scanned_all_unfrozen; /* actually scanned all such pages? */
496  char **indnames = NULL;
497  TransactionId xidFullScanLimit;
498  MultiXactId mxactFullScanLimit;
499  BlockNumber new_rel_pages;
500  BlockNumber new_rel_allvisible;
501  double new_live_tuples;
502  TransactionId new_frozen_xid;
503  MultiXactId new_min_multi;
504  ErrorContextCallback errcallback;
505  PgStat_Counter startreadtime = 0;
506  PgStat_Counter startwritetime = 0;
507  TransactionId OldestXmin;
508  TransactionId FreezeLimit;
509  MultiXactId MultiXactCutoff;
510 
511  /* measure elapsed time iff autovacuum logging requires it */
512  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
513  {
514  pg_rusage_init(&ru0);
515  starttime = GetCurrentTimestamp();
516  if (track_io_timing)
517  {
518  startreadtime = pgStatBlockReadTime;
519  startwritetime = pgStatBlockWriteTime;
520  }
521  }
522 
523  if (params->options & VACOPT_VERBOSE)
524  elevel = INFO;
525  else
526  elevel = DEBUG2;
527 
 528  pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
 529  RelationGetRelid(rel));
530 
 531  vacuum_set_xid_limits(rel,
 532  params->freeze_min_age,
533  params->freeze_table_age,
534  params->multixact_freeze_min_age,
 535  params->multixact_freeze_table_age,
 536  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
537  &MultiXactCutoff, &mxactFullScanLimit);
538 
539  /*
540  * We request an aggressive scan if the table's frozen Xid is now older
541  * than or equal to the requested Xid full-table scan limit; or if the
542  * table's minimum MultiXactId is older than or equal to the requested
543  * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
544  */
545  aggressive = TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
546  xidFullScanLimit);
547  aggressive |= MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
548  mxactFullScanLimit);
 549  if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
 550  aggressive = true;
551 
552  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
553 
554  /* Set up high level stuff about rel */
555  vacrel->rel = rel;
556  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
557  &vacrel->indrels);
558  vacrel->failsafe_active = false;
559  vacrel->consider_bypass_optimization = true;
560 
561  /*
562  * The index_cleanup param either disables index vacuuming and cleanup or
563  * forces it to go ahead when we would otherwise apply the index bypass
564  * optimization. The default is 'auto', which leaves the final decision
565  * up to lazy_vacuum().
566  *
 567  * The truncate param allows the user to avoid attempting relation truncation,
568  * though it can't force truncation to happen.
569  */
 570  Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
 571  Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
 572  params->truncate != VACOPTVALUE_AUTO);
573  vacrel->do_index_vacuuming = true;
574  vacrel->do_index_cleanup = true;
575  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
576  if (params->index_cleanup == VACOPTVALUE_DISABLED)
577  {
578  /* Force disable index vacuuming up-front */
579  vacrel->do_index_vacuuming = false;
580  vacrel->do_index_cleanup = false;
581  }
582  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
583  {
584  /* Force index vacuuming. Note that failsafe can still bypass. */
585  vacrel->consider_bypass_optimization = false;
586  }
587  else
588  {
589  /* Default/auto, make all decisions dynamically */
 590  Assert(params->index_cleanup == VACOPTVALUE_AUTO);
 591  }
592 
593  vacrel->bstrategy = bstrategy;
594  vacrel->relfrozenxid = rel->rd_rel->relfrozenxid;
595  vacrel->relminmxid = rel->rd_rel->relminmxid;
596  vacrel->old_live_tuples = rel->rd_rel->reltuples;
597 
598  /* Set cutoffs for entire VACUUM */
599  vacrel->OldestXmin = OldestXmin;
600  vacrel->FreezeLimit = FreezeLimit;
601  vacrel->MultiXactCutoff = MultiXactCutoff;
602 
 603  vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
 604  vacrel->relname = pstrdup(RelationGetRelationName(rel));
605  vacrel->indname = NULL;
 606  vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
 607 
608  /* Save index names iff autovacuum logging requires it */
609  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0 &&
610  vacrel->nindexes > 0)
611  {
612  indnames = palloc(sizeof(char *) * vacrel->nindexes);
613  for (int i = 0; i < vacrel->nindexes; i++)
614  indnames[i] =
 615  pstrdup(RelationGetRelationName(vacrel->indrels[i]));
 616  }
617 
618  /*
619  * Setup error traceback support for ereport(). The idea is to set up an
620  * error context callback to display additional information on any error
621  * during a vacuum. During different phases of vacuum (heap scan, heap
622  * vacuum, index vacuum, index clean up, heap truncate), we update the
623  * error context callback to display appropriate information.
624  *
625  * Note that the index vacuum and heap vacuum phases may be called
626  * multiple times in the middle of the heap scan phase. So the old phase
627  * information is restored at the end of those phases.
628  */
629  errcallback.callback = vacuum_error_callback;
630  errcallback.arg = vacrel;
631  errcallback.previous = error_context_stack;
632  error_context_stack = &errcallback;
633 
634  /* Do the vacuuming */
635  lazy_scan_heap(vacrel, params, aggressive);
636 
637  /* Done with indexes */
638  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
639 
640  /*
 641  * Compute whether we actually scanned all of the unfrozen pages. If we did,
642  * we can adjust relfrozenxid and relminmxid.
643  *
644  * NB: We need to check this before truncating the relation, because that
645  * will change ->rel_pages.
646  */
647  if ((vacrel->scanned_pages + vacrel->frozenskipped_pages)
648  < vacrel->rel_pages)
649  {
650  Assert(!aggressive);
651  scanned_all_unfrozen = false;
652  }
653  else
654  scanned_all_unfrozen = true;
655 
656  /*
657  * Optionally truncate the relation.
658  */
659  if (should_attempt_truncation(vacrel))
660  {
661  /*
662  * Update error traceback information. This is the last phase during
663  * which we add context information to errors, so we don't need to
664  * revert to the previous phase.
665  */
 666  update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
 667  vacrel->nonempty_pages,
 668  InvalidOffsetNumber);
669  lazy_truncate_heap(vacrel);
670  }
671 
672  /* Pop the error context stack */
673  error_context_stack = errcallback.previous;
674 
675  /* Report that we are now doing final cleanup */
 676  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 677  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
 678 
679  /*
680  * Update statistics in pg_class.
681  *
682  * In principle new_live_tuples could be -1 indicating that we (still)
683  * don't know the tuple count. In practice that probably can't happen,
684  * since we'd surely have scanned some pages if the table is new and
685  * nonempty.
686  *
687  * For safety, clamp relallvisible to be not more than what we're setting
688  * relpages to.
689  *
690  * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
691  * since then we don't know for certain that all tuples have a newer xmin.
692  */
693  new_rel_pages = vacrel->rel_pages;
694  new_live_tuples = vacrel->new_live_tuples;
695 
696  visibilitymap_count(rel, &new_rel_allvisible, NULL);
697  if (new_rel_allvisible > new_rel_pages)
698  new_rel_allvisible = new_rel_pages;
699 
700  new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
701  new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
702 
 703  vac_update_relstats(rel,
 704  new_rel_pages,
705  new_live_tuples,
706  new_rel_allvisible,
707  vacrel->nindexes > 0,
708  new_frozen_xid,
709  new_min_multi,
710  false);
711 
712  /*
713  * Report results to the stats collector, too.
714  *
715  * Deliberately avoid telling the stats collector about LP_DEAD items that
716  * remain in the table due to VACUUM bypassing index and heap vacuuming.
717  * ANALYZE will consider the remaining LP_DEAD items to be dead tuples. It
718  * seems like a good idea to err on the side of not vacuuming again too
719  * soon in cases where the failsafe prevented significant amounts of heap
720  * vacuuming.
721  */
 722  pgstat_report_vacuum(RelationGetRelid(rel),
 723  rel->rd_rel->relisshared,
724  Max(new_live_tuples, 0),
725  vacrel->new_dead_tuples);
 726  pgstat_progress_end_command();
 727 
728  /* and log the action if appropriate */
729  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
730  {
731  TimestampTz endtime = GetCurrentTimestamp();
732 
733  if (params->log_min_duration == 0 ||
734  TimestampDifferenceExceeds(starttime, endtime,
735  params->log_min_duration))
736  {
 737  StringInfoData buf;
 738  char *msgfmt;
739  BlockNumber orig_rel_pages;
740 
741  TimestampDifference(starttime, endtime, &secs, &usecs);
742 
743  memset(&walusage, 0, sizeof(WalUsage));
744  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
745 
746  read_rate = 0;
747  write_rate = 0;
748  if ((secs > 0) || (usecs > 0))
749  {
750  read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
751  (secs + usecs / 1000000.0);
752  write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
753  (secs + usecs / 1000000.0);
754  }
755 
756  /*
757  * This is pretty messy, but we split it up so that we can skip
758  * emitting individual parts of the message when not applicable.
759  */
760  initStringInfo(&buf);
761  if (params->is_wraparound)
762  {
763  /*
764  * While it's possible for a VACUUM to be both is_wraparound
765  * and !aggressive, that's just a corner-case -- is_wraparound
766  * implies aggressive. Produce distinct output for the corner
767  * case all the same, just in case.
768  */
769  if (aggressive)
770  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
771  else
772  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
773  }
774  else
775  {
776  if (aggressive)
777  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
778  else
779  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
780  }
781  appendStringInfo(&buf, msgfmt,
 782  get_database_name(MyDatabaseId),
 783  vacrel->relnamespace,
784  vacrel->relname,
785  vacrel->num_index_scans);
786  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
787  vacrel->pages_removed,
788  vacrel->rel_pages,
789  vacrel->pinskipped_pages,
790  vacrel->frozenskipped_pages);
791  appendStringInfo(&buf,
792  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %u\n"),
793  (long long) vacrel->tuples_deleted,
794  (long long) vacrel->new_rel_tuples,
795  (long long) vacrel->new_dead_tuples,
796  OldestXmin);
797  orig_rel_pages = vacrel->rel_pages + vacrel->pages_removed;
798  if (orig_rel_pages > 0)
799  {
800  if (vacrel->do_index_vacuuming)
801  {
802  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
803  appendStringInfoString(&buf, _("index scan not needed: "));
804  else
805  appendStringInfoString(&buf, _("index scan needed: "));
806 
807  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
808  }
809  else
810  {
811  if (!vacrel->failsafe_active)
812  appendStringInfoString(&buf, _("index scan bypassed: "));
813  else
814  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
815 
816  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
817  }
818  appendStringInfo(&buf, msgfmt,
819  vacrel->lpdead_item_pages,
820  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
821  (long long) vacrel->lpdead_items);
822  }
823  for (int i = 0; i < vacrel->nindexes; i++)
824  {
825  IndexBulkDeleteResult *istat = vacrel->indstats[i];
826 
827  if (!istat)
828  continue;
829 
830  appendStringInfo(&buf,
831  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
832  indnames[i],
833  istat->num_pages,
834  istat->pages_newly_deleted,
835  istat->pages_deleted,
836  istat->pages_free);
837  }
838  if (track_io_timing)
839  {
840  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
841  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
842 
843  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
844  read_ms, write_ms);
845  }
846  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
847  read_rate, write_rate);
848  appendStringInfo(&buf,
849  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
850  (long long) VacuumPageHit,
851  (long long) VacuumPageMiss,
852  (long long) VacuumPageDirty);
853  appendStringInfo(&buf,
854  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
855  (long long) walusage.wal_records,
856  (long long) walusage.wal_fpi,
857  (unsigned long long) walusage.wal_bytes);
858  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
859 
860  ereport(LOG,
861  (errmsg_internal("%s", buf.data)));
862  pfree(buf.data);
863  }
864  }
865 
866  /* Cleanup index statistics and index names */
867  for (int i = 0; i < vacrel->nindexes; i++)
868  {
869  if (vacrel->indstats[i])
870  pfree(vacrel->indstats[i]);
871 
872  if (indnames && indnames[i])
873  pfree(indnames[i]);
874  }
875 }
876 
877 /*
878  * lazy_scan_heap() -- scan an open heap relation
879  *
880  * This routine prunes each page in the heap, which will among other
881  * things truncate dead tuples to dead line pointers, defragment the
882  * page, and set commit status bits (see heap_page_prune). It also builds
883  * lists of dead tuples and pages with free space, calculates statistics
884  * on the number of live tuples in the heap, and marks pages as
885  * all-visible if appropriate. When done, or when we run low on space
886  * for dead-tuple TIDs, invoke lazy_vacuum to vacuum indexes and vacuum
 887  * the heap relation during its own second pass over the heap.
888  *
889  * If the table has at least two indexes, we execute both index vacuum
890  * and index cleanup with parallel workers unless parallel vacuum is
891  * disabled. In a parallel vacuum, we enter parallel mode and then
892  * create both the parallel context and the DSM segment before starting
893  * heap scan so that we can record dead tuples to the DSM segment. All
894  * parallel workers are launched at beginning of index vacuuming and
895  * index cleanup and they exit once done with all indexes. At the end of
896  * this function we exit from parallel mode. Index bulk-deletion results
897  * are stored in the DSM segment and we update index statistics for all
898  * the indexes after exiting from parallel mode since writes are not
899  * allowed during parallel mode.
900  *
901  * If there are no indexes then we can reclaim line pointers on the fly;
902  * dead line pointers need only be retained until all index pointers that
903  * reference them have been killed.
904  */
905 static void
906 lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
907 {
908  LVDeadTuples *dead_tuples;
909  BlockNumber nblocks,
910  blkno,
911  next_unskippable_block,
912  next_failsafe_block,
913  next_fsm_block_to_vacuum;
914  PGRUsage ru0;
915  Buffer vmbuffer = InvalidBuffer;
916  bool skipping_blocks;
 917  StringInfoData buf;
 918  const int initprog_index[] = {
 919  PROGRESS_VACUUM_PHASE,
 920  PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
 921  PROGRESS_VACUUM_MAX_DEAD_TUPLES
 922  };
923  int64 initprog_val[3];
924  GlobalVisState *vistest;
925 
926  pg_rusage_init(&ru0);
927 
928  if (aggressive)
929  ereport(elevel,
930  (errmsg("aggressively vacuuming \"%s.%s\"",
931  vacrel->relnamespace,
932  vacrel->relname)));
933  else
934  ereport(elevel,
935  (errmsg("vacuuming \"%s.%s\"",
936  vacrel->relnamespace,
937  vacrel->relname)));
938 
939  nblocks = RelationGetNumberOfBlocks(vacrel->rel);
940  next_unskippable_block = 0;
941  next_failsafe_block = 0;
942  next_fsm_block_to_vacuum = 0;
943  vacrel->rel_pages = nblocks;
944  vacrel->scanned_pages = 0;
945  vacrel->pinskipped_pages = 0;
946  vacrel->frozenskipped_pages = 0;
947  vacrel->tupcount_pages = 0;
948  vacrel->pages_removed = 0;
949  vacrel->lpdead_item_pages = 0;
950  vacrel->nonempty_pages = 0;
951 
952  /* Initialize instrumentation counters */
953  vacrel->num_index_scans = 0;
954  vacrel->tuples_deleted = 0;
955  vacrel->lpdead_items = 0;
956  vacrel->new_dead_tuples = 0;
957  vacrel->num_tuples = 0;
958  vacrel->live_tuples = 0;
959 
960  vistest = GlobalVisTestFor(vacrel->rel);
961 
962  vacrel->indstats = (IndexBulkDeleteResult **)
963  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
964 
965  /*
966  * Before beginning scan, check if it's already necessary to apply
967  * failsafe
968  */
 969  lazy_check_wraparound_failsafe(vacrel);
 970 
971  /*
972  * Allocate the space for dead tuples. Note that this handles parallel
973  * VACUUM initialization as part of allocating shared memory space used
974  * for dead_tuples.
975  */
976  lazy_space_alloc(vacrel, params->nworkers, nblocks);
977  dead_tuples = vacrel->dead_tuples;
978 
979  /* Report that we're scanning the heap, advertising total # of blocks */
980  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
981  initprog_val[1] = nblocks;
982  initprog_val[2] = dead_tuples->max_tuples;
983  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
984 
985  /*
986  * Except when aggressive is set, we want to skip pages that are
987  * all-visible according to the visibility map, but only when we can skip
988  * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading
989  * sequentially, the OS should be doing readahead for us, so there's no
990  * gain in skipping a page now and then; that's likely to disable
991  * readahead and so be counterproductive. Also, skipping even a single
992  * page means that we can't update relfrozenxid, so we only want to do it
993  * if we can skip a goodly number of pages.
994  *
995  * When aggressive is set, we can't skip pages just because they are
996  * all-visible, but we can still skip pages that are all-frozen, since
997  * such pages do not need freezing and do not affect the value that we can
998  * safely set for relfrozenxid or relminmxid.
999  *
1000  * Before entering the main loop, establish the invariant that
1001  * next_unskippable_block is the next block number >= blkno that we can't
1002  * skip based on the visibility map, either all-visible for a regular scan
1003  * or all-frozen for an aggressive scan. We set it to nblocks if there's
1004  * no such block. We also set up the skipping_blocks flag correctly at
1005  * this stage.
1006  *
1007  * Note: The value returned by visibilitymap_get_status could be slightly
1008  * out-of-date, since we make this test before reading the corresponding
1009  * heap page or locking the buffer. This is OK. If we mistakenly think
1010  * that the page is all-visible or all-frozen when in fact the flag's just
1011  * been cleared, we might fail to vacuum the page. It's easy to see that
1012  * skipping a page when aggressive is not set is not a very big deal; we
1013  * might leave some dead tuples lying around, but the next vacuum will
1014  * find them. But even when aggressive *is* set, it's still OK if we miss
1015  * a page whose all-frozen marking has just been cleared. Any new XIDs
1016  * just added to that page are necessarily newer than the GlobalXmin we
1017  * computed, so they'll have no effect on the value to which we can safely
1018  * set relfrozenxid. A similar argument applies for MXIDs and relminmxid.
1019  *
1020  * We will scan the table's last page, at least to the extent of
1021  * determining whether it has tuples or not, even if it should be skipped
1022  * according to the above rules; except when we've already determined that
1023  * it's not worth trying to truncate the table. This avoids having
1024  * lazy_truncate_heap() take access-exclusive lock on the table to attempt
1025  * a truncation that just fails immediately because there are tuples in
1026  * the last page. This is worth avoiding mainly because such a lock must
1027  * be replayed on any hot standby, where it can be disruptive.
1028  */
1029  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
1030  {
1031  while (next_unskippable_block < nblocks)
1032  {
1033  uint8 vmstatus;
1034 
1035  vmstatus = visibilitymap_get_status(vacrel->rel,
1036  next_unskippable_block,
1037  &vmbuffer);
1038  if (aggressive)
1039  {
1040  if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
1041  break;
1042  }
1043  else
1044  {
1045  if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
1046  break;
1047  }
 1048  vacuum_delay_point();
 1049  next_unskippable_block++;
1050  }
1051  }
1052 
1053  if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
1054  skipping_blocks = true;
1055  else
1056  skipping_blocks = false;
1057 
1058  for (blkno = 0; blkno < nblocks; blkno++)
1059  {
1060  Buffer buf;
1061  Page page;
1062  bool all_visible_according_to_vm = false;
1063  LVPagePruneState prunestate;
1064 
1065  /*
1066  * Consider need to skip blocks. See note above about forcing
1067  * scanning of last page.
1068  */
1069 #define FORCE_CHECK_PAGE() \
1070  (blkno == nblocks - 1 && should_attempt_truncation(vacrel))
1071 
 1072  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
 1073 
 1074  update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
 1075  blkno, InvalidOffsetNumber);
1076 
1077  if (blkno == next_unskippable_block)
1078  {
1079  /* Time to advance next_unskippable_block */
1080  next_unskippable_block++;
1081  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
1082  {
1083  while (next_unskippable_block < nblocks)
1084  {
1085  uint8 vmskipflags;
1086 
1087  vmskipflags = visibilitymap_get_status(vacrel->rel,
1088  next_unskippable_block,
1089  &vmbuffer);
1090  if (aggressive)
1091  {
1092  if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
1093  break;
1094  }
1095  else
1096  {
1097  if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
1098  break;
1099  }
 1100  vacuum_delay_point();
 1101  next_unskippable_block++;
1102  }
1103  }
1104 
1105  /*
1106  * We know we can't skip the current block. But set up
1107  * skipping_blocks to do the right thing at the following blocks.
1108  */
1109  if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
1110  skipping_blocks = true;
1111  else
1112  skipping_blocks = false;
1113 
1114  /*
1115  * Normally, the fact that we can't skip this block must mean that
1116  * it's not all-visible. But in an aggressive vacuum we know only
1117  * that it's not all-frozen, so it might still be all-visible.
1118  */
1119  if (aggressive && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
1120  all_visible_according_to_vm = true;
1121  }
1122  else
1123  {
1124  /*
1125  * The current block is potentially skippable; if we've seen a
1126  * long enough run of skippable blocks to justify skipping it, and
1127  * we're not forced to check it, then go ahead and skip.
1128  * Otherwise, the page must be at least all-visible if not
1129  * all-frozen, so we can set all_visible_according_to_vm = true.
1130  */
1131  if (skipping_blocks && !FORCE_CHECK_PAGE())
1132  {
1133  /*
1134  * Tricky, tricky. If this is in aggressive vacuum, the page
1135  * must have been all-frozen at the time we checked whether it
1136  * was skippable, but it might not be any more. We must be
1137  * careful to count it as a skipped all-frozen page in that
1138  * case, or else we'll think we can't update relfrozenxid and
1139  * relminmxid. If it's not an aggressive vacuum, we don't
1140  * know whether it was all-frozen, so we have to recheck; but
1141  * in this case an approximate answer is OK.
1142  */
1143  if (aggressive || VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1144  vacrel->frozenskipped_pages++;
1145  continue;
1146  }
1147  all_visible_according_to_vm = true;
1148  }
1149 
 1149 
 1150  vacuum_delay_point();
 1151 
1152  /*
1153  * Regularly check if wraparound failsafe should trigger.
1154  *
1155  * There is a similar check inside lazy_vacuum_all_indexes(), but
1156  * relfrozenxid might start to look dangerously old before we reach
1157  * that point. This check also provides failsafe coverage for the
1158  * one-pass strategy, and the two-pass strategy with the index_cleanup
1159  * param set to 'off'.
1160  */
1161  if (blkno - next_failsafe_block >= FAILSAFE_EVERY_PAGES)
1162  {
 1163  lazy_check_wraparound_failsafe(vacrel);
 1164  next_failsafe_block = blkno;
1165  }
1166 
1167  /*
1168  * Consider if we definitely have enough space to process TIDs on page
1169  * already. If we are close to overrunning the available space for
1170  * dead-tuple TIDs, pause and do a cycle of vacuuming before we tackle
1171  * this page.
1172  */
1173  if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
1174  dead_tuples->num_tuples > 0)
1175  {
1176  /*
1177  * Before beginning index vacuuming, we release any pin we may
1178  * hold on the visibility map page. This isn't necessary for
1179  * correctness, but we do it anyway to avoid holding the pin
1180  * across a lengthy, unrelated operation.
1181  */
1182  if (BufferIsValid(vmbuffer))
1183  {
1184  ReleaseBuffer(vmbuffer);
1185  vmbuffer = InvalidBuffer;
1186  }
1187 
1188  /* Remove the collected garbage tuples from table and indexes */
1189  vacrel->consider_bypass_optimization = false;
1190  lazy_vacuum(vacrel);
1191 
1192  /*
1193  * Vacuum the Free Space Map to make newly-freed space visible on
1194  * upper-level FSM pages. Note we have not yet processed blkno.
1195  */
1196  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1197  blkno);
1198  next_fsm_block_to_vacuum = blkno;
1199 
1200  /* Report that we are once again scanning the heap */
 1201  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 1202  PROGRESS_VACUUM_PHASE_SCAN_HEAP);
 1203  }
1204 
1205  /*
1206  * Set up visibility map page as needed.
1207  *
1208  * Pin the visibility map page in case we need to mark the page
1209  * all-visible. In most cases this will be very cheap, because we'll
1210  * already have the correct page pinned anyway. However, it's
1211  * possible that (a) next_unskippable_block is covered by a different
1212  * VM page than the current block or (b) we released our pin and did a
1213  * cycle of index vacuuming.
1214  */
1215  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1216 
1217  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno,
1218  RBM_NORMAL, vacrel->bstrategy);
1219 
1220  /*
1221  * We need buffer cleanup lock so that we can prune HOT chains and
1222  * defragment the page.
1223  */
 1224  if (!ConditionalLockBufferForCleanup(buf))
 1225  {
1226  bool hastup;
1227 
1228  /*
1229  * If we're not performing an aggressive scan to guard against XID
1230  * wraparound, and we don't want to forcibly check the page, then
1231  * it's OK to skip vacuuming pages we get a lock conflict on. They
1232  * will be dealt with in some future vacuum.
1233  */
1234  if (!aggressive && !FORCE_CHECK_PAGE())
1235  {
1236  ReleaseBuffer(buf);
1237  vacrel->pinskipped_pages++;
1238  continue;
1239  }
1240 
1241  /*
1242  * Read the page with share lock to see if any xids on it need to
1243  * be frozen. If not we just skip the page, after updating our
1244  * scan statistics. If there are some, we wait for cleanup lock.
1245  *
1246  * We could defer the lock request further by remembering the page
1247  * and coming back to it later, or we could even register
1248  * ourselves for multiple buffers and then service whichever one
1249  * is received first. For now, this seems good enough.
1250  *
1251  * If we get here with aggressive false, then we're just forcibly
1252  * checking the page, and so we don't want to insist on getting
1253  * the lock; we only need to know if the page contains tuples, so
1254  * that we can update nonempty_pages correctly. It's convenient
1255  * to use lazy_check_needs_freeze() for both situations, though.
1256  */
 1257  LockBuffer(buf, BUFFER_LOCK_SHARE);
 1258  if (!lazy_check_needs_freeze(buf, &hastup, vacrel))
1259  {
1260  UnlockReleaseBuffer(buf);
1261  vacrel->scanned_pages++;
1262  vacrel->pinskipped_pages++;
1263  if (hastup)
1264  vacrel->nonempty_pages = blkno + 1;
1265  continue;
1266  }
1267  if (!aggressive)
1268  {
1269  /*
1270  * Here, we must not advance scanned_pages; that would amount
1271  * to claiming that the page contains no freezable tuples.
1272  */
1273  UnlockReleaseBuffer(buf);
1274  vacrel->pinskipped_pages++;
1275  if (hastup)
1276  vacrel->nonempty_pages = blkno + 1;
1277  continue;
1278  }
 1279  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 1280  LockBufferForCleanup(buf);
1281  /* drop through to normal processing */
1282  }
1283 
1284  /*
1285  * By here we definitely have enough dead_tuples space for whatever
1286  * LP_DEAD tids are on this page, we have the visibility map page set
1287  * up in case we need to set this page's all_visible/all_frozen bit,
1288  * and we have a super-exclusive lock. Any tuples on this page are
1289  * now sure to be "counted" by this VACUUM.
1290  *
1291  * One last piece of preamble needs to take place before we can prune:
1292  * we need to consider new and empty pages.
1293  */
1294  vacrel->scanned_pages++;
1295  vacrel->tupcount_pages++;
1296 
1297  page = BufferGetPage(buf);
1298 
1299  if (PageIsNew(page))
1300  {
1301  /*
1302  * All-zeroes pages can be left over if either a backend extends
1303  * the relation by a single page, but crashes before the newly
1304  * initialized page has been written out, or when bulk-extending
1305  * the relation (which creates a number of empty pages at the tail
1306  * end of the relation, but enters them into the FSM).
1307  *
1308  * Note we do not enter the page into the visibilitymap. That has
1309  * the downside that we repeatedly visit this page in subsequent
 1310  * vacuums, but otherwise we'd never discover the space on a
1311  * promoted standby. The harm of repeated checking ought to
1312  * normally not be too bad - the space usually should be used at
1313  * some point, otherwise there wouldn't be any regular vacuums.
1314  *
1315  * Make sure these pages are in the FSM, to ensure they can be
1316  * reused. Do that by testing if there's any space recorded for
1317  * the page. If not, enter it. We do so after releasing the lock
1318  * on the heap page, the FSM is approximate, after all.
1319  */
1320  UnlockReleaseBuffer(buf);
1321 
1322  if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1323  {
1324  Size freespace = BLCKSZ - SizeOfPageHeaderData;
1325 
1326  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1327  }
1328  continue;
1329  }
1330 
1331  if (PageIsEmpty(page))
1332  {
1333  Size freespace = PageGetHeapFreeSpace(page);
1334 
1335  /*
1336  * Empty pages are always all-visible and all-frozen (note that
1337  * the same is currently not true for new pages, see above).
1338  */
1339  if (!PageIsAllVisible(page))
1340  {
 1341  START_CRIT_SECTION();
 1342 
1343  /* mark buffer dirty before writing a WAL record */
1344  MarkBufferDirty(buf);
1345 
1346  /*
1347  * It's possible that another backend has extended the heap,
1348  * initialized the page, and then failed to WAL-log the page
1349  * due to an ERROR. Since heap extension is not WAL-logged,
1350  * recovery might try to replay our record setting the page
1351  * all-visible and find that the page isn't initialized, which
1352  * will cause a PANIC. To prevent that, check whether the
1353  * page has been previously WAL-logged, and if not, do that
1354  * now.
1355  */
1356  if (RelationNeedsWAL(vacrel->rel) &&
1357  PageGetLSN(page) == InvalidXLogRecPtr)
1358  log_newpage_buffer(buf, true);
1359 
1360  PageSetAllVisible(page);
1361  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1362  vmbuffer, InvalidTransactionId,
 1363  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
 1364  END_CRIT_SECTION();
1365  }
1366 
1367  UnlockReleaseBuffer(buf);
1368  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1369  continue;
1370  }
1371 
1372  /*
1373  * Prune and freeze tuples.
1374  *
1375  * Accumulates details of remaining LP_DEAD line pointers on page in
1376  * dead tuple list. This includes LP_DEAD line pointers that we
1377  * pruned ourselves, as well as existing LP_DEAD line pointers that
1378  * were pruned some time earlier. Also considers freezing XIDs in the
1379  * tuple headers of remaining items with storage.
1380  */
1381  lazy_scan_prune(vacrel, buf, blkno, page, vistest, &prunestate);
1382 
1383  Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
1384 
1385  /* Remember the location of the last page with nonremovable tuples */
1386  if (prunestate.hastup)
1387  vacrel->nonempty_pages = blkno + 1;
1388 
1389  if (vacrel->nindexes == 0)
1390  {
1391  /*
1392  * Consider the need to do page-at-a-time heap vacuuming when
1393  * using the one-pass strategy now.
1394  *
1395  * The one-pass strategy will never call lazy_vacuum(). The steps
1396  * performed here can be thought of as the one-pass equivalent of
1397  * a call to lazy_vacuum().
1398  */
1399  if (prunestate.has_lpdead_items)
1400  {
1401  Size freespace;
1402 
1403  lazy_vacuum_heap_page(vacrel, blkno, buf, 0, &vmbuffer);
1404 
1405  /* Forget the now-vacuumed tuples */
1406  dead_tuples->num_tuples = 0;
1407 
1408  /*
1409  * Periodically perform FSM vacuuming to make newly-freed
1410  * space visible on upper FSM pages. Note we have not yet
1411  * performed FSM processing for blkno.
1412  */
1413  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1414  {
1415  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1416  blkno);
1417  next_fsm_block_to_vacuum = blkno;
1418  }
1419 
1420  /*
1421  * Now perform FSM processing for blkno, and move on to next
1422  * page.
1423  *
1424  * Our call to lazy_vacuum_heap_page() will have considered if
1425  * it's possible to set all_visible/all_frozen independently
1426  * of lazy_scan_prune(). Note that prunestate was invalidated
1427  * by lazy_vacuum_heap_page() call.
1428  */
1429  freespace = PageGetHeapFreeSpace(page);
1430 
1431  UnlockReleaseBuffer(buf);
1432  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1433  continue;
1434  }
1435 
1436  /*
1437  * There was no call to lazy_vacuum_heap_page() because pruning
1438  * didn't encounter/create any LP_DEAD items that needed to be
1439  * vacuumed. Prune state has not been invalidated, so proceed
1440  * with prunestate-driven visibility map and FSM steps (just like
1441  * the two-pass strategy).
1442  */
1443  Assert(dead_tuples->num_tuples == 0);
1444  }
1445 
1446  /*
1447  * Handle setting visibility map bit based on what the VM said about
1448  * the page before pruning started, and using prunestate
1449  */
1450  if (!all_visible_according_to_vm && prunestate.all_visible)
1451  {
 1452  uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
 1453 
1454  if (prunestate.all_frozen)
1455  flags |= VISIBILITYMAP_ALL_FROZEN;
1456 
1457  /*
1458  * It should never be the case that the visibility map page is set
1459  * while the page-level bit is clear, but the reverse is allowed
1460  * (if checksums are not enabled). Regardless, set both bits so
1461  * that we get back in sync.
1462  *
1463  * NB: If the heap page is all-visible but the VM bit is not set,
1464  * we don't need to dirty the heap page. However, if checksums
1465  * are enabled, we do need to make sure that the heap page is
1466  * dirtied before passing it to visibilitymap_set(), because it
1467  * may be logged. Given that this situation should only happen in
1468  * rare cases after a crash, it is not worth optimizing.
1469  */
1470  PageSetAllVisible(page);
1471  MarkBufferDirty(buf);
1472  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1473  vmbuffer, prunestate.visibility_cutoff_xid,
1474  flags);
1475  }
1476 
1477  /*
1478  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1479  * the page-level bit is clear. However, it's possible that the bit
1480  * got cleared after we checked it and before we took the buffer
1481  * content lock, so we must recheck before jumping to the conclusion
1482  * that something bad has happened.
1483  */
1484  else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1485  && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
1486  {
1487  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1488  vacrel->relname, blkno);
1489  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
 1490  VISIBILITYMAP_VALID_BITS);
 1491  }
1492 
1493  /*
1494  * It's possible for the value returned by
1495  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1496  * wrong for us to see tuples that appear to not be visible to
1497  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1498  * xmin value never moves backwards, but
1499  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1500  * returns a value that's unnecessarily small, so if we see that
1501  * contradiction it just means that the tuples that we think are not
1502  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1503  * is correct.
1504  *
1505  * There should never be dead tuples on a page with PD_ALL_VISIBLE
1506  * set, however.
1507  */
1508  else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
1509  {
1510  elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1511  vacrel->relname, blkno);
1512  PageClearAllVisible(page);
1513  MarkBufferDirty(buf);
1514  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
 1515  VISIBILITYMAP_VALID_BITS);
 1516  }
1517 
1518  /*
1519  * If the all-visible page is all-frozen but not marked as such yet,
1520  * mark it as all-frozen. Note that all_frozen is only valid if
1521  * all_visible is true, so we must check both.
1522  */
1523  else if (all_visible_according_to_vm && prunestate.all_visible &&
1524  prunestate.all_frozen &&
1525  !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1526  {
1527  /*
1528  * We can pass InvalidTransactionId as the cutoff XID here,
1529  * because setting the all-frozen bit doesn't cause recovery
1530  * conflicts.
1531  */
1532  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1533  vmbuffer, InvalidTransactionId,
 1534  VISIBILITYMAP_ALL_FROZEN);
 1535  }
1536 
1537  /*
1538  * Final steps for block: drop super-exclusive lock, record free space
1539  * in the FSM
1540  */
1541  if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1542  {
1543  /*
1544  * Wait until lazy_vacuum_heap_rel() to save free space. This
1545  * doesn't just save us some cycles; it also allows us to record
1546  * any additional free space that lazy_vacuum_heap_page() will
1547  * make available in cases where it's possible to truncate the
1548  * page's line pointer array.
1549  *
1550  * Note: It's not in fact 100% certain that we really will call
1551  * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1552  * index vacuuming (and so must skip heap vacuuming). This is
1553  * deemed okay because it only happens in emergencies, or when
1554  * there is very little free space anyway. (Besides, we start
1555  * recording free space in the FSM once index vacuuming has been
1556  * abandoned.)
1557  *
1558  * Note: The one-pass (no indexes) case is only supposed to make
1559  * it this far when there were no LP_DEAD items during pruning.
1560  */
1561  Assert(vacrel->nindexes > 0);
1562  UnlockReleaseBuffer(buf);
1563  }
1564  else
1565  {
1566  Size freespace = PageGetHeapFreeSpace(page);
1567 
1568  UnlockReleaseBuffer(buf);
1569  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1570  }
1571  }
1572 
1573  /* report that everything is now scanned */
 1574  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
 1575 
1576  /* Clear the block number information */
1577  vacrel->blkno = InvalidBlockNumber;
1578 
1579  /* now we can compute the new value for pg_class.reltuples */
1580  vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, nblocks,
1581  vacrel->tupcount_pages,
1582  vacrel->live_tuples);
1583 
1584  /*
1585  * Also compute the total number of surviving heap entries. In the
1586  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1587  */
1588  vacrel->new_rel_tuples =
1589  Max(vacrel->new_live_tuples, 0) + vacrel->new_dead_tuples;
1590 
1591  /*
1592  * Release any remaining pin on visibility map page.
1593  */
1594  if (BufferIsValid(vmbuffer))
1595  {
1596  ReleaseBuffer(vmbuffer);
1597  vmbuffer = InvalidBuffer;
1598  }
1599 
1600  /* If any tuples need to be deleted, perform final vacuum cycle */
1601  if (dead_tuples->num_tuples > 0)
1602  lazy_vacuum(vacrel);
1603 
1604  /*
1605  * Vacuum the remainder of the Free Space Map. We must do this whether or
1606  * not there were indexes, and whether or not we bypassed index vacuuming.
1607  */
1608  if (blkno > next_fsm_block_to_vacuum)
1609  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1610 
1611  /* report all blocks vacuumed */
 1612  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
 1613 
1614  /* Do post-vacuum cleanup */
1615  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1616  lazy_cleanup_all_indexes(vacrel);
1617 
1618  /*
1619  * Free resources managed by lazy_space_alloc(). (We must end parallel
1620  * mode/free shared memory before updating index statistics. We cannot
1621  * write while in parallel mode.)
1622  */
1623  lazy_space_free(vacrel);
1624 
1625  /* Update index statistics */
1626  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1627  update_index_statistics(vacrel);
1628 
1629  /*
1630  * When the table has no indexes (i.e. in the one-pass strategy case),
 1631  * make the log report that lazy_vacuum_heap_rel would've made had there been
1632  * indexes. (As in the two-pass strategy case, only make this report when
1633  * there were LP_DEAD line pointers vacuumed in lazy_vacuum_heap_page.)
1634  */
1635  if (vacrel->nindexes == 0 && vacrel->lpdead_item_pages > 0)
1636  ereport(elevel,
1637  (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
1638  vacrel->relname, (long long) vacrel->lpdead_items,
1639  vacrel->lpdead_item_pages)));
1640 
1641  /*
1642  * Make a log report summarizing pruning and freezing.
1643  *
1644  * The autovacuum specific logging in heap_vacuum_rel summarizes an entire
1645  * VACUUM operation, whereas each VACUUM VERBOSE log report generally
1646  * summarizes a single round of index/heap vacuuming (or rel truncation).
1647  * It wouldn't make sense to report on pruning or freezing while following
1648  * that convention, though. You can think of this log report as a summary
1649  * of our first pass over the heap.
1650  */
1651  initStringInfo(&buf);
1652  appendStringInfo(&buf,
1653  _("%lld dead row versions cannot be removed yet, oldest xmin: %u\n"),
1654  (long long) vacrel->new_dead_tuples, vacrel->OldestXmin);
1655  appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
1656  "Skipped %u pages due to buffer pins, ",
1657  vacrel->pinskipped_pages),
1658  vacrel->pinskipped_pages);
1659  appendStringInfo(&buf, ngettext("%u frozen page.\n",
1660  "%u frozen pages.\n",
1661  vacrel->frozenskipped_pages),
1662  vacrel->frozenskipped_pages);
1663  appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
1664 
1665  ereport(elevel,
1666  (errmsg("table \"%s.%s\": found %lld removable, %lld nonremovable row versions in %u out of %u pages",
1667  vacrel->relnamespace,
1668  vacrel->relname,
1669  (long long) vacrel->tuples_deleted,
1670  (long long) vacrel->num_tuples, vacrel->scanned_pages,
1671  nblocks),
1672  errdetail_internal("%s", buf.data)));
1673  pfree(buf.data);
1674 }
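
 Editor's note: the reltuples update above extrapolates from the pages that were actually scanned. As a rough illustration only (not PostgreSQL's vac_estimate_reltuples, which also blends in the previous pg_class estimate), the naive density extrapolation looks like the hypothetical helper below.

 #include <stdio.h>

 /*
  * Illustrative sketch only: extrapolate live tuples for the whole table from
  * the density seen on the pages VACUUM scanned.  vac_estimate_reltuples()
  * additionally smooths this against the old pg_class.reltuples value; this
  * helper and its name are hypothetical simplifications.
  */
 static double
 naive_estimate_reltuples(unsigned total_pages, unsigned scanned_pages,
                          double scanned_live_tuples)
 {
     if (scanned_pages == 0)
         return -1;              /* nothing scanned: no basis for an estimate */
     return (scanned_live_tuples / scanned_pages) * total_pages;
 }

 int
 main(void)
 {
     /* e.g. 1,000 of 10,000 pages scanned, 120,000 live tuples seen */
     printf("estimated reltuples: %.0f\n",
            naive_estimate_reltuples(10000, 1000, 120000.0));
     return 0;
 }
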
1675 
1676 /*
1677  * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1678  *
1679  * Caller must hold pin and buffer cleanup lock on the buffer.
1680  *
1681  * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
1682  * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
1683  * whether or not a tuple should be considered DEAD. This happened when an
1684  * inserting transaction concurrently aborted (after our heap_page_prune()
1685  * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
1686  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
1687  * but nevertheless were left with storage after pruning.
1688  *
1689  * The approach we take now is to restart pruning when the race condition is
1690  * detected. This allows heap_page_prune() to prune the tuples inserted by
1691  * the now-aborted transaction. This is a little crude, but it guarantees
1692  * that any items that make it into the dead_tuples array are simple LP_DEAD
1693  * line pointers, and that every remaining item with tuple storage is
1694  * considered as a candidate for freezing.
1695  */
1696 static void
 1697 lazy_scan_prune(LVRelState *vacrel,
 1698  Buffer buf,
1699  BlockNumber blkno,
1700  Page page,
1701  GlobalVisState *vistest,
1702  LVPagePruneState *prunestate)
1703 {
1704  Relation rel = vacrel->rel;
1705  OffsetNumber offnum,
1706  maxoff;
1707  ItemId itemid;
1708  HeapTupleData tuple;
1709  HTSV_Result res;
1710  int tuples_deleted,
1711  lpdead_items,
1712  new_dead_tuples,
1713  num_tuples,
1714  live_tuples;
1715  int nfrozen;
1716  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
 1717  xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage];
 1718 
1719  maxoff = PageGetMaxOffsetNumber(page);
1720 
1721 retry:
1722 
1723  /* Initialize (or reset) page-level counters */
1724  tuples_deleted = 0;
1725  lpdead_items = 0;
1726  new_dead_tuples = 0;
1727  num_tuples = 0;
1728  live_tuples = 0;
1729 
1730  /*
1731  * Prune all HOT-update chains in this page.
1732  *
1733  * We count tuples removed by the pruning step as tuples_deleted. Its
1734  * final value can be thought of as the number of tuples that have been
1735  * deleted from the table. It should not be confused with lpdead_items;
1736  * lpdead_items's final value can be thought of as the number of tuples
1737  * that were deleted from indexes.
1738  */
1739  tuples_deleted = heap_page_prune(rel, buf, vistest,
1740  InvalidTransactionId, 0, false,
1741  &vacrel->offnum);
1742 
1743  /*
1744  * Now scan the page to collect LP_DEAD items and check for tuples
1745  * requiring freezing among remaining tuples with storage
1746  */
1747  prunestate->hastup = false;
1748  prunestate->has_lpdead_items = false;
1749  prunestate->all_visible = true;
1750  prunestate->all_frozen = true;
 1751  prunestate->visibility_cutoff_xid = InvalidTransactionId;
 1752  nfrozen = 0;
1753 
1754  for (offnum = FirstOffsetNumber;
1755  offnum <= maxoff;
1756  offnum = OffsetNumberNext(offnum))
1757  {
1758  bool tuple_totally_frozen;
1759 
1760  /*
1761  * Set the offset number so that we can display it along with any
1762  * error that occurred while processing this tuple.
1763  */
1764  vacrel->offnum = offnum;
1765  itemid = PageGetItemId(page, offnum);
1766 
1767  if (!ItemIdIsUsed(itemid))
1768  continue;
1769 
1770  /* Redirect items mustn't be touched */
1771  if (ItemIdIsRedirected(itemid))
1772  {
1773  prunestate->hastup = true; /* page won't be truncatable */
1774  continue;
1775  }
1776 
1777  /*
1778  * LP_DEAD items are processed outside of the loop.
1779  *
1780  * Note that we deliberately don't set hastup=true in the case of an
1781  * LP_DEAD item here, which is not how lazy_check_needs_freeze() or
1782  * count_nondeletable_pages() do it -- they only consider pages empty
1783  * when they only have LP_UNUSED items, which is important for
1784  * correctness.
1785  *
1786  * Our assumption is that any LP_DEAD items we encounter here will
1787  * become LP_UNUSED inside lazy_vacuum_heap_page() before we actually
1788  * call count_nondeletable_pages(). In any case our opinion of
1789  * whether or not a page 'hastup' (which is how our caller sets its
1790  * vacrel->nonempty_pages value) is inherently race-prone. It must be
1791  * treated as advisory/unreliable, so we might as well be slightly
1792  * optimistic.
1793  */
1794  if (ItemIdIsDead(itemid))
1795  {
1796  deadoffsets[lpdead_items++] = offnum;
1797  prunestate->all_visible = false;
1798  prunestate->has_lpdead_items = true;
1799  continue;
1800  }
1801 
1802  Assert(ItemIdIsNormal(itemid));
1803 
1804  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1805  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1806  tuple.t_len = ItemIdGetLength(itemid);
1807  tuple.t_tableOid = RelationGetRelid(rel);
1808 
1809  /*
1810  * DEAD tuples are almost always pruned into LP_DEAD line pointers by
1811  * heap_page_prune(), but it's possible that the tuple state changed
1812  * since heap_page_prune() looked. Handle that here by restarting.
1813  * (See comments at the top of function for a full explanation.)
1814  */
1815  res = HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf);
1816 
1817  if (unlikely(res == HEAPTUPLE_DEAD))
1818  goto retry;
1819 
1820  /*
1821  * The criteria for counting a tuple as live in this block need to
1822  * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
1823  * and ANALYZE may produce wildly different reltuples values, e.g.
1824  * when there are many recently-dead tuples.
1825  *
1826  * The logic here is a bit simpler than acquire_sample_rows(), as
1827  * VACUUM can't run inside a transaction block, which makes some cases
1828  * impossible (e.g. in-progress insert from the same transaction).
1829  *
1830  * We treat LP_DEAD items a little differently, too -- we don't count
1831  * them as dead_tuples at all (we only consider new_dead_tuples). The
1832  * outcome is no different because we assume that any LP_DEAD items we
1833  * encounter here will become LP_UNUSED inside lazy_vacuum_heap_page()
1834  * before we report anything to the stats collector. (Cases where we
1835  * bypass index vacuuming will violate our assumption, but the overall
1836  * impact of that should be negligible.)
1837  */
1838  switch (res)
1839  {
1840  case HEAPTUPLE_LIVE:
1841 
1842  /*
1843  * Count it as live. Not only is this natural, but it's also
1844  * what acquire_sample_rows() does.
1845  */
1846  live_tuples++;
1847 
1848  /*
1849  * Is the tuple definitely visible to all transactions?
1850  *
1851  * NB: Like with per-tuple hint bits, we can't set the
1852  * PD_ALL_VISIBLE flag if the inserter committed
1853  * asynchronously. See SetHintBits for more info. Check that
1854  * the tuple is hinted xmin-committed because of that.
1855  */
1856  if (prunestate->all_visible)
1857  {
1858  TransactionId xmin;
1859 
 1860  if (!HeapTupleHeaderXminCommitted(tuple.t_data))
 1861  {
1862  prunestate->all_visible = false;
1863  break;
1864  }
1865 
1866  /*
1867  * The inserter definitely committed. But is it old enough
1868  * that everyone sees it as committed?
1869  */
1870  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1871  if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
1872  {
1873  prunestate->all_visible = false;
1874  break;
1875  }
1876 
1877  /* Track newest xmin on page. */
1878  if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid))
1879  prunestate->visibility_cutoff_xid = xmin;
1880  }
1881  break;
 1882  case HEAPTUPLE_RECENTLY_DEAD:
 1883 
1884  /*
1885  * If tuple is recently deleted then we must not remove it
1886  * from relation. (We only remove items that are LP_DEAD from
1887  * pruning.)
1888  */
1889  new_dead_tuples++;
1890  prunestate->all_visible = false;
1891  break;
 1892  case HEAPTUPLE_INSERT_IN_PROGRESS:
 1893 
1894  /*
1895  * We do not count these rows as live, because we expect the
1896  * inserting transaction to update the counters at commit, and
1897  * we assume that will happen only after we report our
1898  * results. This assumption is a bit shaky, but it is what
1899  * acquire_sample_rows() does, so be consistent.
1900  */
1901  prunestate->all_visible = false;
1902  break;
 1903  case HEAPTUPLE_DELETE_IN_PROGRESS:
 1904  /* This is an expected case during concurrent vacuum */
1905  prunestate->all_visible = false;
1906 
1907  /*
1908  * Count such rows as live. As above, we assume the deleting
1909  * transaction will commit and update the counters after we
1910  * report.
1911  */
1912  live_tuples++;
1913  break;
1914  default:
1915  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1916  break;
1917  }
1918 
1919  /*
1920  * Non-removable tuple (i.e. tuple with storage).
1921  *
1922  * Check tuple left behind after pruning to see if needs to be frozen
1923  * now.
1924  */
1925  num_tuples++;
1926  prunestate->hastup = true;
 1927  if (heap_prepare_freeze_tuple(tuple.t_data,
 1928  vacrel->relfrozenxid,
1929  vacrel->relminmxid,
1930  vacrel->FreezeLimit,
1931  vacrel->MultiXactCutoff,
1932  &frozen[nfrozen],
1933  &tuple_totally_frozen))
1934  {
1935  /* Will execute freeze below */
1936  frozen[nfrozen++].offset = offnum;
1937  }
1938 
1939  /*
1940  * If tuple is not frozen (and not about to become frozen) then caller
1941  * had better not go on to set this page's VM bit
1942  */
1943  if (!tuple_totally_frozen)
1944  prunestate->all_frozen = false;
1945  }
1946 
1947  /*
1948  * We have now divided every item on the page into either an LP_DEAD item
1949  * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
1950  * that remains and needs to be considered for freezing now (LP_UNUSED and
1951  * LP_REDIRECT items also remain, but are of no further interest to us).
1952  */
1953  vacrel->offnum = InvalidOffsetNumber;
1954 
1955  /*
1956  * Consider the need to freeze any items with tuple storage from the page
1957  * first (arbitrary)
1958  */
1959  if (nfrozen > 0)
1960  {
1961  Assert(prunestate->hastup);
1962 
1963  /*
1964  * At least one tuple with storage needs to be frozen -- execute that
1965  * now.
1966  *
1967  * If we need to freeze any tuples we'll mark the buffer dirty, and
1968  * write a WAL record recording the changes. We must log the changes
1969  * to be crash-safe against future truncation of CLOG.
1970  */
 1971  START_CRIT_SECTION();
 1972 
1973  MarkBufferDirty(buf);
1974 
1975  /* execute collected freezes */
1976  for (int i = 0; i < nfrozen; i++)
1977  {
1978  HeapTupleHeader htup;
1979 
1980  itemid = PageGetItemId(page, frozen[i].offset);
1981  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1982 
1983  heap_execute_freeze_tuple(htup, &frozen[i]);
1984  }
1985 
1986  /* Now WAL-log freezing if necessary */
1987  if (RelationNeedsWAL(vacrel->rel))
1988  {
1989  XLogRecPtr recptr;
1990 
1991  recptr = log_heap_freeze(vacrel->rel, buf, vacrel->FreezeLimit,
1992  frozen, nfrozen);
1993  PageSetLSN(page, recptr);
1994  }
1995 
1996  END_CRIT_SECTION();
1997  }
1998 
1999  /*
2000  * The second pass over the heap can also set visibility map bits, using
2001  * the same approach. This is important when the table frequently has a
2002  * few old LP_DEAD items on each page by the time we get to it (typically
2003  * because past opportunistic pruning operations freed some non-HOT
2004  * tuples).
2005  *
2006  * VACUUM will call heap_page_is_all_visible() during the second pass over
2007  * the heap to determine all_visible and all_frozen for the page -- this
2008  * is a specialized version of the logic from this function. Now that
2009  * we've finished pruning and freezing, make sure that we're in total
2010  * agreement with heap_page_is_all_visible() using an assertion.
2011  */
2012 #ifdef USE_ASSERT_CHECKING
2013  /* Note that all_frozen value does not matter when !all_visible */
2014  if (prunestate->all_visible)
2015  {
2016  TransactionId cutoff;
2017  bool all_frozen;
2018 
2019  if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
2020  Assert(false);
2021 
2022  Assert(lpdead_items == 0);
2023  Assert(prunestate->all_frozen == all_frozen);
2024 
2025  /*
2026  * It's possible that we froze tuples and made the page's XID cutoff
2027  * (for recovery conflict purposes) FrozenTransactionId. This is okay
2028  * because visibility_cutoff_xid will be logged by our caller in a
2029  * moment.
2030  */
2031  Assert(cutoff == FrozenTransactionId ||
2032  cutoff == prunestate->visibility_cutoff_xid);
2033  }
2034 #endif
2035 
2036  /*
2037  * Now save details of the LP_DEAD items from the page in the dead_tuples
2038  * array
2039  */
2040  if (lpdead_items > 0)
2041  {
2042  LVDeadTuples *dead_tuples = vacrel->dead_tuples;
2043  ItemPointerData tmp;
2044 
2045  Assert(!prunestate->all_visible);
2046  Assert(prunestate->has_lpdead_items);
2047 
2048  vacrel->lpdead_item_pages++;
2049 
2050  ItemPointerSetBlockNumber(&tmp, blkno);
2051 
2052  for (int i = 0; i < lpdead_items; i++)
2053  {
2054  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
2055  dead_tuples->itemptrs[dead_tuples->num_tuples++] = tmp;
2056  }
2057 
2058  Assert(dead_tuples->num_tuples <= dead_tuples->max_tuples);
 2059  pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
 2060  dead_tuples->num_tuples);
2061  }
2062 
2063  /* Finally, add page-local counts to whole-VACUUM counts */
2064  vacrel->tuples_deleted += tuples_deleted;
2065  vacrel->lpdead_items += lpdead_items;
2066  vacrel->new_dead_tuples += new_dead_tuples;
2067  vacrel->num_tuples += num_tuples;
2068  vacrel->live_tuples += live_tuples;
2069 }
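
 Editor's note: the header comment of lazy_scan_prune() describes restarting the whole page pass when a late HeapTupleSatisfiesVacuum() recheck finds a DEAD tuple that pruning left behind. The standalone sketch below illustrates only that control-flow pattern (reset page-local counters, goto retry); every name and value in it is hypothetical, not PostgreSQL code.

 #include <stdbool.h>
 #include <stdio.h>

 /*
  * Illustrative sketch of the "restart on race" pattern: if a recheck notices
  * that earlier work was invalidated (faked here with a one-shot flag), throw
  * away the page-local counters and redo the pass from the retry label.
  */
 static bool
 recheck_says_restart(int item, bool *already_restarted)
 {
     if (item == 3 && !*already_restarted)
     {
         *already_restarted = true; /* simulate a concurrently aborted inserter */
         return true;
     }
     return false;
 }

 int
 main(void)
 {
     const int nitems = 5;
     bool restarted = false;
     int live, dead;

 retry:
     live = 0;
     dead = 0;                      /* reset page-local counters, as at the retry label */
     for (int i = 0; i < nitems; i++)
     {
         if (recheck_says_restart(i, &restarted))
             goto retry;            /* analogous to seeing HEAPTUPLE_DEAD after pruning */
         if (i % 2 == 0)
             live++;
         else
             dead++;
     }
     printf("live=%d dead=%d (restarted=%d)\n", live, dead, restarted);
     return 0;
 }
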
2070 
2071 /*
2072  * Remove the collected garbage tuples from the table and its indexes.
2073  *
2074  * We may choose to bypass index vacuuming at this point, though only when the
2075  * ongoing VACUUM operation will definitely only have one index scan/round of
2076  * index vacuuming. Caller indicates whether or not this is such a VACUUM
2077  * operation using 'onecall' argument.
2078  *
2079  * In rare emergencies, the ongoing VACUUM operation can be made to skip both
2080  * index vacuuming and index cleanup at the point we're called. This avoids
2081  * having the whole system refuse to allocate further XIDs/MultiXactIds due to
2082  * wraparound.
2083  */
2084 static void
 2085 lazy_vacuum(LVRelState *vacrel)
 2086 {
2087  bool bypass;
2088 
2089  /* Should not end up here with no indexes */
2090  Assert(vacrel->nindexes > 0);
 2091  Assert(!IsParallelWorker());
 2092  Assert(vacrel->lpdead_item_pages > 0);
2093 
2094  if (!vacrel->do_index_vacuuming)
2095  {
2096  Assert(!vacrel->do_index_cleanup);
2097  vacrel->dead_tuples->num_tuples = 0;
2098  return;
2099  }
2100 
2101  /*
2102  * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2103  *
2104  * We currently only do this in cases where the number of LP_DEAD items
2105  * for the entire VACUUM operation is close to zero. This avoids sharp
2106  * discontinuities in the duration and overhead of successive VACUUM
2107  * operations that run against the same table with a fixed workload.
2108  * Ideally, successive VACUUM operations will behave as if there are
2109  * exactly zero LP_DEAD items in cases where there are close to zero.
2110  *
2111  * This is likely to be helpful with a table that is continually affected
2112  * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2113  * have small aberrations that lead to just a few heap pages retaining
2114  * only one or two LP_DEAD items. This is pretty common; even when the
2115  * DBA goes out of their way to make UPDATEs use HOT, it is practically
2116  * impossible to predict whether HOT will be applied in 100% of cases.
2117  * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2118  * HOT through careful tuning.
2119  */
2120  bypass = false;
2121  if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2122  {
2123  BlockNumber threshold;
2124 
2125  Assert(vacrel->num_index_scans == 0);
2126  Assert(vacrel->lpdead_items == vacrel->dead_tuples->num_tuples);
2127  Assert(vacrel->do_index_vacuuming);
2128  Assert(vacrel->do_index_cleanup);
2129 
2130  /*
2131  * This crossover point at which we'll start to do index vacuuming is
2132  * expressed as a percentage of the total number of heap pages in the
2133  * table that are known to have at least one LP_DEAD item. This is
2134  * much more important than the total number of LP_DEAD items, since
2135  * it's a proxy for the number of heap pages whose visibility map bits
2136  * cannot be set on account of bypassing index and heap vacuuming.
2137  *
2138  * We apply one further precautionary test: the space currently used
2139  * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2140  * not exceed 32MB. This limits the risk that we will bypass index
2141  * vacuuming again and again until eventually there is a VACUUM whose
2142  * dead_tuples space is not CPU cache resident.
2143  *
2144  * We don't take any special steps to remember the LP_DEAD items (such
2145  * as counting them in new_dead_tuples report to the stats collector)
2146  * when the optimization is applied. Though the accounting used in
2147  * analyze.c's acquire_sample_rows() will recognize the same LP_DEAD
2148  * items as dead rows in its own stats collector report, that's okay.
2149  * The discrepancy should be negligible. If this optimization is ever
2150  * expanded to cover more cases then this may need to be reconsidered.
2151  */
2152  threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2153  bypass = (vacrel->lpdead_item_pages < threshold &&
2154  vacrel->lpdead_items < MAXDEADTUPLES(32L * 1024L * 1024L));
2155  }
2156 
2157  if (bypass)
2158  {
2159  /*
2160  * There are almost zero TIDs. Behave as if there were precisely
2161  * zero: bypass index vacuuming, but do index cleanup.
2162  *
2163  * We expect that the ongoing VACUUM operation will finish very
2164  * quickly, so there is no point in considering speeding up as a
2165  * failsafe against wraparound failure. (Index cleanup is expected to
2166  * finish very quickly in cases where there were no ambulkdelete()
2167  * calls.)
2168  */
2169  vacrel->do_index_vacuuming = false;
2170  ereport(elevel,
2171  (errmsg("table \"%s\": index scan bypassed: %u pages from table (%.2f%% of total) have %lld dead item identifiers",
2172  vacrel->relname, vacrel->lpdead_item_pages,
2173  100.0 * vacrel->lpdead_item_pages / vacrel->rel_pages,
2174  (long long) vacrel->lpdead_items)));
2175  }
2176  else if (lazy_vacuum_all_indexes(vacrel))
2177  {
2178  /*
2179  * We successfully completed a round of index vacuuming. Do related
2180  * heap vacuuming now.
2181  */
2182  lazy_vacuum_heap_rel(vacrel);
2183  }
2184  else
2185  {
2186  /*
2187  * Failsafe case.
2188  *
 2189  * We attempted index vacuuming, but didn't finish a full round/full
2190  * index scan. This happens when relfrozenxid or relminmxid is too
2191  * far in the past.
2192  *
2193  * From this point on the VACUUM operation will do no further index
2194  * vacuuming or heap vacuuming. This VACUUM operation won't end up
2195  * back here again.
2196  */
2197  Assert(vacrel->failsafe_active);
2198  }
2199 
2200  /*
2201  * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2202  * vacuum)
2203  */
2204  vacrel->dead_tuples->num_tuples = 0;
2205 }
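
 Editor's note: the bypass decision above compares the number of pages with LP_DEAD items against a small fraction of the table, and additionally caps the TID storage involved. The sketch below mirrors that arithmetic with the 2% page threshold and 32MB cap that vacuumlazy.c uses (BYPASS_THRESHOLD_PAGES, MAXDEADTUPLES(32MB)), but the helper itself and the flat 6-byte TID size are simplifications for illustration.

 #include <stdbool.h>
 #include <stdio.h>

 /*
  * Illustrative sketch of the index-vacuuming bypass test from lazy_vacuum().
  * Standalone and approximate; not the real macros from the file.
  */
 static bool
 would_bypass_index_vacuuming(unsigned rel_pages,
                              unsigned lpdead_item_pages,
                              long long lpdead_items)
 {
     double threshold_pages = rel_pages * 0.02;          /* ~BYPASS_THRESHOLD_PAGES */
     long long max_tids_32mb = (32LL * 1024 * 1024) / 6; /* ~ItemPointerData slots in 32MB */

     return lpdead_item_pages < threshold_pages &&
            lpdead_items < max_tids_32mb;
 }

 int
 main(void)
 {
     /* 100,000-page table, 500 pages with LP_DEAD items, 1,200 TIDs collected */
     printf("bypass? %d\n",
            would_bypass_index_vacuuming(100000, 500, 1200));
     return 0;
 }
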
2206 
2207 /*
2208  * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2209  *
2210  * Returns true in the common case when all indexes were successfully
2211  * vacuumed. Returns false in rare cases where we determined that the ongoing
2212  * VACUUM operation is at risk of taking too long to finish, leading to
2213  * wraparound failure.
2214  */
2215 static bool
 2216 lazy_vacuum_all_indexes(LVRelState *vacrel)
 2217 {
2218  bool allindexes = true;
2219 
 2220  Assert(!IsParallelWorker());
 2221  Assert(vacrel->nindexes > 0);
2222  Assert(vacrel->do_index_vacuuming);
2223  Assert(vacrel->do_index_cleanup);
2226 
2227  /* Precheck for XID wraparound emergencies */
2228  if (lazy_check_wraparound_failsafe(vacrel))
2229  {
2230  /* Wraparound emergency -- don't even start an index scan */
2231  return false;
2232  }
2233 
2234  /* Report that we are now vacuuming indexes */
 2235  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 2236  PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
 2237 
2238  if (!ParallelVacuumIsActive(vacrel))
2239  {
2240  for (int idx = 0; idx < vacrel->nindexes; idx++)
2241  {
2242  Relation indrel = vacrel->indrels[idx];
2243  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2244 
2245  vacrel->indstats[idx] =
2246  lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples,
2247  vacrel);
2248 
2249  if (lazy_check_wraparound_failsafe(vacrel))
2250  {
2251  /* Wraparound emergency -- end current index scan */
2252  allindexes = false;
2253  break;
2254  }
2255  }
2256  }
2257  else
2258  {
2259  /* Outsource everything to parallel variant */
 2260  do_parallel_lazy_vacuum_all_indexes(vacrel);
 2261 
2262  /*
2263  * Do a postcheck to consider applying wraparound failsafe now. Note
2264  * that parallel VACUUM only gets the precheck and this postcheck.
2265  */
2266  if (lazy_check_wraparound_failsafe(vacrel))
2267  allindexes = false;
2268  }
2269 
2270  /*
2271  * We delete all LP_DEAD items from the first heap pass in all indexes on
2272  * each call here (except calls where we choose to do the failsafe). This
2273  * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2274  * of the failsafe triggering, which prevents the next call from taking
2275  * place).
2276  */
2277  Assert(vacrel->num_index_scans > 0 ||
2278  vacrel->dead_tuples->num_tuples == vacrel->lpdead_items);
2279  Assert(allindexes || vacrel->failsafe_active);
2280 
2281  /*
2282  * Increase and report the number of index scans.
2283  *
2284  * We deliberately include the case where we started a round of bulk
2285  * deletes that we weren't able to finish due to the failsafe triggering.
2286  */
2287  vacrel->num_index_scans++;
 2288  pgstat_progress_update_param(PROGRESS_VACUUM_NUM_INDEX_VACUUMS,
 2289  vacrel->num_index_scans);
2290 
2291  return allindexes;
2292 }
2293 
2294 /*
2295  * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2296  *
2297  * This routine marks LP_DEAD items in vacrel->dead_tuples array as LP_UNUSED.
2298  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
2299  * at all.
2300  *
2301  * We may also be able to truncate the line pointer array of the heap pages we
2302  * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2303  * array, it can be reclaimed as free space. These LP_UNUSED items usually
2304  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2305  * each page to LP_UNUSED, and then consider if it's possible to truncate the
2306  * page's line pointer array).
2307  *
2308  * Note: the reason for doing this as a second pass is we cannot remove the
2309  * tuples until we've removed their index entries, and we want to process
2310  * index entry removal in batches as large as possible.
2311  */
2312 static void
 2313 lazy_vacuum_heap_rel(LVRelState *vacrel)
 2314 {
2315  int tupindex;
2316  BlockNumber vacuumed_pages;
2317  PGRUsage ru0;
2318  Buffer vmbuffer = InvalidBuffer;
2319  LVSavedErrInfo saved_err_info;
2320 
2321  Assert(vacrel->do_index_vacuuming);
2322  Assert(vacrel->do_index_cleanup);
2323  Assert(vacrel->num_index_scans > 0);
2324 
2325  /* Report that we are now vacuuming the heap */
 2326  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 2327  PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
 2328 
2329  /* Update error traceback information */
2330  update_vacuum_error_info(vacrel, &saved_err_info,
 2331  VACUUM_ERRCB_PHASE_VACUUM_HEAP,
 2332  InvalidBlockNumber, InvalidOffsetNumber);
 2333 
2334  pg_rusage_init(&ru0);
2335  vacuumed_pages = 0;
2336 
2337  tupindex = 0;
2338  while (tupindex < vacrel->dead_tuples->num_tuples)
2339  {
2340  BlockNumber tblk;
2341  Buffer buf;
2342  Page page;
2343  Size freespace;
2344 
 2345  vacuum_delay_point();
 2346 
2347  tblk = ItemPointerGetBlockNumber(&vacrel->dead_tuples->itemptrs[tupindex]);
2348  vacrel->blkno = tblk;
2349  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, tblk, RBM_NORMAL,
2350  vacrel->bstrategy);
 2351  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 2352  tupindex = lazy_vacuum_heap_page(vacrel, tblk, buf, tupindex,
2353  &vmbuffer);
2354 
2355  /* Now that we've vacuumed the page, record its available space */
2356  page = BufferGetPage(buf);
2357  freespace = PageGetHeapFreeSpace(page);
2358 
2359  UnlockReleaseBuffer(buf);
2360  RecordPageWithFreeSpace(vacrel->rel, tblk, freespace);
2361  vacuumed_pages++;
2362  }
2363 
2364  /* Clear the block number information */
2365  vacrel->blkno = InvalidBlockNumber;
2366 
2367  if (BufferIsValid(vmbuffer))
2368  {
2369  ReleaseBuffer(vmbuffer);
2370  vmbuffer = InvalidBuffer;
2371  }
2372 
2373  /*
2374  * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2375  * the second heap pass. No more, no less.
2376  */
2377  Assert(tupindex > 0);
2378  Assert(vacrel->num_index_scans > 1 ||
2379  (tupindex == vacrel->lpdead_items &&
2380  vacuumed_pages == vacrel->lpdead_item_pages));
2381 
2382  ereport(elevel,
2383  (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2384  vacrel->relname, (long long ) tupindex, vacuumed_pages),
2385  errdetail_internal("%s", pg_rusage_show(&ru0))));
2386 
2387  /* Revert to the previous phase information for error traceback */
2388  restore_vacuum_error_info(vacrel, &saved_err_info);
2389 }
2390 
2391 /*
2392  * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2393  * vacrel->dead_tuples array.
2394  *
2395  * Caller must have an exclusive buffer lock on the buffer (though a
2396  * super-exclusive lock is also acceptable).
2397  *
2398  * tupindex is the index in vacrel->dead_tuples of the first dead tuple for
2399  * this page. We assume the rest follow sequentially. The return value is
2400  * the first tupindex after the tuples of this page.
2401  *
2402  * Prior to PostgreSQL 14 there were rare cases where this routine had to set
2403  * tuples with storage to unused. These days it is strictly responsible for
2404  * marking LP_DEAD stub line pointers as unused. This only happens for those
2405  * LP_DEAD items on the page that were determined to be LP_DEAD items back
2406  * when the same page was visited by lazy_scan_prune() (i.e. those whose TID
2407  * was recorded in the dead_tuples array).
2408  */
2409 static int
 2410 lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 2411  int tupindex, Buffer *vmbuffer)
2412 {
2413  LVDeadTuples *dead_tuples = vacrel->dead_tuples;
2414  Page page = BufferGetPage(buffer);
 2415  OffsetNumber unused[MaxHeapTuplesPerPage];
 2416  int uncnt = 0;
2417  TransactionId visibility_cutoff_xid;
2418  bool all_frozen;
2419  LVSavedErrInfo saved_err_info;
2420 
2421  Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2422 
 2423  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
 2424 
2425  /* Update error traceback information */
2426  update_vacuum_error_info(vacrel, &saved_err_info,
 2427  VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
 2428  InvalidOffsetNumber);
 2429 
 2430  START_CRIT_SECTION();
 2431 
2432  for (; tupindex < dead_tuples->num_tuples; tupindex++)
2433  {
2434  BlockNumber tblk;
2435  OffsetNumber toff;
2436  ItemId itemid;
2437 
2438  tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
2439  if (tblk != blkno)
2440  break; /* past end of tuples for this block */
2441  toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
2442  itemid = PageGetItemId(page, toff);
2443 
2444  Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2445  ItemIdSetUnused(itemid);
2446  unused[uncnt++] = toff;
2447  }
2448 
2449  Assert(uncnt > 0);
2450 
2451  /* Attempt to truncate line pointer array now */
 2452  PageTruncateLinePointerArray(page);
 2453 
2454  /*
2455  * Mark buffer dirty before we write WAL.
2456  */
2457  MarkBufferDirty(buffer);
2458 
2459  /* XLOG stuff */
2460  if (RelationNeedsWAL(vacrel->rel))
2461  {
2462  xl_heap_vacuum xlrec;
2463  XLogRecPtr recptr;
2464 
2465  xlrec.nunused = uncnt;
2466 
2467  XLogBeginInsert();
2468  XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
2469 
2470  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2471  XLogRegisterBufData(0, (char *) unused, uncnt * sizeof(OffsetNumber));
2472 
2473  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
2474 
2475  PageSetLSN(page, recptr);
2476  }
2477 
2478  /*
2479  * End critical section, so we safely can do visibility tests (which
2480  * possibly need to perform IO and allocate memory!). If we crash now the
2481  * page (including the corresponding vm bit) might not be marked all
2482  * visible, but that's fine. A later vacuum will fix that.
2483  */
2484  END_CRIT_SECTION();
2485 
2486  /*
 2487  * Now that we have removed the LP_DEAD items from the page, once again
2488  * check if the page has become all-visible. The page is already marked
2489  * dirty, exclusively locked, and, if needed, a full page image has been
2490  * emitted.
2491  */
2492  if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2493  &all_frozen))
2494  PageSetAllVisible(page);
2495 
2496  /*
2497  * All the changes to the heap page have been done. If the all-visible
2498  * flag is now set, also set the VM all-visible bit (and, if possible, the
2499  * all-frozen bit) unless this has already been done previously.
2500  */
2501  if (PageIsAllVisible(page))
2502  {
2503  uint8 flags = 0;
2504  uint8 vm_status = visibilitymap_get_status(vacrel->rel,
2505  blkno, vmbuffer);
2506 
 2507  /* Set the VM all-visible and all-frozen bits in flags, as needed */
2508  if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
2509  flags |= VISIBILITYMAP_ALL_VISIBLE;
2510  if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
2511  flags |= VISIBILITYMAP_ALL_FROZEN;
2512 
2513  Assert(BufferIsValid(*vmbuffer));
2514  if (flags != 0)
2515  visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2516  *vmbuffer, visibility_cutoff_xid, flags);
2517  }
2518 
2519  /* Revert to the previous phase information for error traceback */
2520  restore_vacuum_error_info(vacrel, &saved_err_info);
2521  return tupindex;
2522 }
2523 
2524 /*
2525  * lazy_check_needs_freeze() -- scan page to see if any tuples
2526  * need to be cleaned to avoid wraparound
2527  *
2528  * Returns true if the page needs to be vacuumed using cleanup lock.
2529  * Also returns a flag indicating whether page contains any tuples at all.
2530  */
2531 static bool
 2532 lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel)
 2533 {
2534  Page page = BufferGetPage(buf);
2535  OffsetNumber offnum,
2536  maxoff;
2537  HeapTupleHeader tupleheader;
2538 
2539  *hastup = false;
2540 
2541  /*
2542  * New and empty pages, obviously, don't contain tuples. We could make
2543  * sure that the page is registered in the FSM, but it doesn't seem worth
2544  * waiting for a cleanup lock just for that, especially because it's
2545  * likely that the pin holder will do so.
2546  */
2547  if (PageIsNew(page) || PageIsEmpty(page))
2548  return false;
2549 
2550  maxoff = PageGetMaxOffsetNumber(page);
2551  for (offnum = FirstOffsetNumber;
2552  offnum <= maxoff;
2553  offnum = OffsetNumberNext(offnum))
2554  {
2555  ItemId itemid;
2556 
2557  /*
2558  * Set the offset number so that we can display it along with any
2559  * error that occurred while processing this tuple.
2560  */
2561  vacrel->offnum = offnum;
2562  itemid = PageGetItemId(page, offnum);
2563 
2564  /* this should match hastup test in count_nondeletable_pages() */
2565  if (ItemIdIsUsed(itemid))
2566  *hastup = true;
2567 
2568  /* dead and redirect items never need freezing */
2569  if (!ItemIdIsNormal(itemid))
2570  continue;
2571 
2572  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2573 
2574  if (heap_tuple_needs_freeze(tupleheader, vacrel->FreezeLimit,
2575  vacrel->MultiXactCutoff, buf))
2576  break;
2577  } /* scan along page */
2578 
2579  /* Clear the offset information once we have processed the given page. */
2580  vacrel->offnum = InvalidOffsetNumber;
2581 
2582  return (offnum <= maxoff);
2583 }
2584 
2585 /*
2586  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2587  * relfrozenxid and/or relminmxid that is dangerously far in the past.
2588  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2589  * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2590  *
2591  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2592  * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2593  * that it started out with.
2594  *
2595  * Returns true when failsafe has been triggered.
2596  */
2597 static bool
 2598 lazy_check_wraparound_failsafe(LVRelState *vacrel)
 2599 {
2600  /* Don't warn more than once per VACUUM */
2601  if (vacrel->failsafe_active)
2602  return true;
2603 
 2604  if (unlikely(vacuum_xid_failsafe_check(vacrel->relfrozenxid,
 2605  vacrel->relminmxid)))
2606  {
2607  vacrel->failsafe_active = true;
2608 
2609  /* Disable index vacuuming, index cleanup, and heap rel truncation */
2610  vacrel->do_index_vacuuming = false;
2611  vacrel->do_index_cleanup = false;
2612  vacrel->do_rel_truncate = false;
2613 
2614  ereport(WARNING,
2615  (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
 2616  get_database_name(MyDatabaseId),
 2617  vacrel->relnamespace,
2618  vacrel->relname,
2619  vacrel->num_index_scans),
2620  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2621  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2622  "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2623 
2624  /* Stop applying cost limits from this point on */
2625  VacuumCostActive = false;
2626  VacuumCostBalance = 0;
2627 
2628  return true;
2629  }
2630 
2631  return false;
2632 }
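
 Editor's note: the failsafe fires when relfrozenxid or relminmxid is dangerously old. The sketch below illustrates only the underlying modulo-2^32 "XID age" arithmetic that such checks rely on; it is not vacuum_xid_failsafe_check(), and the cutoff value used is a hypothetical example.

 #include <stdint.h>
 #include <stdio.h>

 /*
  * Illustrative sketch: XIDs live on a modulo-2^32 circle, so the distance
  * between the current XID and relfrozenxid is a signed 32-bit difference.
  */
 static int32_t
 xid_age(uint32_t next_xid, uint32_t relfrozenxid)
 {
     return (int32_t) (next_xid - relfrozenxid);
 }

 int
 main(void)
 {
     uint32_t relfrozenxid = 4000000000u;
     uint32_t next_xid = 100000000u;     /* the counter has wrapped past 2^32 */
     int32_t age = xid_age(next_xid, relfrozenxid);

     /* hypothetical failsafe cutoff of 1.6 billion XIDs */
     printf("age=%d, trigger failsafe=%d\n", age, age > 1600000000);
     return 0;
 }
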
2633 
2634 /*
2635  * Perform lazy_vacuum_all_indexes() steps in parallel
2636  */
2637 static void
 2638 do_parallel_lazy_vacuum_all_indexes(LVRelState *vacrel)
 2639 {
2640  /* Tell parallel workers to do index vacuuming */
2641  vacrel->lps->lvshared->for_cleanup = false;
2642  vacrel->lps->lvshared->first_time = false;
2643 
2644  /*
2645  * We can only provide an approximate value of num_heap_tuples in vacuum
2646  * cases.
2647  */
2648  vacrel->lps->lvshared->reltuples = vacrel->old_live_tuples;
2649  vacrel->lps->lvshared->estimated_count = true;
2650 
 2651  do_parallel_vacuum_or_cleanup(vacrel,
 2652  vacrel->lps->nindexes_parallel_bulkdel);
2653 }
2654 
2655 /*
2656  * Perform lazy_cleanup_all_indexes() steps in parallel
2657  */
2658 static void
 2659 do_parallel_lazy_cleanup_all_indexes(LVRelState *vacrel)
 2660 {
2661  int nworkers;
2662 
2663  /*
2664  * If parallel vacuum is active we perform index cleanup with parallel
2665  * workers.
2666  *
2667  * Tell parallel workers to do index cleanup.
2668  */
2669  vacrel->lps->lvshared->for_cleanup = true;
2670  vacrel->lps->lvshared->first_time = (vacrel->num_index_scans == 0);
2671 
2672  /*
2673  * Now we can provide a better estimate of total number of surviving
2674  * tuples (we assume indexes are more interested in that than in the
2675  * number of nominally live tuples).
2676  */
2677  vacrel->lps->lvshared->reltuples = vacrel->new_rel_tuples;
2678  vacrel->lps->lvshared->estimated_count =
2679  (vacrel->tupcount_pages < vacrel->rel_pages);
2680 
2681  /* Determine the number of parallel workers to launch */
2682  if (vacrel->lps->lvshared->first_time)
2683  nworkers = vacrel->lps->nindexes_parallel_cleanup +
 2684  vacrel->lps->nindexes_parallel_condcleanup;
 2685  else
2686  nworkers = vacrel->lps->nindexes_parallel_cleanup;
2687 
2688  do_parallel_vacuum_or_cleanup(vacrel, nworkers);
2689 }
2690 
2691 /*
2692  * Perform index vacuum or index cleanup with parallel workers. This function
2693  * must be used by the parallel vacuum leader process. The caller must set
2694  * lps->lvshared->for_cleanup to indicate whether to perform vacuum or
2695  * cleanup.
2696  */
2697 static void
 2698 do_parallel_vacuum_or_cleanup(LVRelState *vacrel, int nworkers)
 2699 {
2700  LVParallelState *lps = vacrel->lps;
2701 
 2702  Assert(!IsParallelWorker());
 2703  Assert(ParallelVacuumIsActive(vacrel));
2704  Assert(vacrel->nindexes > 0);
2705 
2706  /* The leader process will participate */
2707  nworkers--;
2708 
2709  /*
2710  * It is possible that parallel context is initialized with fewer workers
2711  * than the number of indexes that need a separate worker in the current
2712  * phase, so we need to consider it. See compute_parallel_vacuum_workers.
2713  */
2714  nworkers = Min(nworkers, lps->pcxt->nworkers);
2715 
2716  /* Setup the shared cost-based vacuum delay and launch workers */
2717  if (nworkers > 0)
2718  {
2719  if (vacrel->num_index_scans > 0)
2720  {
2721  /* Reset the parallel index processing counter */
2722  pg_atomic_write_u32(&(lps->lvshared->idx), 0);
2723 
2724  /* Reinitialize the parallel context to relaunch parallel workers */
 2725  ReinitializeParallelDSM(lps->pcxt);
 2726  }
2727 
2728  /*
2729  * Set up shared cost balance and the number of active workers for
2730  * vacuum delay. We need to do this before launching workers as
2731  * otherwise, they might not see the updated values for these
2732  * parameters.
2733  */
 2734  pg_atomic_write_u32(&(lps->lvshared->cost_balance), VacuumCostBalance);
 2735  pg_atomic_write_u32(&(lps->lvshared->active_nworkers), 0);
 2736 
2737  /*
2738  * The number of workers can vary between bulkdelete and cleanup
2739  * phase.
2740  */
2741  ReinitializeParallelWorkers(lps->pcxt, nworkers);
2742 
 2743  LaunchParallelWorkers(lps->pcxt);
 2744 
2745  if (lps->pcxt->nworkers_launched > 0)
2746  {
2747  /*
2748  * Reset the local cost values for leader backend as we have
2749  * already accumulated the remaining balance of heap.
2750  */
2751  VacuumCostBalance = 0;
 2752  VacuumCostBalanceLocal = 0;
 2753 
2754  /* Enable shared cost balance for leader backend */
 2755  VacuumSharedCostBalance = &(lps->lvshared->cost_balance);
 2756  VacuumActiveNWorkers = &(lps->lvshared->active_nworkers);
 2757  }
2758 
2759  if (lps->lvshared->for_cleanup)
2760  ereport(elevel,
2761  (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
2762  "launched %d parallel vacuum workers for index cleanup (planned: %d)",
2763  lps->pcxt->nworkers_launched),
2764  lps->pcxt->nworkers_launched, nworkers)));
2765  else
2766  ereport(elevel,
2767  (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
2768  "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
2769  lps->pcxt->nworkers_launched),
2770  lps->pcxt->nworkers_launched, nworkers)));
2771  }
2772 
2773  /* Process the indexes that can be processed by only leader process */
 2774  do_serial_processing_for_unsafe_indexes(vacrel, lps->lvshared);
 2775 
2776  /*
2777  * Join as a parallel worker. The leader process alone processes all the
2778  * indexes in the case where no workers are launched.
2779  */
2780  do_parallel_processing(vacrel, lps->lvshared);
2781 
2782  /*
2783  * Next, accumulate buffer and WAL usage. (This must wait for the workers
2784  * to finish, or we might get incomplete data.)
2785  */
2786  if (nworkers > 0)
2787  {
2788  /* Wait for all vacuum workers to finish */
 2789  WaitForParallelWorkersToFinish(lps->pcxt);
 2790 
2791  for (int i = 0; i < lps->pcxt->nworkers_launched; i++)
 2792  InstrAccumParallelQuery(&lps->buffer_usage[i], &lps->wal_usage[i]);
 2793  }
2794 
2795  /*
2796  * Carry the shared balance value to heap scan and disable shared costing
2797  */
 2798  if (VacuumSharedCostBalance)
 2799  {
 2800  VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
 2801  VacuumSharedCostBalance = NULL;
2802  VacuumActiveNWorkers = NULL;
2803  }
2804 }
2805 
2806 /*
2807  * Index vacuum/cleanup routine used by the leader process and parallel
2808  * vacuum worker processes to process the indexes in parallel.
2809  */
2810 static void
 2811 do_parallel_processing(LVRelState *vacrel, LVShared *lvshared)
 2812 {
2813  /*
2814  * Increment the active worker count if we are able to launch any worker.
2815  */
 2816  if (VacuumActiveNWorkers)
 2817  pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
 2818 
2819  /* Loop until all indexes are vacuumed */
2820  for (;;)
2821  {
2822  int idx;
2823  LVSharedIndStats *shared_istat;
2824  Relation indrel;
2825  IndexBulkDeleteResult *istat;
2826 
2827  /* Get an index number to process */
2828  idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
2829 
2830  /* Done for all indexes? */
2831  if (idx >= vacrel->nindexes)
2832  break;
2833 
2834  /* Get the index statistics of this index from DSM */
2835  shared_istat = parallel_stats_for_idx(lvshared, idx);
2836 
2837  /* Skip indexes not participating in parallelism */
2838  if (shared_istat == NULL)
2839  continue;
2840 
2841  indrel = vacrel->indrels[idx];
2842 
2843  /*
2844  * Skip processing indexes that are unsafe for workers (these are
2845  * processed in do_serial_processing_for_unsafe_indexes() by leader)
2846  */
2847  if (!parallel_processing_is_safe(indrel, lvshared))
2848  continue;
2849 
2850  /* Do vacuum or cleanup of the index */
2851  istat = vacrel->indstats[idx];
2852  vacrel->indstats[idx] = parallel_process_one_index(indrel, istat,
2853  lvshared,
2854  shared_istat,
2855  vacrel);
2856  }
2857 
2858  /*
2859  * We have completed the index vacuum so decrement the active worker
2860  * count.
2861  */
 2862  if (VacuumActiveNWorkers)
 2863  pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
 2864 }
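
 Editor's note: do_parallel_processing() hands out index numbers with an atomic fetch-and-add on a shared counter, so each index is claimed by exactly one process. The standalone sketch below shows the same work-distribution idea with generic pthreads and C11 atomics; it is not PostgreSQL's parallel-worker infrastructure, and all names in it are hypothetical.

 #include <pthread.h>
 #include <stdatomic.h>
 #include <stdio.h>

 #define NINDEXES 8

 static atomic_uint next_idx;
 static int claimed_by[NINDEXES];

 /* Each worker claims indexes until the shared counter runs past the end. */
 static void *
 worker(void *arg)
 {
     int worker_id = (int) (long) arg;

     for (;;)
     {
         unsigned idx = atomic_fetch_add(&next_idx, 1); /* like pg_atomic_fetch_add_u32 */

         if (idx >= NINDEXES)
             break;                                     /* all indexes handed out */
         claimed_by[idx] = worker_id;                   /* "process" index idx */
     }
     return NULL;
 }

 int
 main(void)
 {
     pthread_t threads[3];

     for (long i = 0; i < 3; i++)
         pthread_create(&threads[i], NULL, worker, (void *) i);
     for (int i = 0; i < 3; i++)
         pthread_join(threads[i], NULL);

     for (int i = 0; i < NINDEXES; i++)
         printf("index %d handled by worker %d\n", i, claimed_by[i]);
     return 0;
 }
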
2865 
2866 /*
2867  * Vacuum or cleanup indexes that can be processed by only the leader process
2868  * because these indexes don't support parallel operation at that phase.
2869  */
2870 static void
 2871 do_serial_processing_for_unsafe_indexes(LVRelState *vacrel, LVShared *lvshared)
 2872 {
 2873  Assert(!IsParallelWorker());
 2874 
2875  /*
2876  * Increment the active worker count if we are able to launch any worker.
2877  */
 2878  if (VacuumActiveNWorkers)
 2879  pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
 2880 
2881  for (int idx = 0; idx < vacrel->nindexes; idx++)
2882  {
2883  LVSharedIndStats *shared_istat;
2884  Relation indrel;
2885  IndexBulkDeleteResult *istat;
2886 
2887  shared_istat = parallel_stats_for_idx(lvshared, idx);
2888 
2889  /* Skip already-complete indexes */
2890  if (shared_istat != NULL)
2891  continue;
2892 
2893  indrel = vacrel->indrels[idx];
2894 
2895  /*
2896  * We're only here for the unsafe indexes
2897  */
2898  if (parallel_processing_is_safe(indrel, lvshared))
2899  continue;
2900 
2901  /* Do vacuum or cleanup of the index */
2902  istat = vacrel->indstats[idx];
2903  vacrel->indstats[idx] = parallel_process_one_index(indrel, istat,
2904  lvshared,
2905  shared_istat,
2906  vacrel);
2907  }
2908 
2909  /*
2910  * We have completed the index vacuum so decrement the active worker
2911  * count.
2912  */
 2913  if (VacuumActiveNWorkers)
 2914  pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
 2915 }
2916 
2917 /*
2918  * Vacuum or cleanup index either by leader process or by one of the worker
2919  * process. After processing the index this function copies the index
2920  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
2921  * segment.
2922  */
2923 static IndexBulkDeleteResult *
 2924 parallel_process_one_index(Relation indrel,
 2925  IndexBulkDeleteResult *istat,
2926  LVShared *lvshared,
2927  LVSharedIndStats *shared_istat,
2928  LVRelState *vacrel)
2929 {
2930  IndexBulkDeleteResult *istat_res;
2931 
2932  /*
2933  * Update the pointer to the corresponding bulk-deletion result if someone
2934  * has already updated it
2935  */
2936  if (shared_istat && shared_istat->updated && istat == NULL)
2937  istat = &shared_istat->istat;
2938 
2939  /* Do vacuum or cleanup of the index */
2940  if (lvshared->for_cleanup)
2941  istat_res = lazy_cleanup_one_index(indrel, istat, lvshared->reltuples,
2942  lvshared->estimated_count, vacrel);
2943  else
2944  istat_res = lazy_vacuum_one_index(indrel, istat, lvshared->reltuples,
2945  vacrel);
2946 
2947  /*
2948  * Copy the index bulk-deletion result returned from ambulkdelete and
2949  * amvacuumcleanup to the DSM segment if it's the first cycle because they
2950  * allocate locally and it's possible that an index will be vacuumed by a
2951  * different vacuum process the next cycle. Copying the result normally
2952  * happens only the first time an index is vacuumed. For any additional
2953  * vacuum pass, we directly point to the result on the DSM segment and
2954  * pass it to vacuum index APIs so that workers can update it directly.
2955  *
2956  * Since all vacuum workers write the bulk-deletion result at different
2957  * slots we can write them without locking.
2958  */
2959  if (shared_istat && !shared_istat->updated && istat_res != NULL)
2960  {
2961  memcpy(&shared_istat->istat, istat_res, sizeof(IndexBulkDeleteResult));
2962  shared_istat->updated = true;
2963 
2964  /* Free the locally-allocated bulk-deletion result */
2965  pfree(istat_res);
2966 
2967  /* return the pointer to the result from shared memory */
2968  return &shared_istat->istat;
2969  }
2970 
2971  return istat_res;
2972 }
2973 
2974 /*
2975  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2976  */
2977 static void
 2978 lazy_cleanup_all_indexes(LVRelState *vacrel)
 2979 {
 2980  Assert(!IsParallelWorker());
 2981  Assert(vacrel->nindexes > 0);
2982 
2983  /* Report that we are now cleaning up indexes */
 2984  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 2985  PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
 2986 
2987  if (!ParallelVacuumIsActive(vacrel))
2988  {
2989  double reltuples = vacrel->new_rel_tuples;
2990  bool estimated_count =
2991  vacrel->tupcount_pages < vacrel->rel_pages;
2992 
2993  for (int idx = 0; idx < vacrel->nindexes; idx++)
2994  {
2995  Relation indrel = vacrel->indrels[idx];
2996  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2997 
2998  vacrel->indstats[idx] =
2999  lazy_cleanup_one_index(indrel, istat, reltuples,
3000  estimated_count, vacrel);
3001  }
3002  }
3003  else
3004  {
3005  /* Outsource everything to parallel variant */
 3006  do_parallel_lazy_cleanup_all_indexes(vacrel);
 3007  }
3008 }
3009 
3010 /*
3011  * lazy_vacuum_one_index() -- vacuum index relation.
3012  *
3013  * Delete all the index entries pointing to tuples listed in
3014  * dead_tuples, and update running statistics.
3015  *
3016  * reltuples is the number of heap tuples to be passed to the
3017  * bulkdelete callback. It's always assumed to be estimated.
3018  *
3019  * Returns bulk delete stats derived from input stats
3020  */
3021 static IndexBulkDeleteResult *
 3022 lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
 3023  double reltuples, LVRelState *vacrel)
3024 {
3025  IndexVacuumInfo ivinfo;
3026  PGRUsage ru0;
3027  LVSavedErrInfo saved_err_info;
3028 
3029  pg_rusage_init(&ru0);
3030 
3031  ivinfo.index = indrel;
3032  ivinfo.analyze_only = false;
3033  ivinfo.report_progress = false;
3034  ivinfo.estimated_count = true;
3035  ivinfo.message_level = elevel;
3036  ivinfo.num_heap_tuples = reltuples;
3037  ivinfo.strategy = vacrel->bstrategy;
3038 
3039  /*
3040  * Update error traceback information.
3041  *
3042  * The index name is saved during this phase and restored immediately
3043  * after this phase. See vacuum_error_callback.
3044  */
3045  Assert(vacrel->indname == NULL);
3046  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3047  update_vacuum_error_info(vacrel, &saved_err_info,
 3048  VACUUM_ERRCB_PHASE_VACUUM_INDEX,
 3049  InvalidBlockNumber, InvalidOffsetNumber);
 3050 
3051  /* Do bulk deletion */
3052  istat = index_bulk_delete(&ivinfo, istat, lazy_tid_reaped,
3053  (void *) vacrel->dead_tuples);
3054 
3055  ereport(elevel,
3056  (errmsg("scanned index \"%s\" to remove %d row versions",
3057  vacrel->indname, vacrel->dead_tuples->num_tuples),
3058  errdetail_internal("%s", pg_rusage_show(&ru0))));
3059 
3060  /* Revert to the previous phase information for error traceback */
3061  restore_vacuum_error_info(vacrel, &saved_err_info);
3062  pfree(vacrel->indname);
3063  vacrel->indname = NULL;
3064 
3065  return istat;
3066 }
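
 Editor's note: index_bulk_delete() above is driven by a callback (lazy_tid_reaped in this file) that answers "is this heap TID dead?" against the sorted dead_tuples array. The standalone sketch below illustrates that sorted-array-plus-binary-search idea; the struct and helper are hypothetical stand-ins for ItemPointerData and the real callback, not PostgreSQL code.

 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>

 /* Hypothetical stand-in for ItemPointerData: (block, offset) pairs. */
 typedef struct
 {
     unsigned block;
     unsigned short offset;
 } tid_t;

 static int
 tid_cmp(const void *a, const void *b)
 {
     const tid_t *ta = a;
     const tid_t *tb = b;

     if (ta->block != tb->block)
         return (ta->block < tb->block) ? -1 : 1;
     if (ta->offset != tb->offset)
         return (ta->offset < tb->offset) ? -1 : 1;
     return 0;
 }

 /* The dead TIDs are collected in (block, offset) order, so bsearch works. */
 static bool
 tid_is_dead(const tid_t *key, const tid_t *dead, size_t ndead)
 {
     return bsearch(key, dead, ndead, sizeof(tid_t), tid_cmp) != NULL;
 }

 int
 main(void)
 {
     tid_t dead[] = {{3, 2}, {3, 7}, {10, 1}, {10, 4}, {42, 9}};
     tid_t probe1 = {10, 4};
     tid_t probe2 = {10, 5};

     printf("(10,4) dead? %d\n", tid_is_dead(&probe1, dead, 5));
     printf("(10,5) dead? %d\n", tid_is_dead(&probe2, dead, 5));
     return 0;
 }
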
3067 
3068 /*
3069  * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3070  *
3071  * reltuples is the number of heap tuples and estimated_count is true
3072  * if reltuples is an estimated value.
3073  *
3074  * Returns bulk delete stats derived from input stats
3075  */
3076 static IndexBulkDeleteResult *
 3077 lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
 3078  double reltuples, bool estimated_count,
3079  LVRelState *vacrel)
3080 {
3081  IndexVacuumInfo ivinfo;
3082  PGRUsage ru0;
3083  LVSavedErrInfo saved_err_info;
3084 
3085  pg_rusage_init(&ru0);
3086 
3087  ivinfo.index = indrel;
3088  ivinfo.analyze_only = false;
3089  ivinfo.report_progress = false;
3090  ivinfo.estimated_count = estimated_count;
3091  ivinfo.message_level = elevel;
3092 
3093  ivinfo.num_heap_tuples = reltuples;
3094  ivinfo.strategy = vacrel->bstrategy;
3095 
3096  /*
3097  * Update error traceback information.
3098  *
3099  * The index name is saved during this phase and restored immediately
3100  * after this phase. See vacuum_error_callback.
3101  */
3102  Assert(vacrel->indname == NULL);
3103  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3104  update_vacuum_error_info(vacrel, &saved_err_info,
 3105  VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
 3106  InvalidBlockNumber, InvalidOffsetNumber);
 3107 
3108  istat = index_vacuum_cleanup(&ivinfo, istat);
3109 
3110  if (istat)
3111  {
3112  ereport(elevel,
3113  (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
3114  RelationGetRelationName(indrel),
3115  istat->num_index_tuples,
3116  istat->num_pages),
3117  errdetail("%.0f index row versions were removed.\n"
3118  "%u index pages were newly deleted.\n"
3119  "%u index pages are currently deleted, of which %u are currently reusable.\n"
3120  "%s.",
3121  istat->tuples_removed,
3122  istat->pages_newly_deleted,
3123  istat->pages_deleted, istat->pages_free,
3124  pg_rusage_show(&ru0))));
3125  }
3126 
3127  /* Revert to the previous phase information for error traceback */
3128  restore_vacuum_error_info(vacrel, &saved_err_info);
3129  pfree(vacrel->indname);
3130  vacrel->indname = NULL;
3131 
3132  return istat;
3133 }
3134 
3135 /*
3136  * should_attempt_truncation - should we attempt to truncate the heap?
3137  *
3138  * Don't even think about it unless we have a shot at releasing a goodly
3139  * number of pages. Otherwise, the time taken isn't worth it.
3140  *
3141  * Also don't attempt it if wraparound failsafe is in effect. It's hard to
3142  * predict how long lazy_truncate_heap will take. Don't take any chances.
3143  * There is very little chance of truncation working out when the failsafe is
3144  * in effect in any case. lazy_scan_prune makes the optimistic assumption
3145  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3146  * we're called.
3147  *
3148  * Also don't attempt it if we are doing early pruning/vacuuming, because a
3149  * scan which cannot find a truncated heap page cannot determine that the
3150  * snapshot is too old to read that page.
3151  *
3152  * This is split out so that we can test whether truncation is going to be
3153  * called for before we actually do it. If you change the logic here, be
3154  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
3155  */
3156 static bool
 3157 should_attempt_truncation(LVRelState *vacrel)
 3158 {
3159  BlockNumber possibly_freeable;
3160 
3161  if (!vacrel->do_rel_truncate || vacrel->failsafe_active)
3162  return false;
3163 
3164  possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3165  if (possibly_freeable > 0 &&
3166  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3167  possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION) &&
 3168  old_snapshot_threshold < 0)
 3169  return true;
3170  else
3171  return false;
3172 }
3173 
3174 /*
3175  * lazy_truncate_heap - try to truncate off any empty pages at the end
3176  */
3177 static void
 3178 lazy_truncate_heap(LVRelState *vacrel)
 3179 {
3180  BlockNumber orig_rel_pages = vacrel->rel_pages;
3181  BlockNumber new_rel_pages;
3182  bool lock_waiter_detected;
3183  int lock_retry;
3184 
3185  /* Report that we are now truncating */
 3186  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 3187  PROGRESS_VACUUM_PHASE_TRUNCATE);
 3188 
3189  /*
3190  * Loop until no more truncating can be done.
3191  */
3192  do
3193  {
3194  PGRUsage ru0;
3195 
3196  pg_rusage_init(&ru0);
3197 
3198  /*
3199  * We need full exclusive lock on the relation in order to do
3200  * truncation. If we can't get it, give up rather than waiting --- we
3201  * don't want to block other backends, and we don't want to deadlock
3202  * (which is quite possible considering we already hold a lower-grade
3203  * lock).
3204  */
3205  lock_waiter_detected = false;
3206  lock_retry = 0;
3207  while (true)
3208  {
 3209  if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
 3210  break;
3211 
3212  /*
3213  * Check for interrupts while trying to (re-)acquire the exclusive
3214  * lock.
3215  */
 3216  CHECK_FOR_INTERRUPTS();
 3217 
3218  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
 3219  VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
 3220  {
3221  /*
3222  * We failed to establish the lock in the specified number of
3223  * retries. This means we give up truncating.
3224  */
3225  ereport(elevel,
3226  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3227  vacrel->relname)));
3228  return;
3229  }
3230 
3231  (void) WaitLatch(MyLatch,
 3232  WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
 3233  VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
 3234  WAIT_EVENT_VACUUM_TRUNCATE);
 3235  ResetLatch(MyLatch);
 3236  }
3237 
3238  /*
3239  * Now that we have exclusive lock, look to see if the rel has grown
3240  * whilst we were vacuuming with non-exclusive lock. If so, give up;
3241  * the newly added pages presumably contain non-deletable tuples.
3242  */
3243  new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3244  if (new_rel_pages != orig_rel_pages)
3245  {
3246  /*
3247  * Note: we intentionally don't update vacrel->rel_pages with the
3248  * new rel size here. If we did, it would amount to assuming that
3249  * the new pages are empty, which is unlikely. Leaving the numbers
3250  * alone amounts to assuming that the new pages have the same
3251  * tuple density as existing ones, which is less unlikely.
3252  */
 3253  UnlockRelation(vacrel->rel, AccessExclusiveLock);
 3254  return;
3255  }
3256 
3257  /*
3258  * Scan backwards from the end to verify that the end pages actually
3259  * contain no tuples. This is *necessary*, not optional, because
3260  * other backends could have added tuples to these pages whilst we
3261  * were vacuuming.
3262  */
3263  new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3264  vacrel->blkno = new_rel_pages;
3265 
3266  if (new_rel_pages >= orig_rel_pages)
3267  {
3268  /* can't do anything after all */
 3269  UnlockRelation(vacrel->rel, AccessExclusiveLock);
 3270  return;
3271  }
3272 
3273  /*
3274  * Okay to truncate.
3275  */
3276  RelationTruncate(vacrel->rel, new_rel_pages);
3277 
3278  /*
3279  * We can release the exclusive lock as soon as we have truncated.
3280  * Other backends can't safely access the relation until they have
3281  * processed the smgr invalidation that smgrtruncate sent out ... but
3282  * that should happen as part of standard invalidation processing once
3283  * they acquire lock on the relation.
3284  */
 3285  UnlockRelation(vacrel->rel, AccessExclusiveLock);
 3286 
3287  /*
3288  * Update statistics. Here, it *is* correct to adjust rel_pages
3289  * without also touching reltuples, since the tuple count wasn't
3290  * changed by the truncation.
3291  */
3292  vacrel->pages_removed += orig_rel_pages - new_rel_pages;
3293  vacrel->rel_pages = new_rel_pages;
3294 
3295  ereport(elevel,
3296  (errmsg("table \"%s\": truncated %u to %u pages",
3297  vacrel->relname,
3298  orig_rel_pages, new_rel_pages),
3299  errdetail_internal("%s",
3300  pg_rusage_show(&ru0))));
3301  orig_rel_pages = new_rel_pages;
3302  } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3303 }
3304 
3305 /*
3306  * Rescan end pages to verify that they are (still) empty of tuples.
3307  *
3308  * Returns number of nondeletable pages (last nonempty page + 1).
3309  */
3310 static BlockNumber
3311 count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3312 {
3313  BlockNumber blkno;
3314  BlockNumber prefetchedUntil;
3315  instr_time starttime;
3316 
3317  /* Initialize the starttime if we check for conflicting lock requests */
3318  INSTR_TIME_SET_CURRENT(starttime);
3319 
3320  /*
3321  * Start checking blocks at what we believe relation end to be and move
3322  * backwards. (Strange coding of loop control is needed because blkno is
3323  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3324  * in forward direction, so that OS-level readahead can kick in.
3325  */
3326  blkno = vacrel->rel_pages;
 3327  StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
 3328  "prefetch size must be power of 2");
3329  prefetchedUntil = InvalidBlockNumber;
3330  while (blkno > vacrel->nonempty_pages)
3331  {
3332  Buffer buf;
3333  Page page;
3334  OffsetNumber offnum,
3335  maxoff;
3336  bool hastup;
3337 
3338  /*
3339  * Check if another process requests a lock on our relation. We are
3340  * holding an AccessExclusiveLock here, so they will be waiting. We
3341  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3342  * only check if that interval has elapsed once every 32 blocks to
3343  * keep the number of system calls and actual shared lock table
3344  * lookups to a minimum.
3345  */
3346  if ((blkno % 32) == 0)
3347  {
3348  instr_time currenttime;
3349  instr_time elapsed;
3350 
3351  INSTR_TIME_SET_CURRENT(currenttime);
3352  elapsed = currenttime;
3353  INSTR_TIME_SUBTRACT(elapsed, starttime);
3354  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3355  >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3356  {
3357  if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3358  {
3359  ereport(elevel,
3360  (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3361  vacrel->relname)));
3362 
3363  *lock_waiter_detected = true;
3364  return blkno;
3365  }
3366  starttime = currenttime;
3367  }
3368  }
3369 
3370  /*
3371  * We don't insert a vacuum delay point here, because we have an
3372  * exclusive lock on the table which we want to hold for as short a
3373  * time as possible. We still need to check for interrupts however.
3374  */
3375  CHECK_FOR_INTERRUPTS();
3376 
3377  blkno--;
3378 
3379  /* If we haven't prefetched this lot yet, do so now. */
3380  if (prefetchedUntil > blkno)
3381  {
3382  BlockNumber prefetchStart;
3383  BlockNumber pblkno;
3384 
3385  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3386  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3387  {
3388  PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3389  CHECK_FOR_INTERRUPTS();
3390  }
3391  prefetchedUntil = prefetchStart;
3392  }
3393 
3394  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3395  vacrel->bstrategy);
3396 
3397  /* In this phase we only need shared access to the buffer */
3398  LockBuffer(buf, BUFFER_LOCK_SHARE);
3399 
3400  page = BufferGetPage(buf);
3401 
3402  if (PageIsNew(page) || PageIsEmpty(page))
3403  {
3404  UnlockReleaseBuffer(buf);
3405  continue;
3406  }
3407 
3408  hastup = false;
3409  maxoff = PageGetMaxOffsetNumber(page);
3410  for (offnum = FirstOffsetNumber;
3411  offnum <= maxoff;
3412  offnum = OffsetNumberNext(offnum))
3413  {
3414  ItemId itemid;
3415 
3416  itemid = PageGetItemId(page, offnum);
3417 
3418  /*
3419  * Note: any non-unused item should be taken as a reason to keep
3420  * this page. Even an LP_DEAD item makes truncation unsafe, since
3421  * we must not have cleaned out its index entries.
3422  */
3423  if (ItemIdIsUsed(itemid))
3424  {
3425  hastup = true;
3426  break; /* can stop scanning */
3427  }
3428  } /* scan along page */
3429 
3430  UnlockReleaseBuffer(buf);
3431 
3432  /* Done scanning if we found a tuple here */
3433  if (hastup)
3434  return blkno + 1;
3435  }
3436 
3437  /*
3438  * If we fall out of the loop, all the previously-thought-to-be-empty
3439  * pages still are; we need not bother to look at the last known-nonempty
3440  * page.
3441  */
3442  return vacrel->nonempty_pages;
3443 }
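/*
 * Editor's example (not part of vacuumlazy.c): a standalone sketch of the
 * prefetch-window math used in count_nondeletable_pages() above, assuming
 * PREFETCH_SIZE is 32 (the real value is defined earlier in this file).
 * blkno & ~(PREFETCH_SIZE - 1) rounds blkno down to a PREFETCH_SIZE boundary,
 * so the forward loop issues sequential requests that OS readahead can see.
 */
#include <stdio.h>

typedef unsigned int BlockNumber;
#define PREFETCH_SIZE ((BlockNumber) 32)

int
main(void)
{
	BlockNumber blkno = 1000;	/* hypothetical block reached by the backward scan */
	BlockNumber prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
	BlockNumber pblkno;

	/* prints blocks 992 .. 1000, the window that would be prefetched */
	for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
		printf("prefetch block %u\n", pblkno);
	return 0;
}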
3444 
3445 /*
3446  * Return the maximum number of dead tuples we can record.
3447  */
3448 static long
3449 compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
3450 {
3451  long maxtuples;
3452  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
3453  autovacuum_work_mem != -1 ?
3454  autovacuum_work_mem : maintenance_work_mem;
3455 
3456  if (hasindex)
3457  {
3458  maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
3459  maxtuples = Min(maxtuples, INT_MAX);
3460  maxtuples = Min(maxtuples, MAXDEADTUPLES(MaxAllocSize));
3461 
3462  /* curious coding here to ensure the multiplication can't overflow */
3463  if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
3464  maxtuples = relblocks * LAZY_ALLOC_TUPLES;
3465 
3466  /* stay sane if small maintenance_work_mem */
3467  maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
3468  }
3469  else
3470  maxtuples = MaxHeapTuplesPerPage;
3471 
3472  return maxtuples;
3473 }
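/*
 * Editor's example (not part of vacuumlazy.c): back-of-the-envelope version
 * of compute_max_dead_tuples() above.  It assumes a 6-byte ItemPointerData,
 * MaxHeapTuplesPerPage of 291 (default 8kB pages) and LAZY_ALLOC_TUPLES equal
 * to MaxHeapTuplesPerPage; the real code additionally caps the result at
 * INT_MAX and MAXDEADTUPLES(MaxAllocSize).
 */
#include <stdio.h>

int
main(void)
{
	long		vac_work_mem_kb = 64 * 1024;	/* e.g. maintenance_work_mem = 64MB */
	long		itemptr_size = 6;
	long		tuples_per_page = 291;
	long		relblocks = 1000;	/* hypothetical table size in blocks */
	long		maxtuples = (vac_work_mem_kb * 1024L) / itemptr_size;

	/* don't allocate more slots than the table could possibly need */
	if (maxtuples / tuples_per_page > relblocks)
		maxtuples = relblocks * tuples_per_page;

	/* stay sane if maintenance_work_mem is tiny */
	if (maxtuples < tuples_per_page)
		maxtuples = tuples_per_page;

	printf("would track up to %ld dead item pointers\n", maxtuples);
	return 0;
}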
3474 
3475 /*
3476  * lazy_space_alloc - space allocation decisions for lazy vacuum
3477  *
3478  * See the comments at the head of this file for rationale.
3479  */
3480 static void
3481 lazy_space_alloc(LVRelState *vacrel, int nworkers, BlockNumber nblocks)
3482 {
3483  LVDeadTuples *dead_tuples;
3484  long maxtuples;
3485 
3486  /*
3487  * Initialize state for a parallel vacuum. As of now, only one worker can
3488  * be used for an index, so we invoke parallelism only if there are at
3489  * least two indexes on a table.
3490  */
3491  if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3492  {
3493  /*
3494  * Since parallel workers cannot access data in temporary tables, we
3495  * can't perform parallel vacuum on them.
3496  */
3497  if (RelationUsesLocalBuffers(vacrel->rel))
3498  {
3499  /*
3500  * Give warning only if the user explicitly tries to perform a
3501  * parallel vacuum on the temporary table.
3502  */
3503  if (nworkers > 0)
3504  ereport(WARNING,
3505  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3506  vacrel->relname)));
3507  }
3508  else
3509  vacrel->lps = begin_parallel_vacuum(vacrel, nblocks, nworkers);
3510 
3511  /* If parallel mode started, we're done */
3512  if (ParallelVacuumIsActive(vacrel))
3513  return;
3514  }
3515 
3516  maxtuples = compute_max_dead_tuples(nblocks, vacrel->nindexes > 0);
3517 
3518  dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
3519  dead_tuples->num_tuples = 0;
3520  dead_tuples->max_tuples = (int) maxtuples;
3521 
3522  vacrel->dead_tuples = dead_tuples;
3523 }
3524 
3525 /*
3526  * lazy_space_free - free space allocated in lazy_space_alloc
3527  */
3528 static void
3529 lazy_space_free(LVRelState *vacrel)
3530 {
3531  if (!ParallelVacuumIsActive(vacrel))
3532  return;
3533 
3534  /*
3535  * End parallel mode before updating index statistics as we cannot write
3536  * during parallel mode.
3537  */
3538  end_parallel_vacuum(vacrel);
3539 }
3540 
3541 /*
3542  * lazy_tid_reaped() -- is a particular tid deletable?
3543  *
3544  * This has the right signature to be an IndexBulkDeleteCallback.
3545  *
3546  * Assumes dead_tuples array is in sorted order.
3547  */
3548 static bool
3549 lazy_tid_reaped(ItemPointer itemptr, void *state)
3550 {
3551  LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
3552  int64 litem,
3553  ritem,
3554  item;
3555  ItemPointer res;
3556 
3557  litem = itemptr_encode(&dead_tuples->itemptrs[0]);
3558  ritem = itemptr_encode(&dead_tuples->itemptrs[dead_tuples->num_tuples - 1]);
3559  item = itemptr_encode(itemptr);
3560 
3561  /*
3562  * Doing a simple bound check before bsearch() is useful to avoid the
3563  * extra cost of bsearch(), especially if dead tuples on the heap are
3564  * concentrated in a certain range. Since this function is called for
3565  * every index tuple, it pays to be really fast.
3566  */
3567  if (item < litem || item > ritem)
3568  return false;
3569 
3570  res = (ItemPointer) bsearch((void *) itemptr,
3571  (void *) dead_tuples->itemptrs,
3572  dead_tuples->num_tuples,
3573  sizeof(ItemPointerData),
3574  vac_cmp_itemptr);
3575 
3576  return (res != NULL);
3577 }
3578 
3579 /*
3580  * Comparator routines for use with qsort() and bsearch().
3581  */
3582 static int
3583 vac_cmp_itemptr(const void *left, const void *right)
3584 {
3585  BlockNumber lblk,
3586  rblk;
3587  OffsetNumber loff,
3588  roff;
3589 
3590  lblk = ItemPointerGetBlockNumber((ItemPointer) left);
3591  rblk = ItemPointerGetBlockNumber((ItemPointer) right);
3592 
3593  if (lblk < rblk)
3594  return -1;
3595  if (lblk > rblk)
3596  return 1;
3597 
3598  loff = ItemPointerGetOffsetNumber((ItemPointer) left);
3599  roff = ItemPointerGetOffsetNumber((ItemPointer) right);
3600 
3601  if (loff < roff)
3602  return -1;
3603  if (loff > roff)
3604  return 1;
3605 
3606  return 0;
3607 }
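/*
 * Editor's example (not part of vacuumlazy.c): standalone sketch of the
 * lookup pattern behind lazy_tid_reaped() and vac_cmp_itemptr(): keep dead
 * TIDs sorted in (block, offset) order, reject out-of-range probes with a
 * cheap bound check, then fall back to bsearch().  The Tid struct and sample
 * data are illustrative stand-ins, not PostgreSQL types.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct Tid
{
	unsigned int block;
	unsigned short offset;
} Tid;

static int
tid_cmp(const void *left, const void *right)
{
	const Tid  *l = left;
	const Tid  *r = right;

	if (l->block != r->block)
		return (l->block < r->block) ? -1 : 1;
	if (l->offset != r->offset)
		return (l->offset < r->offset) ? -1 : 1;
	return 0;
}

int
main(void)
{
	Tid			dead[] = {{1, 3}, {1, 7}, {4, 2}, {9, 5}};
	size_t		ndead = sizeof(dead) / sizeof(dead[0]);
	Tid			probe = {4, 2};

	qsort(dead, ndead, sizeof(Tid), tid_cmp);

	/* cheap bound check first, since the lookup runs once per index tuple */
	if (tid_cmp(&probe, &dead[0]) < 0 || tid_cmp(&probe, &dead[ndead - 1]) > 0)
		printf("not dead\n");
	else
		printf("%s\n", bsearch(&probe, dead, ndead, sizeof(Tid), tid_cmp)
			   ? "dead" : "not dead");
	return 0;
}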
3608 
3609 /*
3610  * Check if every tuple in the given page is visible to all current and future
3611  * transactions. Also return the visibility_cutoff_xid which is the highest
3612  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3613  * on this page is frozen.
3614  */
3615 static bool
3616 heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3617  TransactionId *visibility_cutoff_xid,
3618  bool *all_frozen)
3619 {
3620  Page page = BufferGetPage(buf);
3621  BlockNumber blockno = BufferGetBlockNumber(buf);
3622  OffsetNumber offnum,
3623  maxoff;
3624  bool all_visible = true;
3625 
3626  *visibility_cutoff_xid = InvalidTransactionId;
3627  *all_frozen = true;
3628 
3629  /*
3630  * This is a stripped down version of the line pointer scan in
3631  * lazy_scan_heap(). So if you change anything here, also check that code.
3632  */
3633  maxoff = PageGetMaxOffsetNumber(page);
3634  for (offnum = FirstOffsetNumber;
3635  offnum <= maxoff && all_visible;
3636  offnum = OffsetNumberNext(offnum))
3637  {
3638  ItemId itemid;
3639  HeapTupleData tuple;
3640 
3641  /*
3642  * Set the offset number so that we can display it along with any
3643  * error that occurred while processing this tuple.
3644  */
3645  vacrel->offnum = offnum;
3646  itemid = PageGetItemId(page, offnum);
3647 
3648  /* Unused or redirect line pointers are of no interest */
3649  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3650  continue;
3651 
3652  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3653 
3654  /*
3655  * Dead line pointers can have index pointers pointing to them. So
3656  * they can't be treated as visible
3657  */
3658  if (ItemIdIsDead(itemid))
3659  {
3660  all_visible = false;
3661  *all_frozen = false;
3662  break;
3663  }
3664 
3665  Assert(ItemIdIsNormal(itemid));
3666 
3667  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3668  tuple.t_len = ItemIdGetLength(itemid);
3669  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3670 
3671  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf))
3672  {
3673  case HEAPTUPLE_LIVE:
3674  {
3675  TransactionId xmin;
3676 
3677  /* Check comments in lazy_scan_heap. */
3678  if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3679  {
3680  all_visible = false;
3681  *all_frozen = false;
3682  break;
3683  }
3684 
3685  /*
3686  * The inserter definitely committed. But is it old enough
3687  * that everyone sees it as committed?
3688  */
3689  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3690  if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
3691  {
3692  all_visible = false;
3693  *all_frozen = false;
3694  break;
3695  }
3696 
3697  /* Track newest xmin on page. */
3698  if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
3699  *visibility_cutoff_xid = xmin;
3700 
3701  /* Check whether this tuple is already frozen or not */
3702  if (all_visible && *all_frozen &&
3703  heap_tuple_needs_eventual_freeze(tuple.t_data))
3704  *all_frozen = false;
3705  }
3706  break;
3707 
3708  case HEAPTUPLE_DEAD:
3709  case HEAPTUPLE_RECENTLY_DEAD:
3710  case HEAPTUPLE_INSERT_IN_PROGRESS:
3711  case HEAPTUPLE_DELETE_IN_PROGRESS:
3712  {
3713  all_visible = false;
3714  *all_frozen = false;
3715  break;
3716  }
3717  default:
3718  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3719  break;
3720  }
3721  } /* scan along page */
3722 
3723  /* Clear the offset information once we have processed the given page. */
3724  vacrel->offnum = InvalidOffsetNumber;
3725 
3726  return all_visible;
3727 }
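/*
 * Editor's example (not part of vacuumlazy.c): toy model of the cutoff
 * bookkeeping in heap_page_is_all_visible() above.  A page is all-visible
 * only if every committed xmin precedes OldestXmin, and the reported cutoff
 * is the newest such xmin.  Plain '<' and '>' stand in for the wraparound-aware
 * TransactionIdPrecedes()/TransactionIdFollows() used by the real code.
 */
#include <stdio.h>

int
main(void)
{
	unsigned int OldestXmin = 1000;
	unsigned int xmins[] = {950, 800, 990};	/* committed xmins on one page */
	int			all_visible = 1;
	unsigned int visibility_cutoff_xid = 0;

	for (int i = 0; i < 3; i++)
	{
		if (!(xmins[i] < OldestXmin))	/* not yet old enough for everyone */
		{
			all_visible = 0;
			break;
		}
		if (xmins[i] > visibility_cutoff_xid)	/* track newest xmin on page */
			visibility_cutoff_xid = xmins[i];
	}

	printf("all_visible=%d cutoff=%u\n", all_visible, visibility_cutoff_xid);
	return 0;
}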
3728 
3729 /*
3730  * Compute the number of parallel worker processes to request. Both index
3731  * vacuum and index cleanup can be executed with parallel workers. The index
3732  * is eligible for parallel vacuum iff its size is greater than
3733  * min_parallel_index_scan_size as invoking workers for very small indexes
3734  * can hurt performance.
3735  *
3736  * nrequested is the number of parallel workers that the user requested. If
3737  * nrequested is 0, we compute the parallel degree based on nindexes, that is,
3738  * the number of indexes that support parallel vacuum. This function also
3739  * sets can_parallel_vacuum to remember indexes that participate in parallel
3740  * vacuum.
3741  */
3742 static int
3743 compute_parallel_vacuum_workers(LVRelState *vacrel, int nrequested,
3744  bool *can_parallel_vacuum)
3745 {
3746  int nindexes_parallel = 0;
3747  int nindexes_parallel_bulkdel = 0;
3748  int nindexes_parallel_cleanup = 0;
3749  int parallel_workers;
3750 
3751  /*
3752  * We don't allow performing parallel operation in standalone backend or
3753  * when parallelism is disabled.
3754  */
3755  if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
3756  return 0;
3757 
3758  /*
3759  * Compute the number of indexes that can participate in parallel vacuum.
3760  */
3761  for (int idx = 0; idx < vacrel->nindexes; idx++)
3762  {
3763  Relation indrel = vacrel->indrels[idx];
3764  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3765 
3766  if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
3767  RelationGetNumberOfBlocks(indrel) < min_parallel_index_scan_size)
3768  continue;
3769 
3770  can_parallel_vacuum[idx] = true;
3771 
3772  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3773  nindexes_parallel_bulkdel++;
3774  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
3775  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3776  nindexes_parallel_cleanup++;
3777  }
3778 
3779  nindexes_parallel = Max(nindexes_parallel_bulkdel,
3780  nindexes_parallel_cleanup);
3781 
3782  /* The leader process takes one index */
3783  nindexes_parallel--;
3784 
3785  /* No index supports parallel vacuum */
3786  if (nindexes_parallel <= 0)
3787  return 0;
3788 
3789  /* Compute the parallel degree */
3790  parallel_workers = (nrequested > 0) ?
3791  Min(nrequested, nindexes_parallel) : nindexes_parallel;
3792 
3793  /* Cap by max_parallel_maintenance_workers */
3794  parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
3795 
3796  return parallel_workers;
3797 }
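/*
 * Editor's example (not part of vacuumlazy.c): worked example of the degree
 * computation in compute_parallel_vacuum_workers() above, with hypothetical
 * inputs: 2 indexes support parallel bulkdelete, 3 support parallel cleanup,
 * the user asked for 8 workers, and max_parallel_maintenance_workers is 2.
 */
#include <stdio.h>

#define Max(x, y) ((x) > (y) ? (x) : (y))
#define Min(x, y) ((x) < (y) ? (x) : (y))

int
main(void)
{
	int			nindexes_parallel_bulkdel = 2;
	int			nindexes_parallel_cleanup = 3;
	int			nrequested = 8;
	int			max_parallel_maintenance_workers = 2;
	int			nindexes_parallel;
	int			parallel_workers;

	nindexes_parallel = Max(nindexes_parallel_bulkdel,
							nindexes_parallel_cleanup);
	nindexes_parallel--;		/* the leader itself takes one index */

	parallel_workers = (nrequested > 0) ?
		Min(nrequested, nindexes_parallel) : nindexes_parallel;
	parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);

	printf("launch %d worker(s)\n", parallel_workers);	/* prints 2 */
	return 0;
}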
3798 
3799 /*
3800  * Update index statistics in pg_class if the statistics are accurate.
3801  */
3802 static void
3803 update_index_statistics(LVRelState *vacrel)
3804 {
3805  Relation *indrels = vacrel->indrels;
3806  int nindexes = vacrel->nindexes;
3807  IndexBulkDeleteResult **indstats = vacrel->indstats;
3808 
3809  Assert(!IsInParallelMode());
3810 
3811  for (int idx = 0; idx < nindexes; idx++)
3812  {
3813  Relation indrel = indrels[idx];
3814  IndexBulkDeleteResult *istat = indstats[idx];
3815 
3816  if (istat == NULL || istat->estimated_count)
3817  continue;
3818 
3819  /* Update index statistics */
3820  vac_update_relstats(indrel,
3821  istat->num_pages,
3822  istat->num_index_tuples,
3823  0,
3824  false,
3825  InvalidTransactionId,
3826  InvalidMultiXactId,
3827  false);
3828  }
3829 }
3830 
3831 /*
3832  * This function prepares and returns parallel vacuum state if we can launch
3833  * even one worker. This function is responsible for entering parallel mode,
3834  * creating a parallel context, and then initializing the DSM segment.
3835  */
3836 static LVParallelState *
3837 begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
3838  int nrequested)
3839 {
3840  LVParallelState *lps = NULL;
3841  Relation *indrels = vacrel->indrels;
3842  int nindexes = vacrel->nindexes;
3843  ParallelContext *pcxt;
3844  LVShared *shared;
3845  LVDeadTuples *dead_tuples;
3846  BufferUsage *buffer_usage;
3847  WalUsage *wal_usage;
3848  bool *can_parallel_vacuum;
3849  long maxtuples;
3850  Size est_shared;
3851  Size est_deadtuples;
3852  int nindexes_mwm = 0;
3853  int parallel_workers = 0;
3854  int querylen;
3855 
3856  /*
3857  * A parallel vacuum must be requested and there must be indexes on the
3858  * relation
3859  */
3860  Assert(nrequested >= 0);
3861  Assert(nindexes > 0);
3862 
3863  /*
3864  * Compute the number of parallel vacuum workers to launch
3865  */
3866  can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
3867  parallel_workers = compute_parallel_vacuum_workers(vacrel,
3868  nrequested,
3869  can_parallel_vacuum);
3870 
3871  /* Can't perform vacuum in parallel */
3872  if (parallel_workers <= 0)
3873  {
3874  pfree(can_parallel_vacuum);
3875  return lps;
3876  }
3877 
3878  lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
3879 
3881  pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
3882  parallel_workers);
3883  Assert(pcxt->nworkers > 0);
3884  lps->pcxt = pcxt;
3885 
3886  /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
3887  est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3888  for (int idx = 0; idx < nindexes; idx++)
3889  {
3890  Relation indrel = indrels[idx];
3891  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3892 
3893  /*
3894  * Cleanup option should be either disabled, always performing in
3895  * parallel or conditionally performing in parallel.
3896  */
3897  Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
3898  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
3899  Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
3900 
3901  /* Skip indexes that don't participate in parallel vacuum */
3902  if (!can_parallel_vacuum[idx])
3903  continue;
3904 
3905  if (indrel->rd_indam->amusemaintenanceworkmem)
3906  nindexes_mwm++;
3907 
3908  est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
3909 
3910  /*
3911  * Remember the number of indexes that support parallel operation for
3912  * each phase.
3913  */
3914  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3915  lps->nindexes_parallel_bulkdel++;
3916  if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
3917  lps->nindexes_parallel_cleanup++;
3918  if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
3919  lps->nindexes_parallel_condcleanup++;
3920  }
3921  shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
3922  shm_toc_estimate_keys(&pcxt->estimator, 1);
3923 
3924  /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
3925  maxtuples = compute_max_dead_tuples(nblocks, true);
3926  est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
3927  shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
3928  shm_toc_estimate_keys(&pcxt->estimator, 1);
3929 
3930  /*
3931  * Estimate space for BufferUsage and WalUsage --
3932  * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
3933  *
3934  * If there are no extensions loaded that care, we could skip this. We
3935  * have no way of knowing whether anyone's looking at pgBufferUsage or
3936  * pgWalUsage, so do it unconditionally.
3937  */
3939  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3940  shm_toc_estimate_keys(&pcxt->estimator, 1);
3942  mul_size(sizeof(WalUsage), pcxt->nworkers));
3943  shm_toc_estimate_keys(&pcxt->estimator, 1);
3944 
3945  /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
3946  if (debug_query_string)
3947  {
3948  querylen = strlen(debug_query_string);
3949  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
3950  shm_toc_estimate_keys(&pcxt->estimator, 1);
3951  }
3952  else
3953  querylen = 0; /* keep compiler quiet */
3954 
3955  InitializeParallelDSM(pcxt);
3956 
3957  /* Prepare shared information */
3958  shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
3959  MemSet(shared, 0, est_shared);
3960  shared->relid = RelationGetRelid(vacrel->rel);
3961  shared->elevel = elevel;
3962  shared->maintenance_work_mem_worker =
3963  (nindexes_mwm > 0) ?
3964  maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
3965  maintenance_work_mem;
3966 
3967  pg_atomic_init_u32(&(shared->cost_balance), 0);
3968  pg_atomic_init_u32(&(shared->active_nworkers), 0);
3969  pg_atomic_init_u32(&(shared->idx), 0);
3970  shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3971 
3972  /*
3973  * Initialize variables for shared index statistics, set NULL bitmap and
3974  * the size of stats for each index.
3975  */
3976  memset(shared->bitmap, 0x00, BITMAPLEN(nindexes));
3977  for (int idx = 0; idx < nindexes; idx++)
3978  {
3979  if (!can_parallel_vacuum[idx])
3980  continue;
3981 
3982  /* Set NOT NULL as this index does support parallelism */
3983  shared->bitmap[idx >> 3] |= 1 << (idx & 0x07);
3984  }
3985 
3986  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
3987  lps->lvshared = shared;
3988 
3989  /* Prepare the dead tuple space */
3990  dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
3991  dead_tuples->max_tuples = maxtuples;
3992  dead_tuples->num_tuples = 0;
3993  MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
3994  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
3995  vacrel->dead_tuples = dead_tuples;
3996 
3997  /*
3998  * Allocate space for each worker's BufferUsage and WalUsage; no need to
3999  * initialize
4000  */
4001  buffer_usage = shm_toc_allocate(pcxt->toc,
4002  mul_size(sizeof(BufferUsage), pcxt->nworkers));
4003  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
4004  lps->buffer_usage = buffer_usage;
4005  wal_usage = shm_toc_allocate(pcxt->toc,
4006  mul_size(sizeof(WalUsage), pcxt->nworkers));
4007  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage);
4008  lps->wal_usage = wal_usage;
4009 
4010  /* Store query string for workers */
4011  if (debug_query_string)
4012  {
4013  char *sharedquery;
4014 
4015  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
4016  memcpy(sharedquery, debug_query_string, querylen + 1);
4017  sharedquery[querylen] = '\0';
4018  shm_toc_insert(pcxt->toc,
4019  PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
4020  }
4021 
4022  pfree(can_parallel_vacuum);
4023  return lps;
4024 }
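/*
 * Editor's example (not part of vacuumlazy.c): minimal sketch of the NULL
 * bitmap that begin_parallel_vacuum() above sets up for shared index stats.
 * Bit (idx & 0x07) of byte (idx >> 3) is set when index 'idx' gets a stats
 * slot in the DSM segment; the 8-index pattern here is purely illustrative.
 */
#include <stdio.h>
#include <string.h>

typedef unsigned char bits8;

int
main(void)
{
	bits8		bitmap[2];
	int			can_parallel_vacuum[8] = {1, 0, 1, 1, 0, 0, 1, 0};
	int			idx;

	memset(bitmap, 0, sizeof(bitmap));
	for (idx = 0; idx < 8; idx++)
		if (can_parallel_vacuum[idx])
			bitmap[idx >> 3] |= 1 << (idx & 0x07);

	for (idx = 0; idx < 8; idx++)
		printf("index %d: %s\n", idx,
			   (bitmap[idx >> 3] & (1 << (idx & 0x07))) ? "has slot" : "NULL");
	return 0;
}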
4025 
4026 /*
4027  * Destroy the parallel context, and end parallel mode.
4028  *
4029  * Since writes are not allowed during parallel mode, copy the
4030  * updated index statistics from DSM into local memory and then later use that
4031  * to update the index statistics. One might think that we can exit from
4032  * parallel mode, update the index statistics and then destroy parallel
4033  * context, but that won't be safe (see ExitParallelMode).
4034  */
4035 static void
4036 end_parallel_vacuum(LVRelState *vacrel)
4037 {
4038  IndexBulkDeleteResult **indstats = vacrel->indstats;
4039  LVParallelState *lps = vacrel->lps;
4040  int nindexes = vacrel->nindexes;
4041 
4042  Assert(!IsParallelWorker());
4043 
4044  /* Copy the updated statistics */
4045  for (int idx = 0; idx < nindexes; idx++)
4046  {
4047  LVSharedIndStats *shared_istat;
4048 
4049  shared_istat = parallel_stats_for_idx(lps->lvshared, idx);
4050 
4051  /*
4052  * Skip unused slot. The statistics of this index are already stored
4053  * in local memory.
4054  */
4055  if (shared_istat == NULL)
4056  continue;
4057 
4058  if (shared_istat->updated)
4059  {
4060  indstats[idx] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
4061  memcpy(indstats[idx], &shared_istat->istat, sizeof(IndexBulkDeleteResult));
4062  }
4063  else
4064  indstats[idx] = NULL;
4065  }
4066 
4067  DestroyParallelContext(lps->pcxt);
4068  ExitParallelMode();
4069 
4070  /* Deactivate parallel vacuum */
4071  pfree(lps);
4072  vacrel->lps = NULL;
4073 }
4074 
4075 /*
4076  * Return shared memory statistics for index at offset 'getidx', if any
4077  */
4078 static LVSharedIndStats *
4079 parallel_stats_for_idx(LVShared *lvshared, int getidx)
4080 {
4081  char *p;
4082 
4083  if (IndStatsIsNull(lvshared, getidx))
4084  return NULL;
4085 
4086  p = (char *) GetSharedIndStats(lvshared);
4087  for (int idx = 0; idx < getidx; idx++)
4088  {
4089  if (IndStatsIsNull(lvshared, idx))
4090  continue;
4091 
4092  p += sizeof(LVSharedIndStats);
4093  }
4094 
4095  return (LVSharedIndStats *) p;
4096 }
4097 
4098 /*
4099  * Returns false if the given index can't participate in parallel index
4100  * vacuum or parallel index cleanup
4101  */
4102 static bool
4103 parallel_processing_is_safe(Relation indrel, LVShared *lvshared)
4104 {
4105  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
4106 
4107  /* first_time must be true only if for_cleanup is true */
4108  Assert(lvshared->for_cleanup || !lvshared->first_time);
4109 
4110  if (lvshared->for_cleanup)
4111  {
4112  /* Skip, if the index does not support parallel cleanup */
4113  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
4114  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
4115  return false;
4116 
4117  /*
4118  * Skip, if the index supports parallel cleanup conditionally, but we
4119  * have already processed the index (for bulkdelete). See the
4120  * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
4121  * when indexes support parallel cleanup conditionally.
4122  */
4123  if (!lvshared->first_time &&
4124  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
4125  return false;
4126  }
4127  else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
4128  {
4129  /* Skip if the index does not support parallel bulk deletion */
4130  return false;
4131  }
4132 
4133  return true;
4134 }
4135 
4136 /*
4137  * Perform work within a launched parallel process.
4138  *
4139  * Since parallel vacuum workers perform only index vacuum or index cleanup,
4140  * we don't need to report progress information.
4141  */
4142 void
4143 parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
4144 {
4145  Relation rel;
4146  Relation *indrels;
4147  LVShared *lvshared;
4148  LVDeadTuples *dead_tuples;
4149  BufferUsage *buffer_usage;
4150  WalUsage *wal_usage;
4151  int nindexes;
4152  char *sharedquery;
4153  LVRelState vacrel;
4154  ErrorContextCallback errcallback;
4155 
4156  lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED,
4157  false);
4158  elevel = lvshared->elevel;
4159 
4160  if (lvshared->for_cleanup)
4161  elog(DEBUG1, "starting parallel vacuum worker for cleanup");
4162  else
4163  elog(DEBUG1, "starting parallel vacuum worker for bulk delete");
4164 
4165  /* Set debug_query_string for individual workers */
4166  sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true);
4167  debug_query_string = sharedquery;
4168  pgstat_report_activity(STATE_RUNNING, debug_query_string);
4169 
4170  /*
4171  * Open table. The lock mode is the same as the leader process. It's
4172  * okay because the lock mode does not conflict among the parallel
4173  * workers.
4174  */
4175  rel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
4176 
4177  /*
4178  * Open all indexes. indrels are sorted in order by OID, which should be
4179  * matched to the leader's one.
4180  */
4181  vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
4182  Assert(nindexes > 0);
4183 
4184  /* Set dead tuple space */
4185  dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
4186  PARALLEL_VACUUM_KEY_DEAD_TUPLES,
4187  false);
4188 
4189  /* Set cost-based vacuum delay */
4190  VacuumCostActive = (VacuumCostDelay > 0);
4191  VacuumCostBalance = 0;
4192  VacuumPageHit = 0;
4193  VacuumPageMiss = 0;
4194  VacuumPageDirty = 0;
4195  VacuumCostBalanceLocal = 0;
4196  VacuumSharedCostBalance = &(lvshared->cost_balance);
4197  VacuumActiveNWorkers = &(lvshared->active_nworkers);
4198 
4199  vacrel.rel = rel;
4200  vacrel.indrels = indrels;
4201  vacrel.nindexes = nindexes;
4202  /* Each parallel VACUUM worker gets its own access strategy */
4203  vacrel.bstrategy = GetAccessStrategy(BAS_VACUUM);
4204  vacrel.indstats = (IndexBulkDeleteResult **)
4205  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
4206 
4207  if (lvshared->maintenance_work_mem_worker > 0)
4208  maintenance_work_mem = lvshared->maintenance_work_mem_worker;
4209 
4210  /*
4211  * Initialize vacrel for use as error callback arg by parallel worker.
4212  */
4213  vacrel.relnamespace = get_namespace_name(RelationGetNamespace(rel));
4214  vacrel.relname = pstrdup(RelationGetRelationName(rel));
4215  vacrel.indname = NULL;
4216  vacrel.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
4217  vacrel.dead_tuples = dead_tuples;
4218 
4219  /* Setup error traceback support for ereport() */
4220  errcallback.callback = vacuum_error_callback;
4221  errcallback.arg = &vacrel;
4222  errcallback.previous = error_context_stack;
4223  error_context_stack = &errcallback;
4224 
4225  /* Prepare to track buffer usage during parallel execution */
4226  InstrStartParallelQuery();
4227 
4228  /* Process indexes to perform vacuum/cleanup */
4229  do_parallel_processing(&vacrel, lvshared);
4230 
4231  /* Report buffer/WAL usage during parallel execution */
4232  buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
4233  wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
4234  InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
4235  &wal_usage[ParallelWorkerNumber]);
4236 
4237  /* Pop the error context stack */
4238  error_context_stack = errcallback.previous;
4239 
4240  vac_close_indexes(nindexes, indrels, RowExclusiveLock);
4241  table_close(rel, ShareUpdateExclusiveLock);
4242  FreeAccessStrategy(vacrel.bstrategy);
4243  pfree(vacrel.indstats);
4244 }
4245 
4246 /*
4247  * Error context callback for errors occurring during vacuum.
4248  */
4249 static void
4250 vacuum_error_callback(void *arg)
4251 {
4252  LVRelState *errinfo = arg;
4253 
4254  switch (errinfo->phase)
4255  {
4256  case VACUUM_ERRCB_PHASE_SCAN_HEAP:
4257  if (BlockNumberIsValid(errinfo->blkno))
4258  {
4259  if (OffsetNumberIsValid(errinfo->offnum))
4260  errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
4261  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
4262  else
4263  errcontext("while scanning block %u of relation \"%s.%s\"",
4264  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
4265  }
4266  else
4267  errcontext("while scanning relation \"%s.%s\"",
4268  errinfo->relnamespace, errinfo->relname);
4269  break;
4270 
4271  case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
4272  if (BlockNumberIsValid(errinfo->blkno))
4273  {
4274  if (OffsetNumberIsValid(errinfo->offnum))
4275  errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
4276  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
4277  else
4278  errcontext("while vacuuming block %u of relation \"%s.%s\"",
4279  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
4280  }
4281  else
4282  errcontext("while vacuuming relation \"%s.%s\"",
4283  errinfo->relnamespace, errinfo->relname);
4284  break;
4285 
4286  case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
4287  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
4288  errinfo->indname, errinfo->relnamespace, errinfo->relname);
4289  break;
4290 
4291  case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
4292  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
4293  errinfo->indname, errinfo->relnamespace, errinfo->relname);
4294  break;
4295 
4296  case VACUUM_ERRCB_PHASE_TRUNCATE:
4297  if (BlockNumberIsValid(errinfo->blkno))
4298  errcontext("while truncating relation \"%s.%s\" to %u blocks",
4299  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
4300  break;
4301 
4302  case VACUUM_ERRCB_PHASE_UNKNOWN:
4303  default:
4304  return; /* do nothing; the errinfo may not be
4305  * initialized */
4306  }
4307 }
4308 
4309 /*
4310  * Updates the information required for vacuum error callback. This also saves
4311  * the current information which can be later restored via restore_vacuum_error_info.
4312  */
4313 static void
4314 update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
4315  int phase, BlockNumber blkno, OffsetNumber offnum)
4316 {
4317  if (saved_vacrel)
4318  {
4319  saved_vacrel->offnum = vacrel->offnum;
4320  saved_vacrel->blkno = vacrel->blkno;
4321  saved_vacrel->phase = vacrel->phase;
4322  }
4323 
4324  vacrel->blkno = blkno;
4325  vacrel->offnum = offnum;
4326  vacrel->phase = phase;
4327 }
4328 
4329 /*
4330  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
4331  */
4332 static void
4333 restore_vacuum_error_info(LVRelState *vacrel,
4334  const LVSavedErrInfo *saved_vacrel)
4335 {
4336  vacrel->blkno = saved_vacrel->blkno;
4337  vacrel->offnum = saved_vacrel->offnum;
4338  vacrel->phase = saved_vacrel->phase;
4339 }
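/*
 * Editor's example (not part of vacuumlazy.c): standalone sketch of the
 * save/override/restore pattern implemented by update_vacuum_error_info()
 * and restore_vacuum_error_info() above.  The ErrInfo struct and the phase
 * numbers are simplified stand-ins for LVRelState/LVSavedErrInfo fields.
 */
#include <stdio.h>

typedef struct ErrInfo
{
	int			phase;
	unsigned int blkno;
	unsigned short offnum;
} ErrInfo;

static void
update_info(ErrInfo *cur, ErrInfo *saved, int phase,
			unsigned int blkno, unsigned short offnum)
{
	if (saved)
		*saved = *cur;			/* remember the outer context */
	cur->phase = phase;
	cur->blkno = blkno;
	cur->offnum = offnum;
}

int
main(void)
{
	ErrInfo		cur = {1, 10, 3};	/* e.g. "scanning heap", block 10 */
	ErrInfo		saved;

	update_info(&cur, &saved, 2, 42, 0);	/* enter a nested phase at block 42 */
	printf("nested: phase=%d blkno=%u\n", cur.phase, cur.blkno);

	cur = saved;				/* restore_vacuum_error_info() equivalent */
	printf("restored: phase=%d blkno=%u\n", cur.phase, cur.blkno);
	return 0;
}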