1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  *
7  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
8  * TIDs. We want to ensure we can vacuum even the very largest relations with
9  * finite memory space usage. To do that, we set upper bounds on the number of
10  * tuples we will keep track of at once.
11  *
12  * We are willing to use at most maintenance_work_mem (or perhaps
13  * autovacuum_work_mem) memory space to keep track of dead tuples. We
14  * initially allocate an array of TIDs of that size, with an upper limit that
15  * depends on table size (this limit ensures we don't allocate a huge area
16  * uselessly for vacuuming small tables). If the array threatens to overflow,
17  * we suspend the heap scan phase and perform a pass of index cleanup and page
18  * compaction, then resume the heap scan with an empty TID array.
19  *
20  * If we're processing a table with no indexes, we can just vacuum each page
21  * as we go; there's no need to save up multiple tuples to minimize the number
22  * of index scans performed. So we don't use maintenance_work_mem memory for
23  * the TID array, just enough to hold as many heap tuples as fit on one page.
24  *
25  * Lazy vacuum supports parallel execution with parallel worker processes. In
26  * a parallel vacuum, we perform both index vacuum and index cleanup with
27  * parallel worker processes. Individual indexes are processed by one vacuum
28  * process. At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
29  * the parallel context and initialize the DSM segment that contains shared
30  * information as well as the memory space for storing dead tuples. When
31  * starting either index vacuum or index cleanup, we launch parallel worker
32  * processes. Once all indexes are processed the parallel worker processes
33  * exit. After that, the leader process re-initializes the parallel context
34  * so that it can use the same DSM for multiple passes of index vacuum and
 35  * for performing index cleanup. Updating the index statistics requires
 36  * updating a system catalog, and since such updates are not allowed while
 37  * in parallel mode, we update the index statistics only after exiting
 38  * parallel mode.
39  *
40  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
41  * Portions Copyright (c) 1994, Regents of the University of California
42  *
43  *
44  * IDENTIFICATION
45  * src/backend/access/heap/vacuumlazy.c
46  *
47  *-------------------------------------------------------------------------
48  */
49 #include "postgres.h"
50 
51 #include <math.h>
52 
53 #include "access/amapi.h"
54 #include "access/genam.h"
55 #include "access/heapam.h"
56 #include "access/heapam_xlog.h"
57 #include "access/htup_details.h"
58 #include "access/multixact.h"
59 #include "access/parallel.h"
60 #include "access/transam.h"
61 #include "access/visibilitymap.h"
62 #include "access/xact.h"
63 #include "access/xlog.h"
64 #include "catalog/index.h"
65 #include "catalog/storage.h"
66 #include "commands/dbcommands.h"
67 #include "commands/progress.h"
68 #include "commands/vacuum.h"
69 #include "executor/instrument.h"
70 #include "miscadmin.h"
71 #include "optimizer/paths.h"
72 #include "pgstat.h"
73 #include "portability/instr_time.h"
74 #include "postmaster/autovacuum.h"
75 #include "storage/bufmgr.h"
76 #include "storage/freespace.h"
77 #include "storage/lmgr.h"
78 #include "tcop/tcopprot.h"
79 #include "utils/lsyscache.h"
80 #include "utils/memutils.h"
81 #include "utils/pg_rusage.h"
82 #include "utils/timestamp.h"
83 
84 
85 /*
86  * Space/time tradeoff parameters: do these need to be user-tunable?
87  *
88  * To consider truncating the relation, we want there to be at least
89  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
90  * is less) potentially-freeable pages.
91  */
92 #define REL_TRUNCATE_MINIMUM 1000
93 #define REL_TRUNCATE_FRACTION 16
94 
95 /*
96  * Timing parameters for truncate locking heuristics.
97  *
98  * These were not exposed as user tunable GUC values because it didn't seem
99  * that the potential for improvement was great enough to merit the cost of
100  * supporting them.
101  */
102 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
103 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
104 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
105 
106 /*
107  * Threshold that controls whether we bypass index vacuuming and heap
108  * vacuuming as an optimization
109  */
110 #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
111 
112 /*
113  * When a table is small (i.e. smaller than this), save cycles by avoiding
114  * repeated failsafe checks
115  */
116 #define FAILSAFE_MIN_PAGES \
117  ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
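/*
 * Illustrative aside (not part of the original file): with the default 8KB
 * BLCKSZ, FAILSAFE_MIN_PAGES works out to 524,288 heap pages, so tables of
 * up to 4GB avoid the repeated wraparound-failsafe checks entirely.
 */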
118 
119 /*
120  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
121  * (it won't be exact because we only vacuum FSM after processing a heap page
122  * that has some removable tuples). When there are indexes, this is ignored,
123  * and we vacuum FSM after each index/heap cleaning pass.
124  */
125 #define VACUUM_FSM_EVERY_PAGES \
126  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
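/*
 * Illustrative aside (not part of the original file): with the default 8KB
 * BLCKSZ, VACUUM_FSM_EVERY_PAGES comes to 1,048,576 heap pages between FSM
 * vacuuming passes in the no-index case.
 */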
127 
128 /*
129  * Guesstimation of number of dead tuples per page. This is used to
130  * provide an upper limit to memory allocated when vacuuming small
131  * tables.
132  */
133 #define LAZY_ALLOC_TUPLES MaxHeapTuplesPerPage
134 
135 /*
 136  * Before we consider skipping a page that's marked as clean in the
 137  * visibility map, we must've seen at least this many clean pages.
138  */
139 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
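/*
 * Illustrative aside (not part of the original file): 32 consecutive
 * skippable pages is only 256KB at the default BLCKSZ; shorter runs are
 * read anyway so that OS readahead is not defeated for little benefit.
 */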
140 
141 /*
142  * Size of the prefetch window for lazy vacuum backwards truncation scan.
143  * Needs to be a power of 2.
144  */
145 #define PREFETCH_SIZE ((BlockNumber) 32)
146 
147 /*
148  * DSM keys for parallel vacuum. Unlike other parallel execution code, since
149  * we don't need to worry about DSM keys conflicting with plan_node_id we can
150  * use small integers.
151  */
152 #define PARALLEL_VACUUM_KEY_SHARED 1
153 #define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
154 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
155 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
156 #define PARALLEL_VACUUM_KEY_WAL_USAGE 5
157 
158 /*
159  * Macro to check if we are in a parallel vacuum. If true, we are in the
160  * parallel mode and the DSM segment is initialized.
161  */
162 #define ParallelVacuumIsActive(vacrel) ((vacrel)->lps != NULL)
163 
164 /* Phases of vacuum during which we report error context. */
165 typedef enum
 166 {
 167  VACUUM_ERRCB_PHASE_UNKNOWN,
 168  VACUUM_ERRCB_PHASE_SCAN_HEAP,
 169  VACUUM_ERRCB_PHASE_VACUUM_INDEX,
 170  VACUUM_ERRCB_PHASE_VACUUM_HEAP,
 171  VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
 172  VACUUM_ERRCB_PHASE_TRUNCATE
 173 } VacErrPhase;
174 
175 /*
176  * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
177  * This is allocated in the DSM segment in parallel mode and in local memory
178  * in non-parallel mode.
179  */
180 typedef struct LVDeadTuples
181 {
182  int max_tuples; /* # slots allocated in array */
183  int num_tuples; /* current # of entries */
184  /* List of TIDs of tuples we intend to delete */
185  /* NB: this list is ordered by TID address */
 186  ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER]; /* array of
 187  * ItemPointerData */
188 } LVDeadTuples;
189 
190 /* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
191 #define SizeOfDeadTuples(cnt) \
192  add_size(offsetof(LVDeadTuples, itemptrs), \
193  mul_size(sizeof(ItemPointerData), cnt))
194 #define MAXDEADTUPLES(max_size) \
195  (((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
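/*
 * Illustrative sketch (not part of the original file): how the macros above
 * turn a memory budget into a TID capacity. Each ItemPointerData is 6 bytes,
 * so, for example, a 64MB maintenance_work_mem allows roughly 11 million
 * dead TIDs to be remembered before an index vacuuming pass is forced:
 *
 *		MAXDEADTUPLES((Size) 64 * 1024 * 1024) => ~11.1 million entries
 */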
196 
197 /*
 198  * Shared information among parallel workers, so this is allocated in the DSM
 199  * segment.
200  */
201 typedef struct LVShared
202 {
203  /*
204  * Target table relid and log level. These fields are not modified during
205  * the lazy vacuum.
206  */
 207  Oid relid;
 208  int elevel;
209 
210  /*
211  * An indication for vacuum workers to perform either index vacuum or
212  * index cleanup. first_time is true only if for_cleanup is true and
213  * bulk-deletion is not performed yet.
214  */
 215  bool for_cleanup;
 216  bool first_time;
 217 
218  /*
219  * Fields for both index vacuum and cleanup.
220  *
221  * reltuples is the total number of input heap tuples. We set either old
222  * live tuples in the index vacuum case or the new live tuples in the
223  * index cleanup case.
224  *
225  * estimated_count is true if reltuples is an estimated value. (Note that
226  * reltuples could be -1 in this case, indicating we have no idea.)
227  */
228  double reltuples;
 229  bool estimated_count;
 230 
231  /*
232  * In single process lazy vacuum we could consume more memory during index
233  * vacuuming or cleanup apart from the memory for heap scanning. In
234  * parallel vacuum, since individual vacuum workers can consume memory
235  * equal to maintenance_work_mem, the new maintenance_work_mem for each
236  * worker is set such that the parallel operation doesn't consume more
237  * memory than single process lazy vacuum.
238  */
 239  int maintenance_work_mem_worker;
 240 
241  /*
242  * Shared vacuum cost balance. During parallel vacuum,
243  * VacuumSharedCostBalance points to this value and it accumulates the
244  * balance of each parallel vacuum worker.
245  */
 246  pg_atomic_uint32 cost_balance;
 247 
248  /*
249  * Number of active parallel workers. This is used for computing the
250  * minimum threshold of the vacuum cost balance before a worker sleeps for
251  * cost-based delay.
252  */
 253  pg_atomic_uint32 active_nworkers;
 254 
255  /*
256  * Variables to control parallel vacuum. We have a bitmap to indicate
257  * which index has stats in shared memory. The set bit in the map
258  * indicates that the particular index supports a parallel vacuum.
259  */
260  pg_atomic_uint32 idx; /* counter for vacuuming and clean up */
261  uint32 offset; /* sizeof header incl. bitmap */
262  bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]; /* bit map of NULLs */
263 
264  /* Shared index statistics data follows at end of struct */
265 } LVShared;
266 
267 #define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
268 #define GetSharedIndStats(s) \
269  ((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
270 #define IndStatsIsNull(s, i) \
271  (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
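/*
 * Illustrative layout sketch (not part of the original file): the DSM area
 * begins with the fixed LVShared fields plus the bitmap, whose combined size
 * is recorded in 'offset'; the per-index LVSharedIndStats entries follow,
 * present only for indexes whose bitmap bit is set:
 *
 *		| LVShared fields ... bitmap[] | indstats | indstats | ...
 *		|<----------- offset --------->|
 */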
272 
273 /*
274  * Struct for an index bulk-deletion statistic used for parallel vacuum. This
275  * is allocated in the DSM segment.
276  */
277 typedef struct LVSharedIndStats
278 {
279  bool updated; /* are the stats updated? */
 280  IndexBulkDeleteResult istat;
 281 } LVSharedIndStats;
 282 
283 /* Struct for maintaining a parallel vacuum state. */
284 typedef struct LVParallelState
285 {
 286  ParallelContext *pcxt;
 287 
288  /* Shared information among parallel vacuum workers */
 289  LVShared *lvshared;
 290 
291  /* Points to buffer usage area in DSM */
 292  BufferUsage *buffer_usage;
 293 
294  /* Points to WAL usage area in DSM */
 295  WalUsage *wal_usage;
 296 
297  /*
298  * The number of indexes that support parallel index bulk-deletion and
299  * parallel index cleanup respectively.
300  */
 301  int nindexes_parallel_bulkdel;
 302  int nindexes_parallel_cleanup;
 303  int nindexes_parallel_condcleanup;
 304 } LVParallelState;
 305 
306 typedef struct LVRelState
307 {
308  /* Target heap relation and its indexes */
 309  Relation rel;
 310  Relation *indrels;
 311  int nindexes;
312  /* Do index vacuuming/cleanup? */
 313  bool do_index_vacuuming;
 314  bool do_index_cleanup;
 315  /* Wraparound failsafe in effect? (implies !do_index_vacuuming) */
 316  bool do_failsafe;
 317 
318  /* Buffer access strategy and parallel state */
 319  BufferAccessStrategy bstrategy;
 320  LVParallelState *lps;
 321 
322  /* Statistics from pg_class when we start out */
323  BlockNumber old_rel_pages; /* previous value of pg_class.relpages */
324  double old_live_tuples; /* previous value of pg_class.reltuples */
325  /* rel's initial relfrozenxid and relminmxid */
 326  TransactionId relfrozenxid;
 327  MultiXactId relminmxid;
 328 
329  /* VACUUM operation's cutoff for pruning */
 330  TransactionId OldestXmin;
 331  /* VACUUM operation's cutoff for freezing XIDs and MultiXactIds */
 332  TransactionId FreezeLimit;
 333  MultiXactId MultiXactCutoff;
 334 
335  /* Error reporting state */
 336  char *relnamespace;
 337  char *relname;
338  char *indname;
339  BlockNumber blkno; /* used only for heap operations */
340  OffsetNumber offnum; /* used only for heap operations */
 341  VacErrPhase phase;
 342 
343  /*
344  * State managed by lazy_scan_heap() follows
345  */
346  LVDeadTuples *dead_tuples; /* items to vacuum from indexes */
347  BlockNumber rel_pages; /* total number of pages */
348  BlockNumber scanned_pages; /* number of pages we examined */
349  BlockNumber pinskipped_pages; /* # of pages skipped due to a pin */
350  BlockNumber frozenskipped_pages; /* # of frozen pages we skipped */
351  BlockNumber tupcount_pages; /* pages whose tuples we counted */
 352  BlockNumber pages_removed; /* pages removed by truncation */
353  BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
354  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
 355  bool lock_waiter_detected;
 356 
357  /* Statistics output by us, for table */
358  double new_rel_tuples; /* new estimated total # of tuples */
359  double new_live_tuples; /* new estimated total # of live tuples */
360  /* Statistics output by index AMs */
 361  IndexBulkDeleteResult **indstats;
 362 
363  /* Instrumentation counters */
 364  int num_index_scans;
 365  int64 tuples_deleted; /* # deleted from table */
366  int64 lpdead_items; /* # deleted from indexes */
367  int64 new_dead_tuples; /* new estimated total # of dead items in
368  * table */
369  int64 num_tuples; /* total number of nonremovable tuples */
370  int64 live_tuples; /* live tuples (reltuples estimate) */
371 } LVRelState;
372 
373 /*
374  * State returned by lazy_scan_prune()
375  */
376 typedef struct LVPagePruneState
377 {
378  bool hastup; /* Page is truncatable? */
379  bool has_lpdead_items; /* includes existing LP_DEAD items */
380 
381  /*
382  * State describes the proper VM bit states to set for the page following
383  * pruning and freezing. all_visible implies !has_lpdead_items, but don't
384  * trust all_frozen result unless all_visible is also set to true.
385  */
386  bool all_visible; /* Every item visible to all? */
387  bool all_frozen; /* provided all_visible is also true */
388  TransactionId visibility_cutoff_xid; /* For recovery conflicts */
 389 } LVPagePruneState;
 390 
391 /* Struct for saving and restoring vacuum error information. */
392 typedef struct LVSavedErrInfo
 393 {
 394  BlockNumber blkno;
 395  OffsetNumber offnum;
 396  VacErrPhase phase;
 397 } LVSavedErrInfo;
398 
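/*
 * Usage sketch (illustrative, not part of the original file): code that
 * temporarily enters another error-reporting phase saves and restores this
 * state around the nested work, along the lines of:
 *
 *		LVSavedErrInfo saved_err_info;
 *
 *		update_vacuum_error_info(vacrel, &saved_err_info,
 *								 VACUUM_ERRCB_PHASE_VACUUM_INDEX,
 *								 InvalidBlockNumber, InvalidOffsetNumber);
 *		... do the index vacuum or cleanup work ...
 *		restore_vacuum_error_info(vacrel, &saved_err_info);
 */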
399 /* elevel controls whole VACUUM's verbosity */
400 static int elevel = -1;
401 
402 
403 /* non-export function prototypes */
404 static void lazy_scan_heap(LVRelState *vacrel, VacuumParams *params,
405  bool aggressive);
406 static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
407  BlockNumber blkno, Page page,
408  GlobalVisState *vistest,
409  LVPagePruneState *prunestate);
410 static void lazy_vacuum(LVRelState *vacrel, bool onecall);
411 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
412 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
413 static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
414  Buffer buffer, int tupindex, Buffer *vmbuffer);
415 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup,
416  LVRelState *vacrel);
417 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
420 static void do_parallel_vacuum_or_cleanup(LVRelState *vacrel, int nworkers);
421 static void do_parallel_processing(LVRelState *vacrel,
422  LVShared *lvshared);
 423 static void do_serial_processing_for_unsafe_indexes(LVRelState *vacrel,
 424  LVShared *lvshared);
 425 static IndexBulkDeleteResult *parallel_process_one_index(Relation indrel,
 426  IndexBulkDeleteResult *istat,
427  LVShared *lvshared,
428  LVSharedIndStats *shared_indstats,
429  LVRelState *vacrel);
430 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
 431 static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
 432  IndexBulkDeleteResult *istat,
433  double reltuples,
434  LVRelState *vacrel);
 435 static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
 436  IndexBulkDeleteResult *istat,
437  double reltuples,
438  bool estimated_count,
439  LVRelState *vacrel);
440 static bool should_attempt_truncation(LVRelState *vacrel,
441  VacuumParams *params);
442 static void lazy_truncate_heap(LVRelState *vacrel);
 443 static BlockNumber count_nondeletable_pages(LVRelState *vacrel);
 444 static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
445 static void lazy_space_alloc(LVRelState *vacrel, int nworkers,
446  BlockNumber relblocks);
447 static void lazy_space_free(LVRelState *vacrel);
448 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
449 static int vac_cmp_itemptr(const void *left, const void *right);
450 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
451  TransactionId *visibility_cutoff_xid, bool *all_frozen);
 452 static int compute_parallel_vacuum_workers(LVRelState *vacrel,
 453  int nrequested,
454  bool *can_parallel_vacuum);
455 static void update_index_statistics(LVRelState *vacrel);
 456 static LVParallelState *begin_parallel_vacuum(LVRelState *vacrel,
 457  BlockNumber nblocks,
458  int nrequested);
459 static void end_parallel_vacuum(LVRelState *vacrel);
460 static LVSharedIndStats *parallel_stats_for_idx(LVShared *lvshared, int getidx);
461 static bool parallel_processing_is_safe(Relation indrel, LVShared *lvshared);
462 static void vacuum_error_callback(void *arg);
463 static void update_vacuum_error_info(LVRelState *vacrel,
464  LVSavedErrInfo *saved_vacrel,
465  int phase, BlockNumber blkno,
466  OffsetNumber offnum);
467 static void restore_vacuum_error_info(LVRelState *vacrel,
468  const LVSavedErrInfo *saved_vacrel);
469 
470 
471 /*
472  * heap_vacuum_rel() -- perform VACUUM for one heap relation
473  *
474  * This routine vacuums a single heap, cleans out its indexes, and
475  * updates its relpages and reltuples statistics.
476  *
477  * At entry, we have already established a transaction and opened
478  * and locked the relation.
479  */
480 void
 481 heap_vacuum_rel(Relation rel, VacuumParams *params,
 482  BufferAccessStrategy bstrategy)
483 {
484  LVRelState *vacrel;
485  PGRUsage ru0;
486  TimestampTz starttime = 0;
487  WalUsage walusage_start = pgWalUsage;
488  WalUsage walusage = {0, 0, 0};
489  long secs;
490  int usecs;
491  double read_rate,
492  write_rate;
493  bool aggressive; /* should we scan all unfrozen pages? */
494  bool scanned_all_unfrozen; /* actually scanned all such pages? */
495  char **indnames = NULL;
496  TransactionId xidFullScanLimit;
497  MultiXactId mxactFullScanLimit;
498  BlockNumber new_rel_pages;
499  BlockNumber new_rel_allvisible;
500  double new_live_tuples;
501  TransactionId new_frozen_xid;
502  MultiXactId new_min_multi;
503  ErrorContextCallback errcallback;
504  PgStat_Counter startreadtime = 0;
505  PgStat_Counter startwritetime = 0;
506  TransactionId OldestXmin;
507  TransactionId FreezeLimit;
508  MultiXactId MultiXactCutoff;
509 
510  Assert(params != NULL);
 511  Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT);
 512  Assert(params->truncate != VACOPT_TERNARY_DEFAULT);
 513 
514  /* measure elapsed time iff autovacuum logging requires it */
515  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
516  {
517  pg_rusage_init(&ru0);
518  starttime = GetCurrentTimestamp();
519  if (track_io_timing)
520  {
521  startreadtime = pgStatBlockReadTime;
522  startwritetime = pgStatBlockWriteTime;
523  }
524  }
525 
526  if (params->options & VACOPT_VERBOSE)
527  elevel = INFO;
528  else
529  elevel = DEBUG2;
530 
 531  pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
 532  RelationGetRelid(rel));
533 
 534  vacuum_set_xid_limits(rel,
 535  params->freeze_min_age,
536  params->freeze_table_age,
537  params->multixact_freeze_min_age,
 538  params->multixact_freeze_table_age,
 539  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
540  &MultiXactCutoff, &mxactFullScanLimit);
541 
542  /*
543  * We request an aggressive scan if the table's frozen Xid is now older
544  * than or equal to the requested Xid full-table scan limit; or if the
545  * table's minimum MultiXactId is older than or equal to the requested
546  * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
547  */
548  aggressive = TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
549  xidFullScanLimit);
550  aggressive |= MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
551  mxactFullScanLimit);
 552  if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
 553  aggressive = true;
554 
555  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
556 
557  /* Set up high level stuff about rel */
558  vacrel->rel = rel;
559  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
560  &vacrel->indrels);
561  vacrel->do_index_vacuuming = true;
562  vacrel->do_index_cleanup = true;
563  vacrel->do_failsafe = false;
564  if (params->index_cleanup == VACOPT_TERNARY_DISABLED)
565  {
566  vacrel->do_index_vacuuming = false;
567  vacrel->do_index_cleanup = false;
568  }
569  vacrel->bstrategy = bstrategy;
570  vacrel->old_rel_pages = rel->rd_rel->relpages;
571  vacrel->old_live_tuples = rel->rd_rel->reltuples;
572  vacrel->relfrozenxid = rel->rd_rel->relfrozenxid;
573  vacrel->relminmxid = rel->rd_rel->relminmxid;
574 
575  /* Set cutoffs for entire VACUUM */
576  vacrel->OldestXmin = OldestXmin;
577  vacrel->FreezeLimit = FreezeLimit;
578  vacrel->MultiXactCutoff = MultiXactCutoff;
579 
 580  vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
 581  vacrel->relname = pstrdup(RelationGetRelationName(rel));
582  vacrel->indname = NULL;
 583  vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
 584 
585  /* Save index names iff autovacuum logging requires it */
586  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0 &&
587  vacrel->nindexes > 0)
588  {
589  indnames = palloc(sizeof(char *) * vacrel->nindexes);
590  for (int i = 0; i < vacrel->nindexes; i++)
591  indnames[i] =
 592  pstrdup(RelationGetRelationName(vacrel->indrels[i]));
 593  }
594 
595  /*
596  * Setup error traceback support for ereport(). The idea is to set up an
597  * error context callback to display additional information on any error
598  * during a vacuum. During different phases of vacuum (heap scan, heap
599  * vacuum, index vacuum, index clean up, heap truncate), we update the
600  * error context callback to display appropriate information.
601  *
602  * Note that the index vacuum and heap vacuum phases may be called
603  * multiple times in the middle of the heap scan phase. So the old phase
604  * information is restored at the end of those phases.
605  */
606  errcallback.callback = vacuum_error_callback;
607  errcallback.arg = vacrel;
608  errcallback.previous = error_context_stack;
609  error_context_stack = &errcallback;
610 
611  /* Do the vacuuming */
612  lazy_scan_heap(vacrel, params, aggressive);
613 
614  /* Done with indexes */
615  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
616 
617  /*
 618  * Compute whether we actually scanned all of the unfrozen pages. If we did,
619  * we can adjust relfrozenxid and relminmxid.
620  *
621  * NB: We need to check this before truncating the relation, because that
622  * will change ->rel_pages.
623  */
624  if ((vacrel->scanned_pages + vacrel->frozenskipped_pages)
625  < vacrel->rel_pages)
626  {
627  Assert(!aggressive);
628  scanned_all_unfrozen = false;
629  }
630  else
631  scanned_all_unfrozen = true;
632 
633  /*
634  * Optionally truncate the relation.
635  */
636  if (should_attempt_truncation(vacrel, params))
637  {
638  /*
639  * Update error traceback information. This is the last phase during
640  * which we add context information to errors, so we don't need to
641  * revert to the previous phase.
642  */
 643  update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
 644  vacrel->nonempty_pages,
 645  InvalidOffsetNumber);
 646  lazy_truncate_heap(vacrel);
647  }
648 
649  /* Pop the error context stack */
650  error_context_stack = errcallback.previous;
651 
652  /* Report that we are now doing final cleanup */
 653  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 654  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
 655 
656  /*
657  * Update statistics in pg_class.
658  *
659  * In principle new_live_tuples could be -1 indicating that we (still)
660  * don't know the tuple count. In practice that probably can't happen,
661  * since we'd surely have scanned some pages if the table is new and
662  * nonempty.
663  *
664  * For safety, clamp relallvisible to be not more than what we're setting
665  * relpages to.
666  *
667  * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
668  * since then we don't know for certain that all tuples have a newer xmin.
669  */
670  new_rel_pages = vacrel->rel_pages;
671  new_live_tuples = vacrel->new_live_tuples;
672 
673  visibilitymap_count(rel, &new_rel_allvisible, NULL);
674  if (new_rel_allvisible > new_rel_pages)
675  new_rel_allvisible = new_rel_pages;
676 
677  new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
678  new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
679 
 680  vac_update_relstats(rel,
 681  new_rel_pages,
682  new_live_tuples,
683  new_rel_allvisible,
684  vacrel->nindexes > 0,
685  new_frozen_xid,
686  new_min_multi,
687  false);
688 
689  /*
690  * Report results to the stats collector, too.
691  *
692  * Deliberately avoid telling the stats collector about LP_DEAD items that
693  * remain in the table due to VACUUM bypassing index and heap vacuuming.
694  * ANALYZE will consider the remaining LP_DEAD items to be dead tuples.
695  * It seems like a good idea to err on the side of not vacuuming again too
696  * soon in cases where the failsafe prevented significant amounts of heap
697  * vacuuming.
698  */
 699  pgstat_report_vacuum(RelationGetRelid(rel),
 700  rel->rd_rel->relisshared,
701  Max(new_live_tuples, 0),
702  vacrel->new_dead_tuples);
 703  pgstat_progress_end_command();
 704 
705  /* and log the action if appropriate */
706  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
707  {
708  TimestampTz endtime = GetCurrentTimestamp();
709 
710  if (params->log_min_duration == 0 ||
711  TimestampDifferenceExceeds(starttime, endtime,
712  params->log_min_duration))
713  {
 714  StringInfoData buf;
 715  char *msgfmt;
716 
717  TimestampDifference(starttime, endtime, &secs, &usecs);
718 
719  memset(&walusage, 0, sizeof(WalUsage));
720  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
721 
722  read_rate = 0;
723  write_rate = 0;
724  if ((secs > 0) || (usecs > 0))
725  {
726  read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
727  (secs + usecs / 1000000.0);
728  write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
729  (secs + usecs / 1000000.0);
730  }
731 
732  /*
733  * This is pretty messy, but we split it up so that we can skip
734  * emitting individual parts of the message when not applicable.
735  */
736  initStringInfo(&buf);
737  if (params->is_wraparound)
738  {
739  /*
740  * While it's possible for a VACUUM to be both is_wraparound
741  * and !aggressive, that's just a corner-case -- is_wraparound
742  * implies aggressive. Produce distinct output for the corner
743  * case all the same, just in case.
744  */
745  if (aggressive)
746  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
747  else
748  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
749  }
750  else
751  {
752  if (aggressive)
753  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
754  else
755  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
756  }
757  appendStringInfo(&buf, msgfmt,
 758  get_database_name(MyDatabaseId),
 759  vacrel->relnamespace,
760  vacrel->relname,
761  vacrel->num_index_scans);
762  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
763  vacrel->pages_removed,
764  vacrel->rel_pages,
765  vacrel->pinskipped_pages,
766  vacrel->frozenskipped_pages);
767  appendStringInfo(&buf,
768  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %u\n"),
769  (long long) vacrel->tuples_deleted,
770  (long long) vacrel->new_rel_tuples,
771  (long long) vacrel->new_dead_tuples,
772  OldestXmin);
773  appendStringInfo(&buf,
774  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
775  (long long) VacuumPageHit,
776  (long long) VacuumPageMiss,
777  (long long) VacuumPageDirty);
778  if (vacrel->rel_pages > 0)
779  {
780  if (vacrel->do_index_vacuuming)
781  {
782  msgfmt = _(" %u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
783 
784  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
785  appendStringInfo(&buf, _("index scan not needed:"));
786  else
787  appendStringInfo(&buf, _("index scan needed:"));
788  }
789  else
790  {
791  msgfmt = _(" %u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
792 
793  if (!vacrel->do_failsafe)
794  appendStringInfo(&buf, _("index scan bypassed:"));
795  else
796  appendStringInfo(&buf, _("index scan bypassed by failsafe:"));
797  }
798  appendStringInfo(&buf, msgfmt,
799  vacrel->lpdead_item_pages,
800  100.0 * vacrel->lpdead_item_pages / vacrel->rel_pages,
801  (long long) vacrel->lpdead_items);
802  }
803  for (int i = 0; i < vacrel->nindexes; i++)
804  {
805  IndexBulkDeleteResult *istat = vacrel->indstats[i];
806 
807  if (!istat)
808  continue;
809 
810  appendStringInfo(&buf,
811  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
812  indnames[i],
813  istat->num_pages,
814  istat->pages_newly_deleted,
815  istat->pages_deleted,
816  istat->pages_free);
817  }
818  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
819  read_rate, write_rate);
820  if (track_io_timing)
821  {
822  appendStringInfoString(&buf, _("I/O Timings:"));
823  if (pgStatBlockReadTime - startreadtime > 0)
824  appendStringInfo(&buf, _(" read=%.3f"),
825  (double) (pgStatBlockReadTime - startreadtime) / 1000);
826  if (pgStatBlockWriteTime - startwritetime > 0)
827  appendStringInfo(&buf, _(" write=%.3f"),
828  (double) (pgStatBlockWriteTime - startwritetime) / 1000);
829  appendStringInfoChar(&buf, '\n');
830  }
831  appendStringInfo(&buf, _("system usage: %s\n"), pg_rusage_show(&ru0));
832  appendStringInfo(&buf,
833  _("WAL usage: %ld records, %ld full page images, %llu bytes"),
834  walusage.wal_records,
835  walusage.wal_fpi,
836  (unsigned long long) walusage.wal_bytes);
837 
838  ereport(LOG,
839  (errmsg_internal("%s", buf.data)));
840  pfree(buf.data);
841  }
842  }
843 
844  /* Cleanup index statistics and index names */
845  for (int i = 0; i < vacrel->nindexes; i++)
846  {
847  if (vacrel->indstats[i])
848  pfree(vacrel->indstats[i]);
849 
850  if (indnames && indnames[i])
851  pfree(indnames[i]);
852  }
853 }
854 
855 /*
856  * lazy_scan_heap() -- scan an open heap relation
857  *
858  * This routine prunes each page in the heap, which will among other
859  * things truncate dead tuples to dead line pointers, defragment the
860  * page, and set commit status bits (see heap_page_prune). It also builds
861  * lists of dead tuples and pages with free space, calculates statistics
862  * on the number of live tuples in the heap, and marks pages as
863  * all-visible if appropriate. When done, or when we run low on space
864  * for dead-tuple TIDs, invoke lazy_vacuum to vacuum indexes and vacuum
865  * heap relation during its own second pass over the heap.
866  *
867  * If the table has at least two indexes, we execute both index vacuum
868  * and index cleanup with parallel workers unless parallel vacuum is
869  * disabled. In a parallel vacuum, we enter parallel mode and then
870  * create both the parallel context and the DSM segment before starting
871  * heap scan so that we can record dead tuples to the DSM segment. All
872  * parallel workers are launched at beginning of index vacuuming and
873  * index cleanup and they exit once done with all indexes. At the end of
874  * this function we exit from parallel mode. Index bulk-deletion results
875  * are stored in the DSM segment and we update index statistics for all
876  * the indexes after exiting from parallel mode since writes are not
877  * allowed during parallel mode.
878  *
879  * If there are no indexes then we can reclaim line pointers on the fly;
880  * dead line pointers need only be retained until all index pointers that
881  * reference them have been killed.
882  */
883 static void
884 lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
885 {
886  LVDeadTuples *dead_tuples;
887  BlockNumber nblocks,
888  blkno,
889  next_unskippable_block,
890  next_fsm_block_to_vacuum;
891  PGRUsage ru0;
892  Buffer vmbuffer = InvalidBuffer;
893  bool skipping_blocks,
894  have_vacuumed_indexes = false;
 895  StringInfoData buf;
 896  const int initprog_index[] = {
 897  PROGRESS_VACUUM_PHASE,
 898  PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
 899  PROGRESS_VACUUM_MAX_DEAD_TUPLES
 900  };
901  int64 initprog_val[3];
902  GlobalVisState *vistest;
903 
904  pg_rusage_init(&ru0);
905 
906  if (aggressive)
907  ereport(elevel,
908  (errmsg("aggressively vacuuming \"%s.%s\"",
909  vacrel->relnamespace,
910  vacrel->relname)));
911  else
912  ereport(elevel,
913  (errmsg("vacuuming \"%s.%s\"",
914  vacrel->relnamespace,
915  vacrel->relname)));
916 
917  nblocks = RelationGetNumberOfBlocks(vacrel->rel);
918  next_unskippable_block = 0;
919  next_fsm_block_to_vacuum = 0;
920  vacrel->rel_pages = nblocks;
921  vacrel->scanned_pages = 0;
922  vacrel->pinskipped_pages = 0;
923  vacrel->frozenskipped_pages = 0;
924  vacrel->tupcount_pages = 0;
925  vacrel->pages_removed = 0;
926  vacrel->lpdead_item_pages = 0;
927  vacrel->nonempty_pages = 0;
928  vacrel->lock_waiter_detected = false;
929 
930  /* Initialize instrumentation counters */
931  vacrel->num_index_scans = 0;
932  vacrel->tuples_deleted = 0;
933  vacrel->lpdead_items = 0;
934  vacrel->new_dead_tuples = 0;
935  vacrel->num_tuples = 0;
936  vacrel->live_tuples = 0;
937 
938  vistest = GlobalVisTestFor(vacrel->rel);
939 
940  vacrel->indstats = (IndexBulkDeleteResult **)
941  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
942 
943  /*
944  * Before beginning scan, check if it's already necessary to apply
945  * failsafe
946  */
 947  lazy_check_wraparound_failsafe(vacrel);
 948 
949  /*
950  * Allocate the space for dead tuples. Note that this handles parallel
951  * VACUUM initialization as part of allocating shared memory space used
952  * for dead_tuples.
953  */
954  lazy_space_alloc(vacrel, params->nworkers, nblocks);
955  dead_tuples = vacrel->dead_tuples;
956 
957  /* Report that we're scanning the heap, advertising total # of blocks */
958  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
959  initprog_val[1] = nblocks;
960  initprog_val[2] = dead_tuples->max_tuples;
961  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
962 
963  /*
964  * Except when aggressive is set, we want to skip pages that are
965  * all-visible according to the visibility map, but only when we can skip
966  * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading
967  * sequentially, the OS should be doing readahead for us, so there's no
968  * gain in skipping a page now and then; that's likely to disable
969  * readahead and so be counterproductive. Also, skipping even a single
970  * page means that we can't update relfrozenxid, so we only want to do it
971  * if we can skip a goodly number of pages.
972  *
973  * When aggressive is set, we can't skip pages just because they are
974  * all-visible, but we can still skip pages that are all-frozen, since
975  * such pages do not need freezing and do not affect the value that we can
976  * safely set for relfrozenxid or relminmxid.
977  *
978  * Before entering the main loop, establish the invariant that
979  * next_unskippable_block is the next block number >= blkno that we can't
980  * skip based on the visibility map, either all-visible for a regular scan
981  * or all-frozen for an aggressive scan. We set it to nblocks if there's
982  * no such block. We also set up the skipping_blocks flag correctly at
983  * this stage.
984  *
985  * Note: The value returned by visibilitymap_get_status could be slightly
986  * out-of-date, since we make this test before reading the corresponding
987  * heap page or locking the buffer. This is OK. If we mistakenly think
988  * that the page is all-visible or all-frozen when in fact the flag's just
989  * been cleared, we might fail to vacuum the page. It's easy to see that
990  * skipping a page when aggressive is not set is not a very big deal; we
991  * might leave some dead tuples lying around, but the next vacuum will
992  * find them. But even when aggressive *is* set, it's still OK if we miss
993  * a page whose all-frozen marking has just been cleared. Any new XIDs
994  * just added to that page are necessarily newer than the GlobalXmin we
995  * computed, so they'll have no effect on the value to which we can safely
996  * set relfrozenxid. A similar argument applies for MXIDs and relminmxid.
997  *
998  * We will scan the table's last page, at least to the extent of
999  * determining whether it has tuples or not, even if it should be skipped
1000  * according to the above rules; except when we've already determined that
1001  * it's not worth trying to truncate the table. This avoids having
1002  * lazy_truncate_heap() take access-exclusive lock on the table to attempt
1003  * a truncation that just fails immediately because there are tuples in
1004  * the last page. This is worth avoiding mainly because such a lock must
1005  * be replayed on any hot standby, where it can be disruptive.
1006  */
1007  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
1008  {
1009  while (next_unskippable_block < nblocks)
1010  {
1011  uint8 vmstatus;
1012 
1013  vmstatus = visibilitymap_get_status(vacrel->rel,
1014  next_unskippable_block,
1015  &vmbuffer);
1016  if (aggressive)
1017  {
1018  if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
1019  break;
1020  }
1021  else
1022  {
1023  if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
1024  break;
1025  }
 1025  }
 1026  vacuum_delay_point();
 1027  next_unskippable_block++;
1028  }
1029  }
1030 
1031  if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
1032  skipping_blocks = true;
1033  else
1034  skipping_blocks = false;
1035 
1036  for (blkno = 0; blkno < nblocks; blkno++)
1037  {
1038  Buffer buf;
1039  Page page;
1040  bool all_visible_according_to_vm = false;
1041  LVPagePruneState prunestate;
1042 
1043  /*
1044  * Consider need to skip blocks. See note above about forcing
1045  * scanning of last page.
1046  */
1047 #define FORCE_CHECK_PAGE() \
1048  (blkno == nblocks - 1 && should_attempt_truncation(vacrel, params))
1049 
 1050  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
 1051 
 1052  update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
 1053  blkno, InvalidOffsetNumber);
1054 
1055  if (blkno == next_unskippable_block)
1056  {
1057  /* Time to advance next_unskippable_block */
1058  next_unskippable_block++;
1059  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
1060  {
1061  while (next_unskippable_block < nblocks)
1062  {
1063  uint8 vmskipflags;
1064 
1065  vmskipflags = visibilitymap_get_status(vacrel->rel,
1066  next_unskippable_block,
1067  &vmbuffer);
1068  if (aggressive)
1069  {
1070  if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
1071  break;
1072  }
1073  else
1074  {
1075  if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
1076  break;
1077  }
 1078  vacuum_delay_point();
 1079  next_unskippable_block++;
1080  }
1081  }
1082 
1083  /*
1084  * We know we can't skip the current block. But set up
1085  * skipping_blocks to do the right thing at the following blocks.
1086  */
1087  if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
1088  skipping_blocks = true;
1089  else
1090  skipping_blocks = false;
1091 
1092  /*
1093  * Normally, the fact that we can't skip this block must mean that
1094  * it's not all-visible. But in an aggressive vacuum we know only
1095  * that it's not all-frozen, so it might still be all-visible.
1096  */
1097  if (aggressive && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
1098  all_visible_according_to_vm = true;
1099  }
1100  else
1101  {
1102  /*
1103  * The current block is potentially skippable; if we've seen a
1104  * long enough run of skippable blocks to justify skipping it, and
1105  * we're not forced to check it, then go ahead and skip.
1106  * Otherwise, the page must be at least all-visible if not
1107  * all-frozen, so we can set all_visible_according_to_vm = true.
1108  */
1109  if (skipping_blocks && !FORCE_CHECK_PAGE())
1110  {
1111  /*
1112  * Tricky, tricky. If this is in aggressive vacuum, the page
1113  * must have been all-frozen at the time we checked whether it
1114  * was skippable, but it might not be any more. We must be
1115  * careful to count it as a skipped all-frozen page in that
1116  * case, or else we'll think we can't update relfrozenxid and
1117  * relminmxid. If it's not an aggressive vacuum, we don't
1118  * know whether it was all-frozen, so we have to recheck; but
1119  * in this case an approximate answer is OK.
1120  */
1121  if (aggressive || VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1122  vacrel->frozenskipped_pages++;
1123  continue;
1124  }
1125  all_visible_according_to_vm = true;
1126  }
1127 
 1128  vacuum_delay_point();
 1129 
1130  /*
1131  * Consider if we definitely have enough space to process TIDs on page
1132  * already. If we are close to overrunning the available space for
1133  * dead-tuple TIDs, pause and do a cycle of vacuuming before we tackle
1134  * this page.
1135  */
1136  if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
1137  dead_tuples->num_tuples > 0)
1138  {
1139  /*
1140  * Before beginning index vacuuming, we release any pin we may
1141  * hold on the visibility map page. This isn't necessary for
1142  * correctness, but we do it anyway to avoid holding the pin
1143  * across a lengthy, unrelated operation.
1144  */
1145  if (BufferIsValid(vmbuffer))
1146  {
1147  ReleaseBuffer(vmbuffer);
1148  vmbuffer = InvalidBuffer;
1149  }
1150 
1151  /* Remove the collected garbage tuples from table and indexes */
1152  lazy_vacuum(vacrel, false);
1153  have_vacuumed_indexes = true;
1154 
1155  /*
1156  * Vacuum the Free Space Map to make newly-freed space visible on
1157  * upper-level FSM pages. Note we have not yet processed blkno.
1158  */
1159  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1160  blkno);
1161  next_fsm_block_to_vacuum = blkno;
1162 
1163  /* Report that we are once again scanning the heap */
 1164  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 1165  PROGRESS_VACUUM_PHASE_SCAN_HEAP);
 1166  }
1167 
1168  /*
1169  * Set up visibility map page as needed.
1170  *
1171  * Pin the visibility map page in case we need to mark the page
1172  * all-visible. In most cases this will be very cheap, because we'll
1173  * already have the correct page pinned anyway. However, it's
1174  * possible that (a) next_unskippable_block is covered by a different
1175  * VM page than the current block or (b) we released our pin and did a
1176  * cycle of index vacuuming.
1177  */
1178  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1179 
1180  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno,
1181  RBM_NORMAL, vacrel->bstrategy);
1182 
1183  /*
1184  * We need buffer cleanup lock so that we can prune HOT chains and
1185  * defragment the page.
1186  */
 1187  if (!ConditionalLockBufferForCleanup(buf))
 1188  {
1189  bool hastup;
1190 
1191  /*
1192  * If we're not performing an aggressive scan to guard against XID
1193  * wraparound, and we don't want to forcibly check the page, then
1194  * it's OK to skip vacuuming pages we get a lock conflict on. They
1195  * will be dealt with in some future vacuum.
1196  */
1197  if (!aggressive && !FORCE_CHECK_PAGE())
1198  {
1199  ReleaseBuffer(buf);
1200  vacrel->pinskipped_pages++;
1201  continue;
1202  }
1203 
1204  /*
1205  * Read the page with share lock to see if any xids on it need to
1206  * be frozen. If not we just skip the page, after updating our
1207  * scan statistics. If there are some, we wait for cleanup lock.
1208  *
1209  * We could defer the lock request further by remembering the page
1210  * and coming back to it later, or we could even register
1211  * ourselves for multiple buffers and then service whichever one
1212  * is received first. For now, this seems good enough.
1213  *
1214  * If we get here with aggressive false, then we're just forcibly
1215  * checking the page, and so we don't want to insist on getting
1216  * the lock; we only need to know if the page contains tuples, so
1217  * that we can update nonempty_pages correctly. It's convenient
1218  * to use lazy_check_needs_freeze() for both situations, though.
1219  */
 1220  LockBuffer(buf, BUFFER_LOCK_SHARE);
 1221  if (!lazy_check_needs_freeze(buf, &hastup, vacrel))
1222  {
1223  UnlockReleaseBuffer(buf);
1224  vacrel->scanned_pages++;
1225  vacrel->pinskipped_pages++;
1226  if (hastup)
1227  vacrel->nonempty_pages = blkno + 1;
1228  continue;
1229  }
1230  if (!aggressive)
1231  {
1232  /*
1233  * Here, we must not advance scanned_pages; that would amount
1234  * to claiming that the page contains no freezable tuples.
1235  */
1236  UnlockReleaseBuffer(buf);
1237  vacrel->pinskipped_pages++;
1238  if (hastup)
1239  vacrel->nonempty_pages = blkno + 1;
1240  continue;
1241  }
 1242  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 1243  LockBufferForCleanup(buf);
1244  /* drop through to normal processing */
1245  }
1246 
1247  /*
1248  * By here we definitely have enough dead_tuples space for whatever
1249  * LP_DEAD tids are on this page, we have the visibility map page set
1250  * up in case we need to set this page's all_visible/all_frozen bit,
1251  * and we have a super-exclusive lock. Any tuples on this page are
1252  * now sure to be "counted" by this VACUUM.
1253  *
1254  * One last piece of preamble needs to take place before we can prune:
1255  * we need to consider new and empty pages.
1256  */
1257  vacrel->scanned_pages++;
1258  vacrel->tupcount_pages++;
1259 
1260  page = BufferGetPage(buf);
1261 
1262  if (PageIsNew(page))
1263  {
1264  /*
1265  * All-zeroes pages can be left over if either a backend extends
1266  * the relation by a single page, but crashes before the newly
1267  * initialized page has been written out, or when bulk-extending
1268  * the relation (which creates a number of empty pages at the tail
1269  * end of the relation, but enters them into the FSM).
1270  *
1271  * Note we do not enter the page into the visibilitymap. That has
1272  * the downside that we repeatedly visit this page in subsequent
1273  * vacuums, but otherwise we'll never not discover the space on a
1274  * promoted standby. The harm of repeated checking ought to
1275  * normally not be too bad - the space usually should be used at
1276  * some point, otherwise there wouldn't be any regular vacuums.
1277  *
1278  * Make sure these pages are in the FSM, to ensure they can be
1279  * reused. Do that by testing if there's any space recorded for
1280  * the page. If not, enter it. We do so after releasing the lock
1281  * on the heap page, the FSM is approximate, after all.
1282  */
1283  UnlockReleaseBuffer(buf);
1284 
1285  if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1286  {
1287  Size freespace = BLCKSZ - SizeOfPageHeaderData;
1288 
1289  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1290  }
1291  continue;
1292  }
1293 
1294  if (PageIsEmpty(page))
1295  {
1296  Size freespace = PageGetHeapFreeSpace(page);
1297 
1298  /*
1299  * Empty pages are always all-visible and all-frozen (note that
1300  * the same is currently not true for new pages, see above).
1301  */
1302  if (!PageIsAllVisible(page))
1303  {
 1304  START_CRIT_SECTION();
 1305 
1306  /* mark buffer dirty before writing a WAL record */
1307  MarkBufferDirty(buf);
1308 
1309  /*
1310  * It's possible that another backend has extended the heap,
1311  * initialized the page, and then failed to WAL-log the page
1312  * due to an ERROR. Since heap extension is not WAL-logged,
1313  * recovery might try to replay our record setting the page
1314  * all-visible and find that the page isn't initialized, which
1315  * will cause a PANIC. To prevent that, check whether the
1316  * page has been previously WAL-logged, and if not, do that
1317  * now.
1318  */
1319  if (RelationNeedsWAL(vacrel->rel) &&
1320  PageGetLSN(page) == InvalidXLogRecPtr)
1321  log_newpage_buffer(buf, true);
1322 
1323  PageSetAllVisible(page);
1324  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1325  vmbuffer, InvalidTransactionId,
 1326  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
 1327  END_CRIT_SECTION();
1328  }
1329 
1330  UnlockReleaseBuffer(buf);
1331  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1332  continue;
1333  }
1334 
1335  /*
1336  * Prune and freeze tuples.
1337  *
1338  * Accumulates details of remaining LP_DEAD line pointers on page in
1339  * dead tuple list. This includes LP_DEAD line pointers that we
1340  * pruned ourselves, as well as existing LP_DEAD line pointers that
1341  * were pruned some time earlier. Also considers freezing XIDs in the
1342  * tuple headers of remaining items with storage.
1343  */
1344  lazy_scan_prune(vacrel, buf, blkno, page, vistest, &prunestate);
1345 
1346  Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
1347  Assert(!all_visible_according_to_vm || prunestate.all_visible);
1348 
1349  /* Remember the location of the last page with nonremovable tuples */
1350  if (prunestate.hastup)
1351  vacrel->nonempty_pages = blkno + 1;
1352 
1353  if (vacrel->nindexes == 0)
1354  {
1355  /*
1356  * Consider the need to do page-at-a-time heap vacuuming when
1357  * using the one-pass strategy now.
1358  *
1359  * The one-pass strategy will never call lazy_vacuum(). The steps
1360  * performed here can be thought of as the one-pass equivalent of
1361  * a call to lazy_vacuum().
1362  */
1363  if (prunestate.has_lpdead_items)
1364  {
1365  Size freespace;
1366 
1367  lazy_vacuum_heap_page(vacrel, blkno, buf, 0, &vmbuffer);
1368 
1369  /* Forget the now-vacuumed tuples */
1370  dead_tuples->num_tuples = 0;
1371 
1372  /*
1373  * Periodically perform FSM vacuuming to make newly-freed
1374  * space visible on upper FSM pages. Note we have not yet
1375  * performed FSM processing for blkno.
1376  *
1377  * Call lazy_check_wraparound_failsafe() here, too, since we
1378  * also don't want to do that too frequently, or too
1379  * infrequently.
1380  */
1381  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1382  {
1383  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1384  blkno);
1385  next_fsm_block_to_vacuum = blkno;
 1386  lazy_check_wraparound_failsafe(vacrel);
 1387  }
1388 
1389  /*
1390  * Now perform FSM processing for blkno, and move on to next
1391  * page.
1392  *
1393  * Our call to lazy_vacuum_heap_page() will have considered if
1394  * it's possible to set all_visible/all_frozen independently
1395  * of lazy_scan_prune(). Note that prunestate was invalidated
1396  * by lazy_vacuum_heap_page() call.
1397  */
1398  freespace = PageGetHeapFreeSpace(page);
1399 
1400  UnlockReleaseBuffer(buf);
1401  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1402  continue;
1403  }
1404 
1405  /*
1406  * There was no call to lazy_vacuum_heap_page() because pruning
1407  * didn't encounter/create any LP_DEAD items that needed to be
1408  * vacuumed. Prune state has not been invalidated, so proceed
1409  * with prunestate-driven visibility map and FSM steps (just like
1410  * the two-pass strategy).
1411  */
1412  Assert(dead_tuples->num_tuples == 0);
1413  }
1414 
1415  /*
1416  * Handle setting visibility map bit based on what the VM said about
1417  * the page before pruning started, and using prunestate
1418  */
1419  if (!all_visible_according_to_vm && prunestate.all_visible)
1420  {
 1421  uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
 1422 
1423  if (prunestate.all_frozen)
1424  flags |= VISIBILITYMAP_ALL_FROZEN;
1425 
1426  /*
1427  * It should never be the case that the visibility map page is set
1428  * while the page-level bit is clear, but the reverse is allowed
1429  * (if checksums are not enabled). Regardless, set both bits so
1430  * that we get back in sync.
1431  *
1432  * NB: If the heap page is all-visible but the VM bit is not set,
1433  * we don't need to dirty the heap page. However, if checksums
1434  * are enabled, we do need to make sure that the heap page is
1435  * dirtied before passing it to visibilitymap_set(), because it
1436  * may be logged. Given that this situation should only happen in
1437  * rare cases after a crash, it is not worth optimizing.
1438  */
1439  PageSetAllVisible(page);
1440  MarkBufferDirty(buf);
1441  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1442  vmbuffer, prunestate.visibility_cutoff_xid,
1443  flags);
1444  }
1445 
1446  /*
1447  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1448  * the page-level bit is clear. However, it's possible that the bit
1449  * got cleared after we checked it and before we took the buffer
1450  * content lock, so we must recheck before jumping to the conclusion
1451  * that something bad has happened.
1452  */
1453  else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1454  && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
1455  {
1456  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1457  vacrel->relname, blkno);
1458  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
 1459  VISIBILITYMAP_VALID_BITS);
 1460  }
1461 
1462  /*
1463  * It's possible for the value returned by
1464  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1465  * wrong for us to see tuples that appear to not be visible to
1466  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1467  * xmin value never moves backwards, but
1468  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1469  * returns a value that's unnecessarily small, so if we see that
1470  * contradiction it just means that the tuples that we think are not
1471  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1472  * is correct.
1473  *
1474  * There should never be dead tuples on a page with PD_ALL_VISIBLE
1475  * set, however.
1476  */
1477  else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
1478  {
1479  elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1480  vacrel->relname, blkno);
1481  PageClearAllVisible(page);
1482  MarkBufferDirty(buf);
1483  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
 1484  VISIBILITYMAP_VALID_BITS);
 1485  }
1486 
1487  /*
1488  * If the all-visible page is all-frozen but not marked as such yet,
1489  * mark it as all-frozen. Note that all_frozen is only valid if
1490  * all_visible is true, so we must check both.
1491  */
1492  else if (all_visible_according_to_vm && prunestate.all_visible &&
1493  prunestate.all_frozen &&
1494  !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1495  {
1496  /*
1497  * We can pass InvalidTransactionId as the cutoff XID here,
1498  * because setting the all-frozen bit doesn't cause recovery
1499  * conflicts.
1500  */
1501  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1502  vmbuffer, InvalidTransactionId,
 1503  VISIBILITYMAP_ALL_FROZEN);
 1504  }
1505 
1506  /*
1507  * Final steps for block: drop super-exclusive lock, record free space
1508  * in the FSM
1509  */
1510  if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1511  {
1512  /*
1513  * Wait until lazy_vacuum_heap_rel() to save free space. This
1514  * doesn't just save us some cycles; it also allows us to record
1515  * any additional free space that lazy_vacuum_heap_page() will
1516  * make available in cases where it's possible to truncate the
1517  * page's line pointer array.
1518  *
1519  * Note: It's not in fact 100% certain that we really will call
1520  * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1521  * index vacuuming (and so must skip heap vacuuming). This is
1522  * deemed okay because it only happens in emergencies, or when
1523  * there is very little free space anyway. (Besides, we start
1524  * recording free space in the FSM once index vacuuming has been
1525  * abandoned.)
1526  *
1527  * Note: The one-pass (no indexes) case is only supposed to make
1528  * it this far when there were no LP_DEAD items during pruning.
1529  */
1530  Assert(vacrel->nindexes > 0);
1531  UnlockReleaseBuffer(buf);
1532  }
1533  else
1534  {
1535  Size freespace = PageGetHeapFreeSpace(page);
1536 
1537  UnlockReleaseBuffer(buf);
1538  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1539  }
1540  }
1541 
1542  /* report that everything is now scanned */
 1543  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
 1544 
1545  /* Clear the block number information */
1546  vacrel->blkno = InvalidBlockNumber;
1547 
1548  /* now we can compute the new value for pg_class.reltuples */
1549  vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, nblocks,
1550  vacrel->tupcount_pages,
1551  vacrel->live_tuples);
1552 
1553  /*
1554  * Also compute the total number of surviving heap entries. In the
1555  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1556  */
1557  vacrel->new_rel_tuples =
1558  Max(vacrel->new_live_tuples, 0) + vacrel->new_dead_tuples;
1559 
1560  /*
1561  * Release any remaining pin on visibility map page.
1562  */
1563  if (BufferIsValid(vmbuffer))
1564  {
1565  ReleaseBuffer(vmbuffer);
1566  vmbuffer = InvalidBuffer;
1567  }
1568 
1569  /* If any tuples need to be deleted, perform final vacuum cycle */
1570  if (dead_tuples->num_tuples > 0)
1571  lazy_vacuum(vacrel, !have_vacuumed_indexes);
1572 
1573  /*
1574  * Vacuum the remainder of the Free Space Map. We must do this whether or
1575  * not there were indexes, and whether or not we bypassed index vacuuming.
1576  */
1577  if (blkno > next_fsm_block_to_vacuum)
1578  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1579 
1580  /* report all blocks vacuumed */
 1581  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
 1582 
1583  /* Do post-vacuum cleanup */
1584  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1585  lazy_cleanup_all_indexes(vacrel);
1586 
1587  /*
1588  * Free resources managed by lazy_space_alloc(). (We must end parallel
1589  * mode/free shared memory before updating index statistics. We cannot
1590  * write while in parallel mode.)
1591  */
1592  lazy_space_free(vacrel);
1593 
1594  /* Update index statistics */
1595  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1596  update_index_statistics(vacrel);
1597 
1598  /*
 1599  * If the table has no indexes and at least one heap page was vacuumed, make
 1600  * the log report that lazy_vacuum_heap_rel would've made had there been
1601  * indexes (having indexes implies using the two pass strategy).
1602  *
1603  * We deliberately don't do this in the case where there are indexes but
1604  * index vacuuming was bypassed. We make a similar report at the point
1605  * that index vacuuming is bypassed, but that's actually quite different
1606  * in one important sense: it shows information about work we _haven't_
1607  * done.
1608  *
1609  * log_autovacuum output does things differently; it consistently presents
1610  * information about LP_DEAD items for the VACUUM as a whole. We always
1611  * report on each round of index and heap vacuuming separately, though.
1612  */
1613  if (vacrel->nindexes == 0 && vacrel->lpdead_item_pages > 0)
1614  ereport(elevel,
1615  (errmsg("\"%s\": removed %lld dead item identifiers in %u pages",
1616  vacrel->relname, (long long) vacrel->lpdead_items,
1617  vacrel->lpdead_item_pages)));
1618 
1619  initStringInfo(&buf);
1620  appendStringInfo(&buf,
1621  _("%lld dead row versions cannot be removed yet, oldest xmin: %u\n"),
1622  (long long) vacrel->new_dead_tuples, vacrel->OldestXmin);
1623  appendStringInfo(&buf, ngettext("%u page removed.\n",
1624  "%u pages removed.\n",
1625  vacrel->pages_removed),
1626  vacrel->pages_removed);
1627  appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
1628  "Skipped %u pages due to buffer pins, ",
1629  vacrel->pinskipped_pages),
1630  vacrel->pinskipped_pages);
1631  appendStringInfo(&buf, ngettext("%u frozen page.\n",
1632  "%u frozen pages.\n",
1633  vacrel->frozenskipped_pages),
1634  vacrel->frozenskipped_pages);
1635  appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
1636 
1637  ereport(elevel,
1638  (errmsg("\"%s\": found %lld removable, %lld nonremovable row versions in %u out of %u pages",
1639  vacrel->relname,
1640  (long long) vacrel->tuples_deleted,
1641  (long long) vacrel->num_tuples, vacrel->scanned_pages,
1642  nblocks),
1643  errdetail_internal("%s", buf.data)));
1644  pfree(buf.data);
1645 }
1646 
1647 /*
1648  * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1649  *
1650  * Caller must hold pin and buffer cleanup lock on the buffer.
1651  *
1652  * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
1653  * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
1654  * whether or not a tuple should be considered DEAD. This happened when an
1655  * inserting transaction concurrently aborted (after our heap_page_prune()
1656  * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
1657  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
1658  * but nevertheless were left with storage after pruning.
1659  *
1660  * The approach we take now is to restart pruning when the race condition is
1661  * detected. This allows heap_page_prune() to prune the tuples inserted by
1662  * the now-aborted transaction. This is a little crude, but it guarantees
1663  * that any items that make it into the dead_tuples array are simple LP_DEAD
1664  * line pointers, and that every remaining item with tuple storage is
1665  * considered as a candidate for freezing.
1666  */
1667 static void
1668 lazy_scan_prune(LVRelState *vacrel,
1669  Buffer buf,
1670  BlockNumber blkno,
1671  Page page,
1672  GlobalVisState *vistest,
1673  LVPagePruneState *prunestate)
1674 {
1675  Relation rel = vacrel->rel;
1676  OffsetNumber offnum,
1677  maxoff;
1678  ItemId itemid;
1679  HeapTupleData tuple;
1680  HTSV_Result res;
1681  int tuples_deleted,
1682  lpdead_items,
1683  new_dead_tuples,
1684  num_tuples,
1685  live_tuples;
1686  int nfrozen;
1687  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1688  xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage];
1689 
1690  maxoff = PageGetMaxOffsetNumber(page);
1691 
1692 retry:
1693 
1694  /* Initialize (or reset) page-level counters */
1695  tuples_deleted = 0;
1696  lpdead_items = 0;
1697  new_dead_tuples = 0;
1698  num_tuples = 0;
1699  live_tuples = 0;
1700 
1701  /*
1702  * Prune all HOT-update chains in this page.
1703  *
1704  * We count tuples removed by the pruning step as tuples_deleted. Its
1705  * final value can be thought of as the number of tuples that have been
1706  * deleted from the table. It should not be confused with lpdead_items;
1707  * lpdead_items's final value can be thought of as the number of tuples
1708  * that were deleted from indexes.
1709  */
1710  tuples_deleted = heap_page_prune(rel, buf, vistest,
1711  InvalidTransactionId, 0, false,
1712  &vacrel->offnum);
1713 
1714  /*
1715  * Now scan the page to collect LP_DEAD items and check for tuples
1716  * requiring freezing among remaining tuples with storage
1717  */
1718  prunestate->hastup = false;
1719  prunestate->has_lpdead_items = false;
1720  prunestate->all_visible = true;
1721  prunestate->all_frozen = true;
1722  prunestate->visibility_cutoff_xid = InvalidTransactionId;
1723  nfrozen = 0;
1724 
1725  for (offnum = FirstOffsetNumber;
1726  offnum <= maxoff;
1727  offnum = OffsetNumberNext(offnum))
1728  {
1729  bool tuple_totally_frozen;
1730 
1731  /*
1732  * Set the offset number so that we can display it along with any
1733  * error that occurred while processing this tuple.
1734  */
1735  vacrel->offnum = offnum;
1736  itemid = PageGetItemId(page, offnum);
1737 
1738  if (!ItemIdIsUsed(itemid))
1739  continue;
1740 
1741  /* Redirect items mustn't be touched */
1742  if (ItemIdIsRedirected(itemid))
1743  {
1744  prunestate->hastup = true; /* page won't be truncatable */
1745  continue;
1746  }
1747 
1748  /*
1749  * LP_DEAD items are processed outside of the loop.
1750  *
1751  * Note that we deliberately don't set hastup=true in the case of an
1752  * LP_DEAD item here, which is not how lazy_check_needs_freeze() or
1753  * count_nondeletable_pages() do it -- they only consider pages empty
1754  * when they only have LP_UNUSED items, which is important for
1755  * correctness.
1756  *
1757  * Our assumption is that any LP_DEAD items we encounter here will
1758  * become LP_UNUSED inside lazy_vacuum_heap_page() before we actually
1759  * call count_nondeletable_pages(). In any case our opinion of
1760  * whether or not a page 'hastup' (which is how our caller sets its
1761  * vacrel->nonempty_pages value) is inherently race-prone. It must be
1762  * treated as advisory/unreliable, so we might as well be slightly
1763  * optimistic.
1764  */
1765  if (ItemIdIsDead(itemid))
1766  {
1767  deadoffsets[lpdead_items++] = offnum;
1768  prunestate->all_visible = false;
1769  prunestate->has_lpdead_items = true;
1770  continue;
1771  }
1772 
1773  Assert(ItemIdIsNormal(itemid));
1774 
1775  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1776  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1777  tuple.t_len = ItemIdGetLength(itemid);
1778  tuple.t_tableOid = RelationGetRelid(rel);
1779 
1780  /*
1781  * DEAD tuples are almost always pruned into LP_DEAD line pointers by
1782  * heap_page_prune(), but it's possible that the tuple state changed
1783  * since heap_page_prune() looked. Handle that here by restarting.
1784  * (See comments at the top of function for a full explanation.)
1785  */
1786  res = HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf);
1787 
1788  if (unlikely(res == HEAPTUPLE_DEAD))
1789  goto retry;
1790 
1791  /*
1792  * The criteria for counting a tuple as live in this block need to
1793  * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
1794  * and ANALYZE may produce wildly different reltuples values, e.g.
1795  * when there are many recently-dead tuples.
1796  *
1797  * The logic here is a bit simpler than acquire_sample_rows(), as
1798  * VACUUM can't run inside a transaction block, which makes some cases
1799  * impossible (e.g. in-progress insert from the same transaction).
1800  *
1801  * We treat LP_DEAD items a little differently, too -- we don't count
1802  * them as dead_tuples at all (we only consider new_dead_tuples). The
1803  * outcome is no different because we assume that any LP_DEAD items we
1804  * encounter here will become LP_UNUSED inside lazy_vacuum_heap_page()
1805  * before we report anything to the stats collector. (Cases where we
1806  * bypass index vacuuming will violate our assumption, but the overall
1807  * impact of that should be negligible.)
1808  */
1809  switch (res)
1810  {
1811  case HEAPTUPLE_LIVE:
1812 
1813  /*
1814  * Count it as live. Not only is this natural, but it's also
1815  * what acquire_sample_rows() does.
1816  */
1817  live_tuples++;
1818 
1819  /*
1820  * Is the tuple definitely visible to all transactions?
1821  *
1822  * NB: Like with per-tuple hint bits, we can't set the
1823  * PD_ALL_VISIBLE flag if the inserter committed
1824  * asynchronously. See SetHintBits for more info. Check that
1825  * the tuple is hinted xmin-committed because of that.
1826  */
1827  if (prunestate->all_visible)
1828  {
1829  TransactionId xmin;
1830 
1831  if (!HeapTupleHeaderXminCommitted(tuple.t_data))
1832  {
1833  prunestate->all_visible = false;
1834  break;
1835  }
1836 
1837  /*
1838  * The inserter definitely committed. But is it old enough
1839  * that everyone sees it as committed?
1840  */
1841  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1842  if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
1843  {
1844  prunestate->all_visible = false;
1845  break;
1846  }
1847 
1848  /* Track newest xmin on page. */
1849  if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid))
1850  prunestate->visibility_cutoff_xid = xmin;
1851  }
1852  break;
1853  case HEAPTUPLE_RECENTLY_DEAD:
1854 
1855  /*
1856  * If tuple is recently deleted then we must not remove it
1857  * from relation. (We only remove items that are LP_DEAD from
1858  * pruning.)
1859  */
1860  new_dead_tuples++;
1861  prunestate->all_visible = false;
1862  break;
1863  case HEAPTUPLE_INSERT_IN_PROGRESS:
1864 
1865  /*
1866  * We do not count these rows as live, because we expect the
1867  * inserting transaction to update the counters at commit, and
1868  * we assume that will happen only after we report our
1869  * results. This assumption is a bit shaky, but it is what
1870  * acquire_sample_rows() does, so be consistent.
1871  */
1872  prunestate->all_visible = false;
1873  break;
1874  case HEAPTUPLE_DELETE_IN_PROGRESS:
1875  /* This is an expected case during concurrent vacuum */
1876  prunestate->all_visible = false;
1877 
1878  /*
1879  * Count such rows as live. As above, we assume the deleting
1880  * transaction will commit and update the counters after we
1881  * report.
1882  */
1883  live_tuples++;
1884  break;
1885  default:
1886  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1887  break;
1888  }
1889 
1890  /*
1891  * Non-removable tuple (i.e. tuple with storage).
1892  *
1893  * Check tuple left behind after pruning to see if it needs to be frozen
1894  * now.
1895  */
1896  num_tuples++;
1897  prunestate->hastup = true;
1898  if (heap_prepare_freeze_tuple(tuple.t_data,
1899  vacrel->relfrozenxid,
1900  vacrel->relminmxid,
1901  vacrel->FreezeLimit,
1902  vacrel->MultiXactCutoff,
1903  &frozen[nfrozen],
1904  &tuple_totally_frozen))
1905  {
1906  /* Will execute freeze below */
1907  frozen[nfrozen++].offset = offnum;
1908  }
1909 
1910  /*
1911  * If tuple is not frozen (and not about to become frozen) then caller
1912  * had better not go on to set this page's VM bit
1913  */
1914  if (!tuple_totally_frozen)
1915  prunestate->all_frozen = false;
1916  }
1917 
1918  /*
1919  * We have now divided every item on the page into either an LP_DEAD item
1920  * that will need to be vacuumed in indexes later, or an LP_NORMAL tuple
1921  * that remains and needs to be considered for freezing now (LP_UNUSED and
1922  * LP_REDIRECT items also remain, but are of no further interest to us).
1923  */
1924  vacrel->offnum = InvalidOffsetNumber;
1925 
1926  /*
1927  * Consider the need to freeze any items with tuple storage from the page
1928  * first (arbitrary)
1929  */
1930  if (nfrozen > 0)
1931  {
1932  Assert(prunestate->hastup);
1933 
1934  /*
1935  * At least one tuple with storage needs to be frozen -- execute that
1936  * now.
1937  *
1938  * If we need to freeze any tuples we'll mark the buffer dirty, and
1939  * write a WAL record recording the changes. We must log the changes
1940  * to be crash-safe against future truncation of CLOG.
1941  */
1942  START_CRIT_SECTION();
1943 
1944  MarkBufferDirty(buf);
1945 
1946  /* execute collected freezes */
1947  for (int i = 0; i < nfrozen; i++)
1948  {
1949  HeapTupleHeader htup;
1950 
1951  itemid = PageGetItemId(page, frozen[i].offset);
1952  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1953 
1954  heap_execute_freeze_tuple(htup, &frozen[i]);
1955  }
1956 
1957  /* Now WAL-log freezing if necessary */
1958  if (RelationNeedsWAL(vacrel->rel))
1959  {
1960  XLogRecPtr recptr;
1961 
1962  recptr = log_heap_freeze(vacrel->rel, buf, vacrel->FreezeLimit,
1963  frozen, nfrozen);
1964  PageSetLSN(page, recptr);
1965  }
1966 
1967  END_CRIT_SECTION();
1968  }
1969 
1970  /*
1971  * The second pass over the heap can also set visibility map bits, using
1972  * the same approach. This is important when the table frequently has a
1973  * few old LP_DEAD items on each page by the time we get to it (typically
1974  * because past opportunistic pruning operations freed some non-HOT
1975  * tuples).
1976  *
1977  * VACUUM will call heap_page_is_all_visible() during the second pass over
1978  * the heap to determine all_visible and all_frozen for the page -- this
1979  * is a specialized version of the logic from this function. Now that
1980  * we've finished pruning and freezing, make sure that we're in total
1981  * agreement with heap_page_is_all_visible() using an assertion.
1982  */
1983 #ifdef USE_ASSERT_CHECKING
1984  /* Note that all_frozen value does not matter when !all_visible */
1985  if (prunestate->all_visible)
1986  {
1987  TransactionId cutoff;
1988  bool all_frozen;
1989 
1990  if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
1991  Assert(false);
1992 
1993  Assert(lpdead_items == 0);
1994  Assert(prunestate->all_frozen == all_frozen);
1995 
1996  /*
1997  * It's possible that we froze tuples and made the page's XID cutoff
1998  * (for recovery conflict purposes) FrozenTransactionId. This is okay
1999  * because visibility_cutoff_xid will be logged by our caller in a
2000  * moment.
2001  */
2002  Assert(cutoff == FrozenTransactionId ||
2003  cutoff == prunestate->visibility_cutoff_xid);
2004  }
2005 #endif
2006 
2007  /*
2008  * Now save details of the LP_DEAD items from the page in the dead_tuples
2009  * array. Also record that page has dead items in per-page prunestate.
2010  */
2011  if (lpdead_items > 0)
2012  {
2013  LVDeadTuples *dead_tuples = vacrel->dead_tuples;
2014  ItemPointerData tmp;
2015 
2016  Assert(!prunestate->all_visible);
2017  Assert(prunestate->has_lpdead_items);
2018 
2019  vacrel->lpdead_item_pages++;
2020 
2021  ItemPointerSetBlockNumber(&tmp, blkno);
2022 
2023  for (int i = 0; i < lpdead_items; i++)
2024  {
2025  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
2026  dead_tuples->itemptrs[dead_tuples->num_tuples++] = tmp;
2027  }
2028 
2029  Assert(dead_tuples->num_tuples <= dead_tuples->max_tuples);
2030  pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
2031  dead_tuples->num_tuples);
2032  }
2033 
2034  /* Finally, add page-local counts to whole-VACUUM counts */
2035  vacrel->tuples_deleted += tuples_deleted;
2036  vacrel->lpdead_items += lpdead_items;
2037  vacrel->new_dead_tuples += new_dead_tuples;
2038  vacrel->num_tuples += num_tuples;
2039  vacrel->live_tuples += live_tuples;
2040 }
2041 
2042 /*
2043  * Remove the collected garbage tuples from the table and its indexes.
2044  *
2045  * We may choose to bypass index vacuuming at this point, though only when the
2046  * ongoing VACUUM operation will definitely only have one index scan/round of
2047  * index vacuuming. Caller indicates whether or not this is such a VACUUM
2048  * operation using 'onecall' argument.
2049  *
2050  * In rare emergencies, the ongoing VACUUM operation can be made to skip both
2051  * index vacuuming and index cleanup at the point we're called. This avoids
2052  * having the whole system refuse to allocate further XIDs/MultiXactIds due to
2053  * wraparound.
2054  */
2055 static void
2056 lazy_vacuum(LVRelState *vacrel, bool onecall)
2057 {
2058  bool do_bypass_optimization;
2059 
2060  /* Should not end up here with no indexes */
2061  Assert(vacrel->nindexes > 0);
2062  Assert(!IsParallelWorker());
2063  Assert(vacrel->lpdead_item_pages > 0);
2064 
2065  if (!vacrel->do_index_vacuuming)
2066  {
2067  Assert(!vacrel->do_index_cleanup);
2068  vacrel->dead_tuples->num_tuples = 0;
2069  return;
2070  }
2071 
2072  /*
2073  * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2074  *
2075  * We currently only do this in cases where the number of LP_DEAD items
2076  * for the entire VACUUM operation is close to zero. This avoids sharp
2077  * discontinuities in the duration and overhead of successive VACUUM
2078  * operations that run against the same table with a fixed workload.
2079  * Ideally, successive VACUUM operations will behave as if there are
2080  * exactly zero LP_DEAD items in cases where there are close to zero.
2081  *
2082  * This is likely to be helpful with a table that is continually affected
2083  * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2084  * have small aberrations that lead to just a few heap pages retaining
2085  * only one or two LP_DEAD items. This is pretty common; even when the
2086  * DBA goes out of their way to make UPDATEs use HOT, it is practically
2087  * impossible to predict whether HOT will be applied in 100% of cases.
2088  * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2089  * HOT through careful tuning.
2090  */
2091  do_bypass_optimization = false;
2092  if (onecall && vacrel->rel_pages > 0)
2093  {
2094  BlockNumber threshold;
2095 
2096  Assert(vacrel->num_index_scans == 0);
2097  Assert(vacrel->lpdead_items == vacrel->dead_tuples->num_tuples);
2098  Assert(vacrel->do_index_vacuuming);
2099  Assert(vacrel->do_index_cleanup);
2100 
2101  /*
2102  * This crossover point at which we'll start to do index vacuuming is
2103  * expressed as a percentage of the total number of heap pages in the
2104  * table that are known to have at least one LP_DEAD item. This is
2105  * much more important than the total number of LP_DEAD items, since
2106  * it's a proxy for the number of heap pages whose visibility map bits
2107  * cannot be set on account of bypassing index and heap vacuuming.
2108  *
2109  * We apply one further precautionary test: the space currently used
2110  * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2111  * not exceed 32MB. This limits the risk that we will bypass index
2112  * vacuuming again and again until eventually there is a VACUUM whose
2113  * dead_tuples space is not CPU cache resident.
2114  *
2115  * We don't take any special steps to remember the LP_DEAD items (such
2116  * as counting them in new_dead_tuples report to the stats collector)
2117  * when the optimization is applied. Though the accounting used in
2118  * analyze.c's acquire_sample_rows() will recognize the same LP_DEAD
2119  * items as dead rows in its own stats collector report, that's okay.
2120  * The discrepancy should be negligible. If this optimization is ever
2121  * expanded to cover more cases then this may need to be reconsidered.
2122  */
2123  threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2124  do_bypass_optimization =
2125  (vacrel->lpdead_item_pages < threshold &&
2126  vacrel->lpdead_items < MAXDEADTUPLES(32L * 1024L * 1024L));
2127  }
2128 
2129  if (do_bypass_optimization)
2130  {
2131  /*
2132  * There are almost zero TIDs. Behave as if there were precisely
2133  * zero: bypass index vacuuming, but do index cleanup.
2134  *
2135  * We expect that the ongoing VACUUM operation will finish very
2136  * quickly, so there is no point in considering speeding up as a
2137  * failsafe against wraparound failure. (Index cleanup is expected to
2138  * finish very quickly in cases where there were no ambulkdelete()
2139  * calls.)
2140  */
2141  vacrel->do_index_vacuuming = false;
2142  ereport(elevel,
2143  (errmsg("\"%s\": index scan bypassed: %u pages from table (%.2f%% of total) have %lld dead item identifiers",
2144  vacrel->relname, vacrel->rel_pages,
2145  100.0 * vacrel->lpdead_item_pages / vacrel->rel_pages,
2146  (long long) vacrel->lpdead_items)));
2147  }
2148  else if (lazy_vacuum_all_indexes(vacrel))
2149  {
2150  /*
2151  * We successfully completed a round of index vacuuming. Do related
2152  * heap vacuuming now.
2153  */
2154  lazy_vacuum_heap_rel(vacrel);
2155  }
2156  else
2157  {
2158  /*
2159  * Failsafe case.
2160  *
2161  * We attempted index vacuuming, but didn't finish a full round/full
2162  * index scan. This happens when relfrozenxid or relminmxid is too
2163  * far in the past.
2164  *
2165  * From this point on the VACUUM operation will do no further index
2166  * vacuuming or heap vacuuming. This VACUUM operation won't end up
2167  * back here again.
2168  */
2169  Assert(vacrel->do_failsafe);
2170  }
2171 
2172  /*
2173  * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2174  * vacuum)
2175  */
2176  vacrel->dead_tuples->num_tuples = 0;
2177 }
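
A minimal standalone sketch (not part of vacuumlazy.c) of the bypass crossover test above, assuming the PostgreSQL 14 values BYPASS_THRESHOLD_PAGES = 0.02 and a 6-byte ItemPointerData, and ignoring the small array header that MAXDEADTUPLES subtracts:

#include <stdbool.h>
#include <stdio.h>

/* illustrative stand-ins for the constants used above (assumed values) */
#define BYPASS_THRESHOLD_PAGES 0.02    /* 2% of rel_pages */
#define TID_BYTES              6       /* sizeof(ItemPointerData) */

static bool
would_bypass_index_vacuuming(long rel_pages, long lpdead_item_pages,
                             long lpdead_items)
{
    long threshold = (long) (rel_pages * BYPASS_THRESHOLD_PAGES);
    long max_tids = (32L * 1024L * 1024L) / TID_BYTES;  /* ~5.59 million */

    return lpdead_item_pages < threshold && lpdead_items < max_tids;
}

int
main(void)
{
    /* 100,000-page table; 150 pages hold a total of 400 LP_DEAD items */
    printf("%d\n", would_bypass_index_vacuuming(100000, 150, 400)); /* prints 1 */
    return 0;
}

With these assumed values, a 100,000-page table qualifies for the bypass as long as fewer than 2,000 pages have LP_DEAD items and fewer than roughly 5.6 million TIDs were collected.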
2178 
2179 /*
2180  * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2181  *
2182  * Returns true in the common case when all indexes were successfully
2183  * vacuumed. Returns false in rare cases where we determined that the ongoing
2184  * VACUUM operation is at risk of taking too long to finish, leading to
2185  * wraparound failure.
2186  */
2187 static bool
2188 lazy_vacuum_all_indexes(LVRelState *vacrel)
2189 {
2190  bool allindexes = true;
2191 
2192  Assert(!IsParallelWorker());
2193  Assert(vacrel->nindexes > 0);
2194  Assert(vacrel->do_index_vacuuming);
2195  Assert(vacrel->do_index_cleanup);
2196  Assert(TransactionIdIsNormal(vacrel->relfrozenxid));
2197  Assert(MultiXactIdIsValid(vacrel->relminmxid));
2198 
2199  /* Precheck for XID wraparound emergencies */
2200  if (lazy_check_wraparound_failsafe(vacrel))
2201  {
2202  /* Wraparound emergency -- don't even start an index scan */
2203  return false;
2204  }
2205 
2206  /* Report that we are now vacuuming indexes */
2207  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2208  PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
2209 
2210  if (!ParallelVacuumIsActive(vacrel))
2211  {
2212  for (int idx = 0; idx < vacrel->nindexes; idx++)
2213  {
2214  Relation indrel = vacrel->indrels[idx];
2215  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2216 
2217  vacrel->indstats[idx] =
2218  lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples,
2219  vacrel);
2220 
2221  if (lazy_check_wraparound_failsafe(vacrel))
2222  {
2223  /* Wraparound emergency -- end current index scan */
2224  allindexes = false;
2225  break;
2226  }
2227  }
2228  }
2229  else
2230  {
2231  /* Outsource everything to parallel variant */
2232  do_parallel_lazy_vacuum_all_indexes(vacrel);
2233 
2234  /*
2235  * Do a postcheck to consider applying wraparound failsafe now. Note
2236  * that parallel VACUUM only gets the precheck and this postcheck.
2237  */
2238  if (lazy_check_wraparound_failsafe(vacrel))
2239  allindexes = false;
2240  }
2241 
2242  /*
2243  * We delete all LP_DEAD items from the first heap pass in all indexes on
2244  * each call here (except calls where we choose to do the failsafe). This
2245  * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2246  * of the failsafe triggering, which prevents the next call from taking
2247  * place).
2248  */
2249  Assert(vacrel->num_index_scans > 0 ||
2250  vacrel->dead_tuples->num_tuples == vacrel->lpdead_items);
2251  Assert(allindexes || vacrel->do_failsafe);
2252 
2253  /*
2254  * Increase and report the number of index scans.
2255  *
2256  * We deliberately include the case where we started a round of bulk
2257  * deletes that we weren't able to finish due to the failsafe triggering.
2258  */
2259  vacrel->num_index_scans++;
2260  pgstat_progress_update_param(PROGRESS_VACUUM_NUM_INDEX_VACUUMS,
2261  vacrel->num_index_scans);
2262 
2263  return allindexes;
2264 }
2265 
2266 /*
2267  * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2268  *
2269  * This routine marks LP_DEAD items in vacrel->dead_tuples array as LP_UNUSED.
2270  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
2271  * at all.
2272  *
2273  * We may also be able to truncate the line pointer array of the heap pages we
2274  * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2275  * array, it can be reclaimed as free space. These LP_UNUSED items usually
2276  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2277  * each page to LP_UNUSED, and then consider if it's possible to truncate the
2278  * page's line pointer array).
2279  *
2280  * Note: the reason for doing this as a second pass is that we cannot remove
2281  * tuples until we've removed their index entries, and we want to process
2282  * index entry removal in batches as large as possible.
2283  */
2284 static void
2285 lazy_vacuum_heap_rel(LVRelState *vacrel)
2286 {
2287  int tupindex;
2288  BlockNumber vacuumed_pages;
2289  PGRUsage ru0;
2290  Buffer vmbuffer = InvalidBuffer;
2291  LVSavedErrInfo saved_err_info;
2292 
2293  Assert(vacrel->do_index_vacuuming);
2294  Assert(vacrel->do_index_cleanup);
2295  Assert(vacrel->num_index_scans > 0);
2296 
2297  /* Report that we are now vacuuming the heap */
2298  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2299  PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2300 
2301  /* Update error traceback information */
2302  update_vacuum_error_info(vacrel, &saved_err_info,
2303  VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2304  InvalidBlockNumber, InvalidOffsetNumber);
2305 
2306  pg_rusage_init(&ru0);
2307  vacuumed_pages = 0;
2308 
2309  tupindex = 0;
2310  while (tupindex < vacrel->dead_tuples->num_tuples)
2311  {
2312  BlockNumber tblk;
2313  Buffer buf;
2314  Page page;
2315  Size freespace;
2316 
2317  vacuum_delay_point();
2318 
2319  tblk = ItemPointerGetBlockNumber(&vacrel->dead_tuples->itemptrs[tupindex]);
2320  vacrel->blkno = tblk;
2321  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, tblk, RBM_NORMAL,
2322  vacrel->bstrategy);
2323  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2324  tupindex = lazy_vacuum_heap_page(vacrel, tblk, buf, tupindex,
2325  &vmbuffer);
2326 
2327  /* Now that we've vacuumed the page, record its available space */
2328  page = BufferGetPage(buf);
2329  freespace = PageGetHeapFreeSpace(page);
2330 
2331  UnlockReleaseBuffer(buf);
2332  RecordPageWithFreeSpace(vacrel->rel, tblk, freespace);
2333  vacuumed_pages++;
2334  }
2335 
2336  /* Clear the block number information */
2337  vacrel->blkno = InvalidBlockNumber;
2338 
2339  if (BufferIsValid(vmbuffer))
2340  {
2341  ReleaseBuffer(vmbuffer);
2342  vmbuffer = InvalidBuffer;
2343  }
2344 
2345  /*
2346  * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2347  * the second heap pass. No more, no less.
2348  */
2349  Assert(vacrel->num_index_scans > 1 ||
2350  (tupindex == vacrel->lpdead_items &&
2351  vacuumed_pages == vacrel->lpdead_item_pages));
2352 
2353  ereport(elevel,
2354  (errmsg("\"%s\": removed %d dead item identifiers in %u pages",
2355  vacrel->relname, tupindex, vacuumed_pages),
2356  errdetail_internal("%s", pg_rusage_show(&ru0))));
2357 
2358  /* Revert to the previous phase information for error traceback */
2359  restore_vacuum_error_info(vacrel, &saved_err_info);
2360 }
2361 
2362 /*
2363  * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2364  * vacrel->dead_tuples array.
2365  *
2366  * Caller must have an exclusive buffer lock on the buffer (though a
2367  * super-exclusive lock is also acceptable).
2368  *
2369  * tupindex is the index in vacrel->dead_tuples of the first dead tuple for
2370  * this page. We assume the rest follow sequentially. The return value is
2371  * the first tupindex after the tuples of this page.
2372  *
2373  * Prior to PostgreSQL 14 there were rare cases where this routine had to set
2374  * tuples with storage to unused. These days it is strictly responsible for
2375  * marking LP_DEAD stub line pointers as unused. This only happens for those
2376  * LP_DEAD items on the page that were determined to be LP_DEAD items back
2377  * when the same page was visited by lazy_scan_prune() (i.e. those whose TID
2378  * was recorded in the dead_tuples array).
2379  */
2380 static int
2381 lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2382  int tupindex, Buffer *vmbuffer)
2383 {
2384  LVDeadTuples *dead_tuples = vacrel->dead_tuples;
2385  Page page = BufferGetPage(buffer);
2386  OffsetNumber unused[MaxHeapTuplesPerPage];
2387  int uncnt = 0;
2388  TransactionId visibility_cutoff_xid;
2389  bool all_frozen;
2390  LVSavedErrInfo saved_err_info;
2391 
2392  Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2393 
2394  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2395 
2396  /* Update error traceback information */
2397  update_vacuum_error_info(vacrel, &saved_err_info,
2398  VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2399  InvalidOffsetNumber);
2400 
2401  START_CRIT_SECTION();
2402 
2403  for (; tupindex < dead_tuples->num_tuples; tupindex++)
2404  {
2405  BlockNumber tblk;
2406  OffsetNumber toff;
2407  ItemId itemid;
2408 
2409  tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
2410  if (tblk != blkno)
2411  break; /* past end of tuples for this block */
2412  toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
2413  itemid = PageGetItemId(page, toff);
2414 
2415  Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2416  ItemIdSetUnused(itemid);
2417  unused[uncnt++] = toff;
2418  }
2419 
2420  Assert(uncnt > 0);
2421 
2422  /* Attempt to truncate line pointer array now */
2423  PageTruncateLinePointerArray(page);
2424 
2425  /*
2426  * Mark buffer dirty before we write WAL.
2427  */
2428  MarkBufferDirty(buffer);
2429 
2430  /* XLOG stuff */
2431  if (RelationNeedsWAL(vacrel->rel))
2432  {
2433  xl_heap_vacuum xlrec;
2434  XLogRecPtr recptr;
2435 
2436  xlrec.nunused = uncnt;
2437 
2438  XLogBeginInsert();
2439  XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
2440 
2441  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2442  XLogRegisterBufData(0, (char *) unused, uncnt * sizeof(OffsetNumber));
2443 
2444  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
2445 
2446  PageSetLSN(page, recptr);
2447  }
2448 
2449  /*
2450  * End critical section, so we safely can do visibility tests (which
2451  * possibly need to perform IO and allocate memory!). If we crash now the
2452  * page (including the corresponding vm bit) might not be marked all
2453  * visible, but that's fine. A later vacuum will fix that.
2454  */
2455  END_CRIT_SECTION();
2456 
2457  /*
2458  * Now that we have removed the LP_DEAD items from the page, once again
2459  * check if the page has become all-visible. The page is already marked
2460  * dirty, exclusively locked, and, if needed, a full page image has been
2461  * emitted.
2462  */
2463  if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2464  &all_frozen))
2465  PageSetAllVisible(page);
2466 
2467  /*
2468  * All the changes to the heap page have been done. If the all-visible
2469  * flag is now set, also set the VM all-visible bit (and, if possible, the
2470  * all-frozen bit) unless this has already been done previously.
2471  */
2472  if (PageIsAllVisible(page))
2473  {
2474  uint8 flags = 0;
2475  uint8 vm_status = visibilitymap_get_status(vacrel->rel,
2476  blkno, vmbuffer);
2477 
2478  /* Set the VM all-visible and all-frozen bits in flags, if needed */
2479  if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
2480  flags |= VISIBILITYMAP_ALL_VISIBLE;
2481  if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
2482  flags |= VISIBILITYMAP_ALL_FROZEN;
2483 
2484  Assert(BufferIsValid(*vmbuffer));
2485  if (flags != 0)
2486  visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2487  *vmbuffer, visibility_cutoff_xid, flags);
2488  }
2489 
2490  /* Revert to the previous phase information for error traceback */
2491  restore_vacuum_error_info(vacrel, &saved_err_info);
2492  return tupindex;
2493 }
2494 
2495 /*
2496  * lazy_check_needs_freeze() -- scan page to see if any tuples
2497  * need to be cleaned to avoid wraparound
2498  *
2499  * Returns true if the page needs to be vacuumed using cleanup lock.
2500  * Also returns a flag indicating whether page contains any tuples at all.
2501  */
2502 static bool
2503 lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel)
2504 {
2505  Page page = BufferGetPage(buf);
2506  OffsetNumber offnum,
2507  maxoff;
2508  HeapTupleHeader tupleheader;
2509 
2510  *hastup = false;
2511 
2512  /*
2513  * New and empty pages, obviously, don't contain tuples. We could make
2514  * sure that the page is registered in the FSM, but it doesn't seem worth
2515  * waiting for a cleanup lock just for that, especially because it's
2516  * likely that the pin holder will do so.
2517  */
2518  if (PageIsNew(page) || PageIsEmpty(page))
2519  return false;
2520 
2521  maxoff = PageGetMaxOffsetNumber(page);
2522  for (offnum = FirstOffsetNumber;
2523  offnum <= maxoff;
2524  offnum = OffsetNumberNext(offnum))
2525  {
2526  ItemId itemid;
2527 
2528  /*
2529  * Set the offset number so that we can display it along with any
2530  * error that occurred while processing this tuple.
2531  */
2532  vacrel->offnum = offnum;
2533  itemid = PageGetItemId(page, offnum);
2534 
2535  /* this should match hastup test in count_nondeletable_pages() */
2536  if (ItemIdIsUsed(itemid))
2537  *hastup = true;
2538 
2539  /* dead and redirect items never need freezing */
2540  if (!ItemIdIsNormal(itemid))
2541  continue;
2542 
2543  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2544 
2545  if (heap_tuple_needs_freeze(tupleheader, vacrel->FreezeLimit,
2546  vacrel->MultiXactCutoff, buf))
2547  break;
2548  } /* scan along page */
2549 
2550  /* Clear the offset information once we have processed the given page. */
2551  vacrel->offnum = InvalidOffsetNumber;
2552 
2553  return (offnum <= maxoff);
2554 }
2555 
2556 /*
2557  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2558  * relfrozenxid and/or relminmxid that is dangerously far in the past.
2559  *
2560  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2561  * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2562  *
2563  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2564  * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2565  * that it started out with.
2566  *
2567  * Returns true when failsafe has been triggered.
2568  *
2569  * Caller is expected to call here before and after vacuuming each index in
2570  * the case of two-pass VACUUM, or every VACUUM_FSM_EVERY_PAGES blocks in the
2571  * case of no-indexes/one-pass VACUUM.
2572  *
2573  * There is also a precheck before the first pass over the heap begins, which
2574  * is helpful when the failsafe initially triggers during a non-aggressive
2575  * VACUUM -- the automatic aggressive vacuum to prevent wraparound that
2576  * follows can independently trigger the failsafe right away.
2577  */
2578 static bool
2579 lazy_check_wraparound_failsafe(LVRelState *vacrel)
2580 {
2581  /* Avoid calling vacuum_xid_failsafe_check() very frequently */
2582  if (vacrel->num_index_scans == 0 &&
2583  vacrel->rel_pages <= FAILSAFE_MIN_PAGES)
2584  return false;
2585 
2586  /* Don't warn more than once per VACUUM */
2587  if (vacrel->do_failsafe)
2588  return true;
2589 
2590  if (unlikely(vacuum_xid_failsafe_check(vacrel->relfrozenxid,
2591  vacrel->relminmxid)))
2592  {
2593  Assert(vacrel->do_index_vacuuming);
2594  Assert(vacrel->do_index_cleanup);
2595 
2596  vacrel->do_index_vacuuming = false;
2597  vacrel->do_index_cleanup = false;
2598  vacrel->do_failsafe = true;
2599 
2600  ereport(WARNING,
2601  (errmsg("abandoned index vacuuming of table \"%s.%s.%s\" as a failsafe after %d index scans",
2602  get_database_name(MyDatabaseId),
2603  vacrel->relnamespace,
2604  vacrel->relname,
2605  vacrel->num_index_scans),
2606  errdetail("table's relfrozenxid or relminmxid is too far in the past"),
2607  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2608  "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2609 
2610  /* Stop applying cost limits from this point on */
2611  VacuumCostActive = false;
2612  VacuumCostBalance = 0;
2613 
2614  return true;
2615  }
2616 
2617  return false;
2618 }
2619 
2620 /*
2621  * Perform lazy_vacuum_all_indexes() steps in parallel
2622  */
2623 static void
2624 do_parallel_lazy_vacuum_all_indexes(LVRelState *vacrel)
2625 {
2626  /* Tell parallel workers to do index vacuuming */
2627  vacrel->lps->lvshared->for_cleanup = false;
2628  vacrel->lps->lvshared->first_time = false;
2629 
2630  /*
2631  * We can only provide an approximate value of num_heap_tuples in vacuum
2632  * cases.
2633  */
2634  vacrel->lps->lvshared->reltuples = vacrel->old_live_tuples;
2635  vacrel->lps->lvshared->estimated_count = true;
2636 
2637  do_parallel_vacuum_or_cleanup(vacrel,
2638  vacrel->lps->nindexes_parallel_bulkdel);
2639 }
2640 
2641 /*
2642  * Perform lazy_cleanup_all_indexes() steps in parallel
2643  */
2644 static void
2645 do_parallel_lazy_cleanup_all_indexes(LVRelState *vacrel)
2646 {
2647  int nworkers;
2648 
2649  /*
2650  * If parallel vacuum is active we perform index cleanup with parallel
2651  * workers.
2652  *
2653  * Tell parallel workers to do index cleanup.
2654  */
2655  vacrel->lps->lvshared->for_cleanup = true;
2656  vacrel->lps->lvshared->first_time = (vacrel->num_index_scans == 0);
2657 
2658  /*
2659  * Now we can provide a better estimate of total number of surviving
2660  * tuples (we assume indexes are more interested in that than in the
2661  * number of nominally live tuples).
2662  */
2663  vacrel->lps->lvshared->reltuples = vacrel->new_rel_tuples;
2664  vacrel->lps->lvshared->estimated_count =
2665  (vacrel->tupcount_pages < vacrel->rel_pages);
2666 
2667  /* Determine the number of parallel workers to launch */
2668  if (vacrel->lps->lvshared->first_time)
2669  nworkers = vacrel->lps->nindexes_parallel_cleanup +
2670  vacrel->lps->nindexes_parallel_condcleanup;
2671  else
2672  nworkers = vacrel->lps->nindexes_parallel_cleanup;
2673 
2674  do_parallel_vacuum_or_cleanup(vacrel, nworkers);
2675 }
2676 
2677 /*
2678  * Perform index vacuum or index cleanup with parallel workers. This function
2679  * must be used by the parallel vacuum leader process. The caller must set
2680  * lps->lvshared->for_cleanup to indicate whether to perform vacuum or
2681  * cleanup.
2682  */
2683 static void
2684 do_parallel_vacuum_or_cleanup(LVRelState *vacrel, int nworkers)
2685 {
2686  LVParallelState *lps = vacrel->lps;
2687 
2688  Assert(!IsParallelWorker());
2689  Assert(ParallelVacuumIsActive(vacrel));
2690  Assert(vacrel->nindexes > 0);
2691 
2692  /* The leader process will participate */
2693  nworkers--;
2694 
2695  /*
2696  * It is possible that parallel context is initialized with fewer workers
2697  * than the number of indexes that need a separate worker in the current
2698  * phase, so we need to consider it. See compute_parallel_vacuum_workers.
2699  */
2700  nworkers = Min(nworkers, lps->pcxt->nworkers);
2701 
2702  /* Setup the shared cost-based vacuum delay and launch workers */
2703  if (nworkers > 0)
2704  {
2705  if (vacrel->num_index_scans > 0)
2706  {
2707  /* Reset the parallel index processing counter */
2708  pg_atomic_write_u32(&(lps->lvshared->idx), 0);
2709 
2710  /* Reinitialize the parallel context to relaunch parallel workers */
2711  ReinitializeParallelDSM(lps->pcxt);
2712  }
2713 
2714  /*
2715  * Set up shared cost balance and the number of active workers for
2716  * vacuum delay. We need to do this before launching workers as
2717  * otherwise, they might not see the updated values for these
2718  * parameters.
2719  */
2720  pg_atomic_write_u32(&(lps->lvshared->cost_balance), VacuumCostBalance);
2721  pg_atomic_write_u32(&(lps->lvshared->active_nworkers), 0);
2722 
2723  /*
2724  * The number of workers can vary between bulkdelete and cleanup
2725  * phase.
2726  */
2727  ReinitializeParallelWorkers(lps->pcxt, nworkers);
2728 
2729  LaunchParallelWorkers(lps->pcxt);
2730 
2731  if (lps->pcxt->nworkers_launched > 0)
2732  {
2733  /*
2734  * Reset the local cost values for leader backend as we have
2735  * already accumulated the remaining balance of heap.
2736  */
2737  VacuumCostBalance = 0;
2738  VacuumCostBalanceLocal = 0;
2739 
2740  /* Enable shared cost balance for leader backend */
2741  VacuumSharedCostBalance = &(lps->lvshared->cost_balance);
2742  VacuumActiveNWorkers = &(lps->lvshared->active_nworkers);
2743  }
2744 
2745  if (lps->lvshared->for_cleanup)
2746  ereport(elevel,
2747  (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
2748  "launched %d parallel vacuum workers for index cleanup (planned: %d)",
2749  lps->pcxt->nworkers_launched),
2750  lps->pcxt->nworkers_launched, nworkers)));
2751  else
2752  ereport(elevel,
2753  (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
2754  "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
2755  lps->pcxt->nworkers_launched),
2756  lps->pcxt->nworkers_launched, nworkers)));
2757  }
2758 
2759  /* Process the indexes that can be processed by only leader process */
2760  do_serial_processing_for_unsafe_indexes(vacrel, lps->lvshared);
2761 
2762  /*
2763  * Join as a parallel worker. The leader process alone processes all the
2764  * indexes in the case where no workers are launched.
2765  */
2766  do_parallel_processing(vacrel, lps->lvshared);
2767 
2768  /*
2769  * Next, accumulate buffer and WAL usage. (This must wait for the workers
2770  * to finish, or we might get incomplete data.)
2771  */
2772  if (nworkers > 0)
2773  {
2774  /* Wait for all vacuum workers to finish */
2775  WaitForParallelWorkersToFinish(lps->pcxt);
2776 
2777  for (int i = 0; i < lps->pcxt->nworkers_launched; i++)
2778  InstrAccumParallelQuery(&lps->buffer_usage[i], &lps->wal_usage[i]);
2779  }
2780 
2781  /*
2782  * Carry the shared balance value to heap scan and disable shared costing
2783  */
2784  if (VacuumSharedCostBalance)
2785  {
2786  VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
2787  VacuumSharedCostBalance = NULL;
2788  VacuumActiveNWorkers = NULL;
2789  }
2790 }
2791 
2792 /*
2793  * Index vacuum/cleanup routine used by the leader process and parallel
2794  * vacuum worker processes to process the indexes in parallel.
2795  */
2796 static void
2797 do_parallel_processing(LVRelState *vacrel, LVShared *lvshared)
2798 {
2799  /*
2800  * Increment the active worker count if we are able to launch any worker.
2801  */
2802  if (VacuumActiveNWorkers)
2803  pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
2804 
2805  /* Loop until all indexes are vacuumed */
2806  for (;;)
2807  {
2808  int idx;
2809  LVSharedIndStats *shared_istat;
2810  Relation indrel;
2811  IndexBulkDeleteResult *istat;
2812 
2813  /* Get an index number to process */
2814  idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
2815 
2816  /* Done for all indexes? */
2817  if (idx >= vacrel->nindexes)
2818  break;
2819 
2820  /* Get the index statistics of this index from DSM */
2821  shared_istat = parallel_stats_for_idx(lvshared, idx);
2822 
2823  /* Skip indexes not participating in parallelism */
2824  if (shared_istat == NULL)
2825  continue;
2826 
2827  indrel = vacrel->indrels[idx];
2828 
2829  /*
2830  * Skip processing indexes that are unsafe for workers (these are
2831  * processed in do_serial_processing_for_unsafe_indexes() by leader)
2832  */
2833  if (!parallel_processing_is_safe(indrel, lvshared))
2834  continue;
2835 
2836  /* Do vacuum or cleanup of the index */
2837  istat = (vacrel->indstats[idx]);
2838  vacrel->indstats[idx] = parallel_process_one_index(indrel, istat,
2839  lvshared,
2840  shared_istat,
2841  vacrel);
2842  }
2843 
2844  /*
2845  * We have completed the index vacuum so decrement the active worker
2846  * count.
2847  */
2848  if (VacuumActiveNWorkers)
2849  pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
2850 }
2851 
2852 /*
2853  * Vacuum or cleanup indexes that can be processed by only the leader process
2854  * because these indexes don't support parallel operation at that phase.
2855  */
2856 static void
2857 do_serial_processing_for_unsafe_indexes(LVRelState *vacrel, LVShared *lvshared)
2858 {
2859  Assert(!IsParallelWorker());
2860 
2861  /*
2862  * Increment the active worker count if we are able to launch any worker.
2863  */
2864  if (VacuumActiveNWorkers)
2865  pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
2866 
2867  for (int idx = 0; idx < vacrel->nindexes; idx++)
2868  {
2869  LVSharedIndStats *shared_istat;
2870  Relation indrel;
2871  IndexBulkDeleteResult *istat;
2872 
2873  shared_istat = parallel_stats_for_idx(lvshared, idx);
2874 
2875  /* Skip already-complete indexes */
2876  if (shared_istat != NULL)
2877  continue;
2878 
2879  indrel = vacrel->indrels[idx];
2880 
2881  /*
2882  * We're only here for the unsafe indexes
2883  */
2884  if (parallel_processing_is_safe(indrel, lvshared))
2885  continue;
2886 
2887  /* Do vacuum or cleanup of the index */
2888  istat = (vacrel->indstats[idx]);
2889  vacrel->indstats[idx] = parallel_process_one_index(indrel, istat,
2890  lvshared,
2891  shared_istat,
2892  vacrel);
2893  }
2894 
2895  /*
2896  * We have completed the index vacuum so decrement the active worker
2897  * count.
2898  */
2899  if (VacuumActiveNWorkers)
2900  pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
2901 }
2902 
2903 /*
2904  * Vacuum or cleanup index either by the leader process or by one of the worker
2905  * processes. After processing the index this function copies the index
2906  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
2907  * segment.
2908  */
2909 static IndexBulkDeleteResult *
2910 parallel_process_one_index(Relation indrel,
2911  IndexBulkDeleteResult *istat,
2912  LVShared *lvshared,
2913  LVSharedIndStats *shared_istat,
2914  LVRelState *vacrel)
2915 {
2916  IndexBulkDeleteResult *istat_res;
2917 
2918  /*
2919  * Update the pointer to the corresponding bulk-deletion result if someone
2920  * has already updated it
2921  */
2922  if (shared_istat && shared_istat->updated && istat == NULL)
2923  istat = &shared_istat->istat;
2924 
2925  /* Do vacuum or cleanup of the index */
2926  if (lvshared->for_cleanup)
2927  istat_res = lazy_cleanup_one_index(indrel, istat, lvshared->reltuples,
2928  lvshared->estimated_count, vacrel);
2929  else
2930  istat_res = lazy_vacuum_one_index(indrel, istat, lvshared->reltuples,
2931  vacrel);
2932 
2933  /*
2934  * Copy the index bulk-deletion result returned from ambulkdelete and
2935  * amvacuumcleanup to the DSM segment if it's the first cycle because they
2936  * allocate locally and it's possible that an index will be vacuumed by a
2937  * different vacuum process the next cycle. Copying the result normally
2938  * happens only the first time an index is vacuumed. For any additional
2939  * vacuum pass, we directly point to the result on the DSM segment and
2940  * pass it to vacuum index APIs so that workers can update it directly.
2941  *
2942  * Since all vacuum workers write the bulk-deletion result at different
2943  * slots we can write them without locking.
2944  */
2945  if (shared_istat && !shared_istat->updated && istat_res != NULL)
2946  {
2947  memcpy(&shared_istat->istat, istat_res, sizeof(IndexBulkDeleteResult));
2948  shared_istat->updated = true;
2949 
2950  /* Free the locally-allocated bulk-deletion result */
2951  pfree(istat_res);
2952 
2953  /* return the pointer to the result from shared memory */
2954  return &shared_istat->istat;
2955  }
2956 
2957  return istat_res;
2958 }
2959 
2960 /*
2961  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2962  */
2963 static void
2964 lazy_cleanup_all_indexes(LVRelState *vacrel)
2965 {
2966  Assert(!IsParallelWorker());
2967  Assert(vacrel->nindexes > 0);
2968 
2969  /* Report that we are now cleaning up indexes */
2970  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2971  PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
2972 
2973  if (!ParallelVacuumIsActive(vacrel))
2974  {
2975  double reltuples = vacrel->new_rel_tuples;
2976  bool estimated_count =
2977  vacrel->tupcount_pages < vacrel->rel_pages;
2978 
2979  for (int idx = 0; idx < vacrel->nindexes; idx++)
2980  {
2981  Relation indrel = vacrel->indrels[idx];
2982  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2983 
2984  vacrel->indstats[idx] =
2985  lazy_cleanup_one_index(indrel, istat, reltuples,
2986  estimated_count, vacrel);
2987  }
2988  }
2989  else
2990  {
2991  /* Outsource everything to parallel variant */
2992  do_parallel_lazy_cleanup_all_indexes(vacrel);
2993  }
2994 }
2995 
2996 /*
2997  * lazy_vacuum_one_index() -- vacuum index relation.
2998  *
2999  * Delete all the index entries pointing to tuples listed in
3000  * dead_tuples, and update running statistics.
3001  *
3002  * reltuples is the number of heap tuples to be passed to the
3003  * bulkdelete callback. It's always assumed to be estimated.
3004  *
3005  * Returns bulk delete stats derived from input stats
3006  */
3007 static IndexBulkDeleteResult *
3008 lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3009  double reltuples, LVRelState *vacrel)
3010 {
3011  IndexVacuumInfo ivinfo;
3012  PGRUsage ru0;
3013  LVSavedErrInfo saved_err_info;
3014 
3015  pg_rusage_init(&ru0);
3016 
3017  ivinfo.index = indrel;
3018  ivinfo.analyze_only = false;
3019  ivinfo.report_progress = false;
3020  ivinfo.estimated_count = true;
3021  ivinfo.message_level = elevel;
3022  ivinfo.num_heap_tuples = reltuples;
3023  ivinfo.strategy = vacrel->bstrategy;
3024 
3025  /*
3026  * Update error traceback information.
3027  *
3028  * The index name is saved during this phase and restored immediately
3029  * after this phase. See vacuum_error_callback.
3030  */
3031  Assert(vacrel->indname == NULL);
3032  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3033  update_vacuum_error_info(vacrel, &saved_err_info,
3034  VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3035  InvalidBlockNumber, InvalidOffsetNumber);
3036 
3037  /* Do bulk deletion */
3038  istat = index_bulk_delete(&ivinfo, istat, lazy_tid_reaped,
3039  (void *) vacrel->dead_tuples);
3040 
3041  ereport(elevel,
3042  (errmsg("scanned index \"%s\" to remove %d row versions",
3043  vacrel->indname, vacrel->dead_tuples->num_tuples),
3044  errdetail_internal("%s", pg_rusage_show(&ru0))));
3045 
3046  /* Revert to the previous phase information for error traceback */
3047  restore_vacuum_error_info(vacrel, &saved_err_info);
3048  pfree(vacrel->indname);
3049  vacrel->indname = NULL;
3050 
3051  return istat;
3052 }
3053 
3054 /*
3055  * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3056  *
3057  * reltuples is the number of heap tuples and estimated_count is true
3058  * if reltuples is an estimated value.
3059  *
3060  * Returns bulk delete stats derived from input stats
3061  */
3062 static IndexBulkDeleteResult *
3063 lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3064  double reltuples, bool estimated_count,
3065  LVRelState *vacrel)
3066 {
3067  IndexVacuumInfo ivinfo;
3068  PGRUsage ru0;
3069  LVSavedErrInfo saved_err_info;
3070 
3071  pg_rusage_init(&ru0);
3072 
3073  ivinfo.index = indrel;
3074  ivinfo.analyze_only = false;
3075  ivinfo.report_progress = false;
3076  ivinfo.estimated_count = estimated_count;
3077  ivinfo.message_level = elevel;
3078 
3079  ivinfo.num_heap_tuples = reltuples;
3080  ivinfo.strategy = vacrel->bstrategy;
3081 
3082  /*
3083  * Update error traceback information.
3084  *
3085  * The index name is saved during this phase and restored immediately
3086  * after this phase. See vacuum_error_callback.
3087  */
3088  Assert(vacrel->indname == NULL);
3089  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3090  update_vacuum_error_info(vacrel, &saved_err_info,
3091  VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3092  InvalidBlockNumber, InvalidOffsetNumber);
3093 
3094  istat = index_vacuum_cleanup(&ivinfo, istat);
3095 
3096  if (istat)
3097  {
3098  ereport(elevel,
3099  (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
3100  RelationGetRelationName(indrel),
3101  (istat)->num_index_tuples,
3102  (istat)->num_pages),
3103  errdetail("%.0f index row versions were removed.\n"
3104  "%u index pages were newly deleted.\n"
3105  "%u index pages are currently deleted, of which %u are currently reusable.\n"
3106  "%s.",
3107  (istat)->tuples_removed,
3108  (istat)->pages_newly_deleted,
3109  (istat)->pages_deleted, (istat)->pages_free,
3110  pg_rusage_show(&ru0))));
3111  }
3112 
3113  /* Revert to the previous phase information for error traceback */
3114  restore_vacuum_error_info(vacrel, &saved_err_info);
3115  pfree(vacrel->indname);
3116  vacrel->indname = NULL;
3117 
3118  return istat;
3119 }
3120 
3121 /*
3122  * should_attempt_truncation - should we attempt to truncate the heap?
3123  *
3124  * Don't even think about it unless we have a shot at releasing a goodly
3125  * number of pages. Otherwise, the time taken isn't worth it.
3126  *
3127  * Also don't attempt it if wraparound failsafe is in effect. It's hard to
3128  * predict how long lazy_truncate_heap will take. Don't take any chances.
3129  * There is very little chance of truncation working out when the failsafe is
3130  * in effect in any case. lazy_scan_prune makes the optimistic assumption
3131  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3132  * we're called.
3133  *
3134  * Also don't attempt it if we are doing early pruning/vacuuming, because a
3135  * scan which cannot find a truncated heap page cannot determine that the
3136  * snapshot is too old to read that page.
3137  *
3138  * This is split out so that we can test whether truncation is going to be
3139  * called for before we actually do it. If you change the logic here, be
3140  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
3141  */
3142 static bool
3143 should_attempt_truncation(LVRelState *vacrel, VacuumParams *params)
3144 {
3145  BlockNumber possibly_freeable;
3146 
3147  if (params->truncate == VACOPT_TERNARY_DISABLED)
3148  return false;
3149 
3150  if (vacrel->do_failsafe)
3151  return false;
3152 
3153  possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3154  if (possibly_freeable > 0 &&
3155  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3156  possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION) &&
3157  old_snapshot_threshold < 0)
3158  return true;
3159  else
3160  return false;
3161 }
3162 
3163 /*
3164  * lazy_truncate_heap - try to truncate off any empty pages at the end
3165  */
3166 static void
3167 lazy_truncate_heap(LVRelState *vacrel)
3168 {
3169  BlockNumber old_rel_pages = vacrel->rel_pages;
3170  BlockNumber new_rel_pages;
3171  int lock_retry;
3172 
3173  /* Report that we are now truncating */
3174  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3175  PROGRESS_VACUUM_PHASE_TRUNCATE);
3176 
3177  /*
3178  * Loop until no more truncating can be done.
3179  */
3180  do
3181  {
3182  PGRUsage ru0;
3183 
3184  pg_rusage_init(&ru0);
3185 
3186  /*
3187  * We need full exclusive lock on the relation in order to do
3188  * truncation. If we can't get it, give up rather than waiting --- we
3189  * don't want to block other backends, and we don't want to deadlock
3190  * (which is quite possible considering we already hold a lower-grade
3191  * lock).
3192  */
3193  vacrel->lock_waiter_detected = false;
3194  lock_retry = 0;
3195  while (true)
3196  {
3197  if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3198  break;
3199 
3200  /*
3201  * Check for interrupts while trying to (re-)acquire the exclusive
3202  * lock.
3203  */
3204  CHECK_FOR_INTERRUPTS();
3205 
3206  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3207  VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3208  {
3209  /*
3210  * We failed to establish the lock in the specified number of
3211  * retries. This means we give up truncating.
3212  */
3213  vacrel->lock_waiter_detected = true;
3214  ereport(elevel,
3215  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3216  vacrel->relname)));
3217  return;
3218  }
3219 
3221  }
3222 
3223  /*
3224  * Now that we have exclusive lock, look to see if the rel has grown
3225  * whilst we were vacuuming with non-exclusive lock. If so, give up;
3226  * the newly added pages presumably contain non-deletable tuples.
3227  */
3228  new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3229  if (new_rel_pages != old_rel_pages)
3230  {
3231  /*
3232  * Note: we intentionally don't update vacrel->rel_pages with the
3233  * new rel size here. If we did, it would amount to assuming that
3234  * the new pages are empty, which is unlikely. Leaving the numbers
3235  * alone amounts to assuming that the new pages have the same
3236  * tuple density as existing ones, which is less unlikely.
3237  */
3238  UnlockRelation(vacrel->rel, AccessExclusiveLock);
3239  return;
3240  }
3241 
3242  /*
3243  * Scan backwards from the end to verify that the end pages actually
3244  * contain no tuples. This is *necessary*, not optional, because
3245  * other backends could have added tuples to these pages whilst we
3246  * were vacuuming.
3247  */
3248  new_rel_pages = count_nondeletable_pages(vacrel);
3249  vacrel->blkno = new_rel_pages;
3250 
3251  if (new_rel_pages >= old_rel_pages)
3252  {
3253  /* can't do anything after all */
3254  UnlockRelation(vacrel->rel, AccessExclusiveLock);
3255  return;
3256  }
3257 
3258  /*
3259  * Okay to truncate.
3260  */
3261  RelationTruncate(vacrel->rel, new_rel_pages);
3262 
3263  /*
3264  * We can release the exclusive lock as soon as we have truncated.
3265  * Other backends can't safely access the relation until they have
3266  * processed the smgr invalidation that smgrtruncate sent out ... but
3267  * that should happen as part of standard invalidation processing once
3268  * they acquire lock on the relation.
3269  */
3270  UnlockRelation(vacrel->rel, AccessExclusiveLock);
3271 
3272  /*
3273  * Update statistics. Here, it *is* correct to adjust rel_pages
3274  * without also touching reltuples, since the tuple count wasn't
3275  * changed by the truncation.
3276  */
3277  vacrel->pages_removed += old_rel_pages - new_rel_pages;
3278  vacrel->rel_pages = new_rel_pages;
3279 
3280  ereport(elevel,
3281  (errmsg("\"%s\": truncated %u to %u pages",
3282  vacrel->relname,
3283  old_rel_pages, new_rel_pages),
3284  errdetail_internal("%s",
3285  pg_rusage_show(&ru0))));
3286  old_rel_pages = new_rel_pages;
3287  } while (new_rel_pages > vacrel->nonempty_pages &&
3288  vacrel->lock_waiter_detected);
3289 }
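
A trivial sketch (not part of vacuumlazy.c) making the give-up condition in the retry loop above concrete, assuming the 5000 ms VACUUM_TRUNCATE_LOCK_TIMEOUT and 50 ms VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL values defined earlier in this file:

#include <stdio.h>

/* assumed values (in milliseconds) of the truncation lock macros in this file */
#define VACUUM_TRUNCATE_LOCK_TIMEOUT        5000
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL  50

int
main(void)
{
    /* number of conditional lock attempts before truncation is abandoned */
    printf("%d\n", VACUUM_TRUNCATE_LOCK_TIMEOUT / VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL);
    return 0;
}

That is, roughly one hundred conditional lock attempts (about five seconds of waiting) before truncation is given up.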
3290 
3291 /*
3292  * Rescan end pages to verify that they are (still) empty of tuples.
3293  *
3294  * Returns number of nondeletable pages (last nonempty page + 1).
3295  */
3296 static BlockNumber
3297 count_nondeletable_pages(LVRelState *vacrel)
3298 {
3299  BlockNumber blkno;
3300  BlockNumber prefetchedUntil;
3301  instr_time starttime;
3302 
3303  /* Initialize the starttime if we check for conflicting lock requests */
3304  INSTR_TIME_SET_CURRENT(starttime);
3305 
3306  /*
3307  * Start checking blocks at what we believe relation end to be and move
3308  * backwards. (Strange coding of loop control is needed because blkno is
3309  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3310  * in forward direction, so that OS-level readahead can kick in.
3311  */
3312  blkno = vacrel->rel_pages;
3313  StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3314  "prefetch size must be power of 2");
3315  prefetchedUntil = InvalidBlockNumber;
3316  while (blkno > vacrel->nonempty_pages)
3317  {
3318  Buffer buf;
3319  Page page;
3320  OffsetNumber offnum,
3321  maxoff;
3322  bool hastup;
3323 
3324  /*
3325  * Check if another process requests a lock on our relation. We are
3326  * holding an AccessExclusiveLock here, so they will be waiting. We
3327  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3328  * only check if that interval has elapsed once every 32 blocks to
3329  * keep the number of system calls and actual shared lock table
3330  * lookups to a minimum.
3331  */
3332  if ((blkno % 32) == 0)
3333  {
3334  instr_time currenttime;
3335  instr_time elapsed;
3336 
3337  INSTR_TIME_SET_CURRENT(currenttime);
3338  elapsed = currenttime;
3339  INSTR_TIME_SUBTRACT(elapsed, starttime);
3340  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3341  >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3342  {
3343  if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3344  {
3345  ereport(elevel,
3346  (errmsg("\"%s\": suspending truncate due to conflicting lock request",
3347  vacrel->relname)));
3348 
3349  vacrel->lock_waiter_detected = true;
3350  return blkno;
3351  }
3352  starttime = currenttime;
3353  }
3354  }
3355 
3356  /*
3357  * We don't insert a vacuum delay point here, because we have an
3358  * exclusive lock on the table which we want to hold for as short a
3359  * time as possible. We still need to check for interrupts however.
3360  */
3361  CHECK_FOR_INTERRUPTS();
3362 
3363  blkno--;
3364 
3365  /* If we haven't prefetched this lot yet, do so now. */
3366  if (prefetchedUntil > blkno)
3367  {
3368  BlockNumber prefetchStart;
3369  BlockNumber pblkno;
3370 
3371  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3372  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3373  {
3374  PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3375  CHECK_FOR_INTERRUPTS();
3376  }
3377  prefetchedUntil = prefetchStart;
3378  }
3379 
3380  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3381  vacrel->bstrategy);
3382 
3383  /* In this phase we only need shared access to the buffer */
3384  LockBuffer(buf, BUFFER_LOCK_SHARE);
3385 
3386  page = BufferGetPage(buf);
3387 
3388  if (PageIsNew(page) || PageIsEmpty(page))
3389  {
3390  UnlockReleaseBuffer(buf);
3391  continue;
3392  }
3393 
3394  hastup = false;
3395  maxoff = PageGetMaxOffsetNumber(page);
3396  for (offnum = FirstOffsetNumber;
3397  offnum <= maxoff;
3398  offnum = OffsetNumberNext(offnum))
3399  {
3400  ItemId itemid;
3401 
3402  itemid = PageGetItemId(page, offnum);
3403 
3404  /*
3405  * Note: any non-unused item should be taken as a reason to keep
3406  * this page. We formerly thought that DEAD tuples could be
3407  * thrown away, but that's not so, because we'd not have cleaned
3408  * out their index entries.
3409  */
3410  if (ItemIdIsUsed(itemid))
3411  {
3412  hastup = true;
3413  break; /* can stop scanning */
3414  }
3415  } /* scan along page */
3416 
3417  UnlockReleaseBuffer(buf);
3418 
3419  /* Done scanning if we found a tuple here */
3420  if (hastup)
3421  return blkno + 1;
3422  }
3423 
3424  /*
3425  * If we fall out of the loop, all the previously-thought-to-be-empty
3426  * pages still are; we need not bother to look at the last known-nonempty
3427  * page.
3428  */
3429  return vacrel->nonempty_pages;
3430 }
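/*
 * Illustrative sketch (not part of vacuumlazy.c): the prefetch-window
 * arithmetic used in count_nondeletable_pages above.  Because the window
 * size is a power of two, "blkno & ~(WINDOW - 1)" rounds the current block
 * down to a window boundary, so each window is issued once, in ascending
 * order, even though the verification scan itself walks backwards.  WINDOW
 * and the printed "prefetch" are stand-ins for PREFETCH_SIZE and
 * PrefetchBuffer().
 */
#include <stdio.h>

#define WINDOW 32				/* must be a power of two */

int
main(void)
{
	unsigned	blkno = 70;		/* pretend relation end */
	unsigned	prefetched_until = (unsigned) -1;	/* nothing prefetched yet */

	while (blkno > 0)
	{
		blkno--;
		if (prefetched_until > blkno)
		{
			unsigned	start = blkno & ~(WINDOW - 1);

			printf("prefetch blocks %u..%u\n", start, blkno);
			prefetched_until = start;
		}
		/* ... read and inspect block "blkno" here ... */
	}
	return 0;
}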
3431 
3432 /*
3433  * Return the maximum number of dead tuples we can record.
3434  */
3435 static long
3436 compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
3437 {
3438  long maxtuples;
3439  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
3440  autovacuum_work_mem != -1 ?
3441  autovacuum_work_mem : maintenance_work_mem;
3442 
3443  if (hasindex)
3444  {
3445  maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
3446  maxtuples = Min(maxtuples, INT_MAX);
3447  maxtuples = Min(maxtuples, MAXDEADTUPLES(MaxAllocSize));
3448 
3449  /* curious coding here to ensure the multiplication can't overflow */
3450  if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
3451  maxtuples = relblocks * LAZY_ALLOC_TUPLES;
3452 
3453  /* stay sane if small maintenance_work_mem */
3454  maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
3455  }
3456  else
3457  maxtuples = MaxHeapTuplesPerPage;
3458 
3459  return maxtuples;
3460 }
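/*
 * Illustrative sketch (not part of vacuumlazy.c): the sizing arithmetic
 * behind compute_max_dead_tuples above, with the macros spelled out as plain
 * numbers.  A heap TID (ItemPointerData) is 6 bytes, so the default
 * maintenance_work_mem of 64MB buys room for roughly 11 million dead TIDs;
 * the per-block clamp corresponds to the LAZY_ALLOC_TUPLES cap.  The inputs
 * below are assumed example values, not settings read from a server.
 */
#include <stdio.h>

int
main(void)
{
	long		work_mem_kb = 64 * 1024;	/* maintenance_work_mem = 64MB */
	long		tid_size = 6;				/* sizeof(ItemPointerData) */
	long		tuples_per_block = 291;		/* ~MaxHeapTuplesPerPage for 8kB pages */
	long		rel_blocks = 10000;			/* table size in blocks */

	long		maxtuples = (work_mem_kb * 1024L) / tid_size;

	/* no point reserving more slots than the table can possibly hold */
	if (maxtuples / tuples_per_block > rel_blocks)
		maxtuples = rel_blocks * tuples_per_block;

	printf("dead-TID slots: %ld (%.1f MB)\n",
		   maxtuples, maxtuples * tid_size / (1024.0 * 1024.0));
	return 0;
}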
3461 
3462 /*
3463  * lazy_space_alloc - space allocation decisions for lazy vacuum
3464  *
3465  * See the comments at the head of this file for rationale.
3466  */
3467 static void
3468 lazy_space_alloc(LVRelState *vacrel, int nworkers, BlockNumber nblocks)
3469 {
3470  LVDeadTuples *dead_tuples;
3471  long maxtuples;
3472 
3473  /*
3474  * Initialize state for a parallel vacuum. As of now, only one worker can
3475  * be used for an index, so we invoke parallelism only if there are at
3476  * least two indexes on a table.
3477  */
3478  if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3479  {
3480  /*
3481  * Since parallel workers cannot access data in temporary tables, we
3482  * can't perform parallel vacuum on them.
3483  */
3484  if (RelationUsesLocalBuffers(vacrel->rel))
3485  {
3486  /*
3487  * Give warning only if the user explicitly tries to perform a
3488  * parallel vacuum on the temporary table.
3489  */
3490  if (nworkers > 0)
3491  ereport(WARNING,
3492  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3493  vacrel->relname)));
3494  }
3495  else
3496  vacrel->lps = begin_parallel_vacuum(vacrel, nblocks, nworkers);
3497 
3498  /* If parallel mode started, we're done */
3499  if (ParallelVacuumIsActive(vacrel))
3500  return;
3501  }
3502 
3503  maxtuples = compute_max_dead_tuples(nblocks, vacrel->nindexes > 0);
3504 
3505  dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
3506  dead_tuples->num_tuples = 0;
3507  dead_tuples->max_tuples = (int) maxtuples;
3508 
3509  vacrel->dead_tuples = dead_tuples;
3510 }
3511 
3512 /*
3513  * lazy_space_free - free space allocated in lazy_space_alloc
3514  */
3515 static void
3516 lazy_space_free(LVRelState *vacrel)
3517 {
3518  if (!ParallelVacuumIsActive(vacrel))
3519  return;
3520 
3521  /*
3522  * End parallel mode before updating index statistics as we cannot write
3523  * during parallel mode.
3524  */
3525  end_parallel_vacuum(vacrel);
3526 }
3527 
3528 /*
3529  * lazy_tid_reaped() -- is a particular tid deletable?
3530  *
3531  * This has the right signature to be an IndexBulkDeleteCallback.
3532  *
3533  * Assumes dead_tuples array is in sorted order.
3534  */
3535 static bool
3536 lazy_tid_reaped(ItemPointer itemptr, void *state)
3537 {
3538  LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
3539  int64 litem,
3540  ritem,
3541  item;
3542  ItemPointer res;
3543 
3544  litem = itemptr_encode(&dead_tuples->itemptrs[0]);
3545  ritem = itemptr_encode(&dead_tuples->itemptrs[dead_tuples->num_tuples - 1]);
3546  item = itemptr_encode(itemptr);
3547 
3548  /*
3549  * Doing a simple bound check before bsearch() is useful to avoid the
3550  * extra cost of bsearch(), especially if dead tuples on the heap are
3551  * concentrated in a certain range. Since this function is called for
3552  * every index tuple, it pays to be really fast.
3553  */
3554  if (item < litem || item > ritem)
3555  return false;
3556 
3557  res = (ItemPointer) bsearch((void *) itemptr,
3558  (void *) dead_tuples->itemptrs,
3559  dead_tuples->num_tuples,
3560  sizeof(ItemPointerData),
3561  vac_cmp_itemptr);
3562 
3563  return (res != NULL);
3564 }
3565 
3566 /*
3567  * Comparator routines for use with qsort() and bsearch().
3568  */
3569 static int
3570 vac_cmp_itemptr(const void *left, const void *right)
3571 {
3572  BlockNumber lblk,
3573  rblk;
3574  OffsetNumber loff,
3575  roff;
3576 
3577  lblk = ItemPointerGetBlockNumber((ItemPointer) left);
3578  rblk = ItemPointerGetBlockNumber((ItemPointer) right);
3579 
3580  if (lblk < rblk)
3581  return -1;
3582  if (lblk > rblk)
3583  return 1;
3584 
3585  loff = ItemPointerGetOffsetNumber((ItemPointer) left);
3586  roff = ItemPointerGetOffsetNumber((ItemPointer) right);
3587 
3588  if (loff < roff)
3589  return -1;
3590  if (loff > roff)
3591  return 1;
3592 
3593  return 0;
3594 }
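/*
 * Illustrative sketch (not part of vacuumlazy.c): the lookup scheme used by
 * lazy_tid_reaped above, on a simplified TID type.  The dead-TID array is
 * kept in (block, offset) order, so a candidate can first be rejected with a
 * cheap comparison against the smallest and largest entries (using the same
 * "block << 16 | offset" encoding idea as itemptr_encode) before paying for
 * a bsearch.  All toy_* names are hypothetical stand-ins.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct
{
	uint32_t	block;
	uint16_t	offset;
} toy_tid;

static int64_t
toy_encode(const toy_tid *t)
{
	return ((int64_t) t->block << 16) | t->offset;
}

static int
toy_cmp(const void *a, const void *b)
{
	int64_t		la = toy_encode(a);
	int64_t		lb = toy_encode(b);

	return (la > lb) - (la < lb);
}

static int
toy_is_dead(const toy_tid *dead, size_t ndead, const toy_tid *candidate)
{
	int64_t		item = toy_encode(candidate);

	/* cheap bound check first, as in lazy_tid_reaped() */
	if (item < toy_encode(&dead[0]) || item > toy_encode(&dead[ndead - 1]))
		return 0;
	return bsearch(candidate, dead, ndead, sizeof(toy_tid), toy_cmp) != NULL;
}

int
main(void)
{
	toy_tid		dead[] = {{1, 3}, {1, 7}, {4, 2}, {9, 1}};	/* already sorted */
	toy_tid		probe = {4, 2};
	toy_tid		miss = {42, 1};

	printf("(4,2) dead? %d\n", toy_is_dead(dead, 4, &probe));
	printf("(42,1) dead? %d\n", toy_is_dead(dead, 4, &miss));
	return 0;
}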
3595 
3596 /*
3597  * Check if every tuple in the given page is visible to all current and future
3598  * transactions. Also return the visibility_cutoff_xid which is the highest
3599  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3600  * on this page is frozen.
3601  */
3602 static bool
3603 heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3604  TransactionId *visibility_cutoff_xid,
3605  bool *all_frozen)
3606 {
3607  Page page = BufferGetPage(buf);
3608  BlockNumber blockno = BufferGetBlockNumber(buf);
3609  OffsetNumber offnum,
3610  maxoff;
3611  bool all_visible = true;
3612 
3613  *visibility_cutoff_xid = InvalidTransactionId;
3614  *all_frozen = true;
3615 
3616  /*
3617  * This is a stripped down version of the line pointer scan in
3618  * lazy_scan_heap(). So if you change anything here, also check that code.
3619  */
3620  maxoff = PageGetMaxOffsetNumber(page);
3621  for (offnum = FirstOffsetNumber;
3622  offnum <= maxoff && all_visible;
3623  offnum = OffsetNumberNext(offnum))
3624  {
3625  ItemId itemid;
3626  HeapTupleData tuple;
3627 
3628  /*
3629  * Set the offset number so that we can display it along with any
3630  * error that occurred while processing this tuple.
3631  */
3632  vacrel->offnum = offnum;
3633  itemid = PageGetItemId(page, offnum);
3634 
3635  /* Unused or redirect line pointers are of no interest */
3636  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3637  continue;
3638 
3639  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3640 
3641  /*
3642  * Dead line pointers can have index pointers pointing to them. So
3643  * they can't be treated as visible
3644  */
3645  if (ItemIdIsDead(itemid))
3646  {
3647  all_visible = false;
3648  *all_frozen = false;
3649  break;
3650  }
3651 
3652  Assert(ItemIdIsNormal(itemid));
3653 
3654  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3655  tuple.t_len = ItemIdGetLength(itemid);
3656  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3657 
3658  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf))
3659  {
3660  case HEAPTUPLE_LIVE:
3661  {
3662  TransactionId xmin;
3663 
3664  /* Check comments in lazy_scan_heap. */
3665  if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3666  {
3667  all_visible = false;
3668  *all_frozen = false;
3669  break;
3670  }
3671 
3672  /*
3673  * The inserter definitely committed. But is it old enough
3674  * that everyone sees it as committed?
3675  */
3676  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3677  if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
3678  {
3679  all_visible = false;
3680  *all_frozen = false;
3681  break;
3682  }
3683 
3684  /* Track newest xmin on page. */
3685  if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
3686  *visibility_cutoff_xid = xmin;
3687 
3688  /* Check whether this tuple is already frozen or not */
3689  if (all_visible && *all_frozen &&
3690  heap_tuple_needs_eventual_freeze(tuple.t_data))
3691  *all_frozen = false;
3692  }
3693  break;
3694 
3695  case HEAPTUPLE_DEAD:
3696  case HEAPTUPLE_RECENTLY_DEAD:
3697  case HEAPTUPLE_INSERT_IN_PROGRESS:
3698  case HEAPTUPLE_DELETE_IN_PROGRESS:
3699  {
3700  all_visible = false;
3701  *all_frozen = false;
3702  break;
3703  }
3704  default:
3705  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3706  break;
3707  }
3708  } /* scan along page */
3709 
3710  /* Clear the offset information once we have processed the given page. */
3711  vacrel->offnum = InvalidOffsetNumber;
3712 
3713  return all_visible;
3714 }
3715 
3716 /*
3717  * Compute the number of parallel worker processes to request. Both index
3718  * vacuum and index cleanup can be executed with parallel workers. The index
3719  * is eligible for parallel vacuum iff its size is greater than
3720  * min_parallel_index_scan_size as invoking workers for very small indexes
3721  * can hurt performance.
3722  *
3723  * nrequested is the number of parallel workers that the user requested. If
3724  * nrequested is 0, we compute the parallel degree based on nindexes, that is
3725  * the number of indexes that support parallel vacuum. This function also
3726  * sets can_parallel_vacuum to remember indexes that participate in parallel
3727  * vacuum.
3728  */
3729 static int
3730 compute_parallel_vacuum_workers(LVRelState *vacrel, int nrequested,
3731  bool *can_parallel_vacuum)
3732 {
3733  int nindexes_parallel = 0;
3734  int nindexes_parallel_bulkdel = 0;
3735  int nindexes_parallel_cleanup = 0;
3736  int parallel_workers;
3737 
3738  /*
3739  * We don't allow performing parallel operation in standalone backend or
3740  * when parallelism is disabled.
3741  */
3742  if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
3743  return 0;
3744 
3745  /*
3746  * Compute the number of indexes that can participate in parallel vacuum.
3747  */
3748  for (int idx = 0; idx < vacrel->nindexes; idx++)
3749  {
3750  Relation indrel = vacrel->indrels[idx];
3751  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3752 
3753  if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
3754  RelationGetNumberOfBlocks(indrel) < min_parallel_index_scan_size)
3755  continue;
3756 
3757  can_parallel_vacuum[idx] = true;
3758 
3759  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3760  nindexes_parallel_bulkdel++;
3761  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
3762  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3763  nindexes_parallel_cleanup++;
3764  }
3765 
3766  nindexes_parallel = Max(nindexes_parallel_bulkdel,
3767  nindexes_parallel_cleanup);
3768 
3769  /* The leader process takes one index */
3770  nindexes_parallel--;
3771 
3772  /* No index supports parallel vacuum */
3773  if (nindexes_parallel <= 0)
3774  return 0;
3775 
3776  /* Compute the parallel degree */
3777  parallel_workers = (nrequested > 0) ?
3778  Min(nrequested, nindexes_parallel) : nindexes_parallel;
3779 
3780  /* Cap by max_parallel_maintenance_workers */
3781  parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
3782 
3783  return parallel_workers;
3784 }
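/*
 * Illustrative sketch (not part of vacuumlazy.c): the worker-count arithmetic
 * of compute_parallel_vacuum_workers above for one concrete case.  With four
 * indexes of which three support parallel bulkdelete and two support parallel
 * cleanup, the leader keeps one index for itself, so at most two workers are
 * useful; the request and max_parallel_maintenance_workers only clamp that
 * number further.  All inputs below are made-up example values.
 */
#include <stdio.h>

static int
toy_parallel_degree(int n_bulkdel, int n_cleanup, int nrequested, int max_workers)
{
	int			nindexes_parallel = (n_bulkdel > n_cleanup) ? n_bulkdel : n_cleanup;

	nindexes_parallel--;		/* the leader process takes one index */
	if (nindexes_parallel <= 0)
		return 0;

	if (nrequested > 0 && nrequested < nindexes_parallel)
		nindexes_parallel = nrequested;
	if (nindexes_parallel > max_workers)
		nindexes_parallel = max_workers;
	return nindexes_parallel;
}

int
main(void)
{
	/* 3 bulkdel-capable, 2 cleanup-capable indexes, no explicit request, GUC = 2 */
	printf("workers = %d\n", toy_parallel_degree(3, 2, 0, 2));	/* -> 2 */
	/* user asked for 8 workers but only one index can be handed off */
	printf("workers = %d\n", toy_parallel_degree(2, 1, 8, 8));	/* -> 1 */
	return 0;
}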
3785 
3786 /*
3787  * Update index statistics in pg_class if the statistics are accurate.
3788  */
3789 static void
3790 update_index_statistics(LVRelState *vacrel)
3791 {
3792  Relation *indrels = vacrel->indrels;
3793  int nindexes = vacrel->nindexes;
3794  IndexBulkDeleteResult **indstats = vacrel->indstats;
3795 
3796  Assert(!IsInParallelMode());
3797 
3798  for (int idx = 0; idx < nindexes; idx++)
3799  {
3800  Relation indrel = indrels[idx];
3801  IndexBulkDeleteResult *istat = indstats[idx];
3802 
3803  if (istat == NULL || istat->estimated_count)
3804  continue;
3805 
3806  /* Update index statistics */
3807  vac_update_relstats(indrel,
3808  istat->num_pages,
3809  istat->num_index_tuples,
3810  0,
3811  false,
3812  InvalidTransactionId,
3813  InvalidMultiXactId,
3814  false);
3815  }
3816 }
3817 
3818 /*
3819  * This function prepares and returns parallel vacuum state if we can launch
3820  * even one worker.  It is responsible for entering parallel mode, creating
3821  * a parallel context, and initializing the DSM segment.
3822  */
3823 static LVParallelState *
3824 begin_parallel_vacuum(LVRelState *vacrel, BlockNumber nblocks,
3825  int nrequested)
3826 {
3827  LVParallelState *lps = NULL;
3828  Relation *indrels = vacrel->indrels;
3829  int nindexes = vacrel->nindexes;
3830  ParallelContext *pcxt;
3831  LVShared *shared;
3832  LVDeadTuples *dead_tuples;
3833  BufferUsage *buffer_usage;
3834  WalUsage *wal_usage;
3835  bool *can_parallel_vacuum;
3836  long maxtuples;
3837  Size est_shared;
3838  Size est_deadtuples;
3839  int nindexes_mwm = 0;
3840  int parallel_workers = 0;
3841  int querylen;
3842 
3843  /*
3844  * A parallel vacuum must be requested and there must be indexes on the
3845  * relation
3846  */
3847  Assert(nrequested >= 0);
3848  Assert(nindexes > 0);
3849 
3850  /*
3851  * Compute the number of parallel vacuum workers to launch
3852  */
3853  can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
3854  parallel_workers = compute_parallel_vacuum_workers(vacrel,
3855  nrequested,
3856  can_parallel_vacuum);
3857 
3858  /* Can't perform vacuum in parallel */
3859  if (parallel_workers <= 0)
3860  {
3861  pfree(can_parallel_vacuum);
3862  return lps;
3863  }
3864 
3865  lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
3866 
3867  EnterParallelMode();
3868  pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
3869  parallel_workers);
3870  Assert(pcxt->nworkers > 0);
3871  lps->pcxt = pcxt;
3872 
3873  /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
3874  est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3875  for (int idx = 0; idx < nindexes; idx++)
3876  {
3877  Relation indrel = indrels[idx];
3878  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3879 
3880  /*
3881  * Cleanup option should be either disabled, always performing in
3882  * parallel or conditionally performing in parallel.
3883  */
3884  Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
3885  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
3886  Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
3887 
3888  /* Skip indexes that don't participate in parallel vacuum */
3889  if (!can_parallel_vacuum[idx])
3890  continue;
3891 
3892  if (indrel->rd_indam->amusemaintenanceworkmem)
3893  nindexes_mwm++;
3894 
3895  est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
3896 
3897  /*
3898  * Remember the number of indexes that support parallel operation for
3899  * each phase.
3900  */
3901  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3902  lps->nindexes_parallel_bulkdel++;
3903  if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
3904  lps->nindexes_parallel_cleanup++;
3905  if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
3906  lps->nindexes_parallel_condcleanup++;
3907  }
3908  shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
3909  shm_toc_estimate_keys(&pcxt->estimator, 1);
3910 
3911  /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
3912  maxtuples = compute_max_dead_tuples(nblocks, true);
3913  est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
3914  shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
3915  shm_toc_estimate_keys(&pcxt->estimator, 1);
3916 
3917  /*
3918  * Estimate space for BufferUsage and WalUsage --
3919  * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
3920  *
3921  * If there are no extensions loaded that care, we could skip this. We
3922  * have no way of knowing whether anyone's looking at pgBufferUsage or
3923  * pgWalUsage, so do it unconditionally.
3924  */
3925  shm_toc_estimate_chunk(&pcxt->estimator,
3926  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3927  shm_toc_estimate_keys(&pcxt->estimator, 1);
3928  shm_toc_estimate_chunk(&pcxt->estimator,
3929  mul_size(sizeof(WalUsage), pcxt->nworkers));
3930  shm_toc_estimate_keys(&pcxt->estimator, 1);
3931 
3932  /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
3933  if (debug_query_string)
3934  {
3935  querylen = strlen(debug_query_string);
3936  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
3937  shm_toc_estimate_keys(&pcxt->estimator, 1);
3938  }
3939  else
3940  querylen = 0; /* keep compiler quiet */
3941 
3942  InitializeParallelDSM(pcxt);
3943 
3944  /* Prepare shared information */
3945  shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
3946  MemSet(shared, 0, est_shared);
3947  shared->relid = RelationGetRelid(vacrel->rel);
3948  shared->elevel = elevel;
3949  shared->maintenance_work_mem_worker =
3950  (nindexes_mwm > 0) ?
3951  maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
3952  maintenance_work_mem;
3953 
3954  pg_atomic_init_u32(&(shared->cost_balance), 0);
3955  pg_atomic_init_u32(&(shared->active_nworkers), 0);
3956  pg_atomic_init_u32(&(shared->idx), 0);
3957  shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3958 
3959  /*
3960  * Initialize variables for shared index statistics, set NULL bitmap and
3961  * the size of stats for each index.
3962  */
3963  memset(shared->bitmap, 0x00, BITMAPLEN(nindexes));
3964  for (int idx = 0; idx < nindexes; idx++)
3965  {
3966  if (!can_parallel_vacuum[idx])
3967  continue;
3968 
3969  /* Set NOT NULL as this index does support parallelism */
3970  shared->bitmap[idx >> 3] |= 1 << (idx & 0x07);
3971  }
3972 
3973  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
3974  lps->lvshared = shared;
3975 
3976  /* Prepare the dead tuple space */
3977  dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
3978  dead_tuples->max_tuples = maxtuples;
3979  dead_tuples->num_tuples = 0;
3980  MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
3981  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
3982  vacrel->dead_tuples = dead_tuples;
3983 
3984  /*
3985  * Allocate space for each worker's BufferUsage and WalUsage; no need to
3986  * initialize
3987  */
3988  buffer_usage = shm_toc_allocate(pcxt->toc,
3989  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3990  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
3991  lps->buffer_usage = buffer_usage;
3992  wal_usage = shm_toc_allocate(pcxt->toc,
3993  mul_size(sizeof(WalUsage), pcxt->nworkers));
3994  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage);
3995  lps->wal_usage = wal_usage;
3996 
3997  /* Store query string for workers */
3998  if (debug_query_string)
3999  {
4000  char *sharedquery;
4001 
4002  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
4003  memcpy(sharedquery, debug_query_string, querylen + 1);
4004  sharedquery[querylen] = '\0';
4005  shm_toc_insert(pcxt->toc,
4006  PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
4007  }
4008 
4009  pfree(can_parallel_vacuum);
4010  return lps;
4011 }
4012 
4013 /*
4014  * Destroy the parallel context, and end parallel mode.
4015  *
4016  * Since writes are not allowed during parallel mode, copy the
4017  * updated index statistics from DSM into local memory and then later use that
4018  * to update the index statistics. One might think that we can exit from
4019  * parallel mode, update the index statistics and then destroy parallel
4020  * context, but that won't be safe (see ExitParallelMode).
4021  */
4022 static void
4023 end_parallel_vacuum(LVRelState *vacrel)
4024 {
4025  IndexBulkDeleteResult **indstats = vacrel->indstats;
4026  LVParallelState *lps = vacrel->lps;
4027  int nindexes = vacrel->nindexes;
4028 
4029  Assert(!IsParallelWorker());
4030 
4031  /* Copy the updated statistics */
4032  for (int idx = 0; idx < nindexes; idx++)
4033  {
4034  LVSharedIndStats *shared_istat;
4035 
4036  shared_istat = parallel_stats_for_idx(lps->lvshared, idx);
4037 
4038  /*
4039  * Skip unused slot. The statistics of this index are already stored
4040  * in local memory.
4041  */
4042  if (shared_istat == NULL)
4043  continue;
4044 
4045  if (shared_istat->updated)
4046  {
4047  indstats[idx] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
4048  memcpy(indstats[idx], &(shared_istat->istat), sizeof(IndexBulkDeleteResult));
4049  }
4050  else
4051  indstats[idx] = NULL;
4052  }
4053 
4054  DestroyParallelContext(lps->pcxt);
4055  ExitParallelMode();
4056 
4057  /* Deactivate parallel vacuum */
4058  pfree(lps);
4059  vacrel->lps = NULL;
4060 }
4061 
4062 /*
4063  * Return shared memory statistics for index at offset 'getidx', if any
4064  */
4065 static LVSharedIndStats *
4066 parallel_stats_for_idx(LVShared *lvshared, int getidx)
4067 {
4068  char *p;
4069 
4070  if (IndStatsIsNull(lvshared, getidx))
4071  return NULL;
4072 
4073  p = (char *) GetSharedIndStats(lvshared);
4074  for (int idx = 0; idx < getidx; idx++)
4075  {
4076  if (IndStatsIsNull(lvshared, idx))
4077  continue;
4078 
4079  p += sizeof(LVSharedIndStats);
4080  }
4081 
4082  return (LVSharedIndStats *) p;
4083 }
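/*
 * Illustrative sketch (not part of vacuumlazy.c): the slot walk performed by
 * parallel_stats_for_idx above, on a simplified layout.  Only indexes that
 * participate in parallel vacuum get a stats slot, and the slots are packed
 * back to back, so finding index N means skipping one slot for every
 * participating index below N.  The types and bitmap here are stand-ins for
 * LVShared/LVSharedIndStats.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
	bool		updated;
	double		num_index_tuples;
} toy_ind_stats;

typedef struct
{
	bool		has_slot[8];	/* stand-in for the LVShared NULL bitmap */
	toy_ind_stats slots[8];		/* packed slots for participating indexes */
} toy_shared;

static toy_ind_stats *
toy_stats_for_idx(toy_shared *shared, int getidx)
{
	toy_ind_stats *p = shared->slots;

	if (!shared->has_slot[getidx])
		return NULL;			/* this index keeps its stats locally */

	for (int idx = 0; idx < getidx; idx++)
	{
		if (shared->has_slot[idx])
			p++;				/* skip the slot of a lower participating index */
	}
	return p;
}

int
main(void)
{
	toy_shared	shared = {.has_slot = {true, false, true, true}};

	shared.slots[0].num_index_tuples = 100;	/* index 0 */
	shared.slots[1].num_index_tuples = 200;	/* index 2 (index 1 has no slot) */
	shared.slots[2].num_index_tuples = 300;	/* index 3 */

	printf("idx 2 -> %.0f tuples\n", toy_stats_for_idx(&shared, 2)->num_index_tuples);
	printf("idx 1 -> %s\n", toy_stats_for_idx(&shared, 1) ? "slot" : "NULL");
	return 0;
}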
4084 
4085 /*
4086  * Returns false if the given index can't participate in parallel index
4087  * vacuum or parallel index cleanup
4088  */
4089 static bool
4090 parallel_processing_is_safe(Relation indrel, LVShared *lvshared)
4091 {
4092  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
4093 
4094  /* first_time must be true only if for_cleanup is true */
4095  Assert(lvshared->for_cleanup || !lvshared->first_time);
4096 
4097  if (lvshared->for_cleanup)
4098  {
4099  /* Skip, if the index does not support parallel cleanup */
4100  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
4101  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
4102  return true;
4103 
4104  /*
4105  * Skip, if the index supports parallel cleanup conditionally, but we
4106  * have already processed the index (for bulkdelete). See the
4107  * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
4108  * when indexes support parallel cleanup conditionally.
4109  */
4110  if (!lvshared->first_time &&
4111  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
4112  return false;
4113  }
4114  else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
4115  {
4116  /* Skip if the index does not support parallel bulk deletion */
4117  return false;
4118  }
4119 
4120  return true;
4121 }
4122 
4123 /*
4124  * Perform work within a launched parallel process.
4125  *
4126  * Since parallel vacuum workers perform only index vacuum or index cleanup,
4127  * we don't need to report progress information.
4128  */
4129 void
4130 parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
4131 {
4132  Relation rel;
4133  Relation *indrels;
4134  LVShared *lvshared;
4135  LVDeadTuples *dead_tuples;
4136  BufferUsage *buffer_usage;
4137  WalUsage *wal_usage;
4138  int nindexes;
4139  char *sharedquery;
4140  LVRelState vacrel;
4141  ErrorContextCallback errcallback;
4142 
4143  lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED,
4144  false);
4145  elevel = lvshared->elevel;
4146 
4147  if (lvshared->for_cleanup)
4148  elog(DEBUG1, "starting parallel vacuum worker for cleanup");
4149  else
4150  elog(DEBUG1, "starting parallel vacuum worker for bulk delete");
4151 
4152  /* Set debug_query_string for individual workers */
4153  sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true);
4154  debug_query_string = sharedquery;
4155  pgstat_report_activity(STATE_RUNNING, debug_query_string);
4156 
4157  /*
4158  * Open table. The lock mode is the same as the leader process. It's
4159  * okay because the lock mode does not conflict among the parallel
4160  * workers.
4161  */
4162  rel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
4163 
4164  /*
4165  * Open all indexes.  indrels are sorted in order by OID, which should
4166  * match the leader's ordering.
4167  */
4168  vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
4169  Assert(nindexes > 0);
4170 
4171  /* Set dead tuple space */
4172  dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
4173  PARALLEL_VACUUM_KEY_DEAD_TUPLES,
4174  false);
4175 
4176  /* Set cost-based vacuum delay */
4177  VacuumCostActive = (VacuumCostDelay > 0);
4178  VacuumCostBalance = 0;
4179  VacuumPageHit = 0;
4180  VacuumPageMiss = 0;
4181  VacuumPageDirty = 0;
4182  VacuumCostBalanceLocal = 0;
4183  VacuumSharedCostBalance = &(lvshared->cost_balance);
4184  VacuumActiveNWorkers = &(lvshared->active_nworkers);
4185 
4186  vacrel.rel = rel;
4187  vacrel.indrels = indrels;
4188  vacrel.nindexes = nindexes;
4189  /* Each parallel VACUUM worker gets its own access strategy */
4190  vacrel.bstrategy = GetAccessStrategy(BAS_VACUUM);
4191  vacrel.indstats = (IndexBulkDeleteResult **)
4192  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
4193 
4194  if (lvshared->maintenance_work_mem_worker > 0)
4195  maintenance_work_mem = lvshared->maintenance_work_mem_worker;
4196 
4197  /*
4198  * Initialize vacrel for use as error callback arg by parallel worker.
4199  */
4200  vacrel.relnamespace = get_namespace_name(RelationGetNamespace(rel));
4201  vacrel.relname = pstrdup(RelationGetRelationName(rel));
4202  vacrel.indname = NULL;
4203  vacrel.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
4204  vacrel.dead_tuples = dead_tuples;
4205 
4206  /* Setup error traceback support for ereport() */
4207  errcallback.callback = vacuum_error_callback;
4208  errcallback.arg = &vacrel;
4209  errcallback.previous = error_context_stack;
4210  error_context_stack = &errcallback;
4211 
4212  /* Prepare to track buffer usage during parallel execution */
4213  InstrStartParallelQuery();
4214 
4215  /* Process indexes to perform vacuum/cleanup */
4216  do_parallel_processing(&vacrel, lvshared);
4217 
4218  /* Report buffer/WAL usage during parallel execution */
4219  buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
4220  wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
4221  InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
4222  &wal_usage[ParallelWorkerNumber]);
4223 
4224  /* Pop the error context stack */
4225  error_context_stack = errcallback.previous;
4226 
4227  vac_close_indexes(nindexes, indrels, RowExclusiveLock);
4228  table_close(rel, ShareUpdateExclusiveLock);
4229  FreeAccessStrategy(vacrel.bstrategy);
4230  pfree(vacrel.indstats);
4231 }
4232 
4233 /*
4234  * Error context callback for errors occurring during vacuum.
4235  */
4236 static void
4237 vacuum_error_callback(void *arg)
4238 {
4239  LVRelState *errinfo = arg;
4240 
4241  switch (errinfo->phase)
4242  {
4243  case VACUUM_ERRCB_PHASE_SCAN_HEAP:
4244  if (BlockNumberIsValid(errinfo->blkno))
4245  {
4246  if (OffsetNumberIsValid(errinfo->offnum))
4247  errcontext("while scanning block %u and offset %u of relation \"%s.%s\"",
4248  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
4249  else
4250  errcontext("while scanning block %u of relation \"%s.%s\"",
4251  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
4252  }
4253  else
4254  errcontext("while scanning relation \"%s.%s\"",
4255  errinfo->relnamespace, errinfo->relname);
4256  break;
4257 
4258  case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
4259  if (BlockNumberIsValid(errinfo->blkno))
4260  {
4261  if (OffsetNumberIsValid(errinfo->offnum))
4262  errcontext("while vacuuming block %u and offset %u of relation \"%s.%s\"",
4263  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
4264  else
4265  errcontext("while vacuuming block %u of relation \"%s.%s\"",
4266  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
4267  }
4268  else
4269  errcontext("while vacuuming relation \"%s.%s\"",
4270  errinfo->relnamespace, errinfo->relname);
4271  break;
4272 
4273  case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
4274  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
4275  errinfo->indname, errinfo->relnamespace, errinfo->relname);
4276  break;
4277 
4278  case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
4279  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
4280  errinfo->indname, errinfo->relnamespace, errinfo->relname);
4281  break;
4282 
4283  case VACUUM_ERRCB_PHASE_TRUNCATE:
4284  if (BlockNumberIsValid(errinfo->blkno))
4285  errcontext("while truncating relation \"%s.%s\" to %u blocks",
4286  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
4287  break;
4288 
4289  case VACUUM_ERRCB_PHASE_UNKNOWN:
4290  default:
4291  return; /* do nothing; the errinfo may not be
4292  * initialized */
4293  }
4294 }
4295 
4296 /*
4297  * Updates the information required for vacuum error callback. This also saves
4298  * the current information which can be later restored via restore_vacuum_error_info.
4299  */
4300 static void
4301 update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
4302  int phase, BlockNumber blkno, OffsetNumber offnum)
4303 {
4304  if (saved_vacrel)
4305  {
4306  saved_vacrel->offnum = vacrel->offnum;
4307  saved_vacrel->blkno = vacrel->blkno;
4308  saved_vacrel->phase = vacrel->phase;
4309  }
4310 
4311  vacrel->blkno = blkno;
4312  vacrel->offnum = offnum;
4313  vacrel->phase = phase;
4314 }
4315 
4316 /*
4317  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
4318  */
4319 static void
4320 restore_vacuum_error_info(LVRelState *vacrel,
4321  const LVSavedErrInfo *saved_vacrel)
4322 {
4323  vacrel->blkno = saved_vacrel->blkno;
4324  vacrel->offnum = saved_vacrel->offnum;
4325  vacrel->phase = saved_vacrel->phase;
4326 }