PostgreSQL Source Code  git master
vacuumlazy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  *
7  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
8  * TIDs. We want to ensure we can vacuum even the very largest relations with
9  * finite memory space usage. To do that, we set upper bounds on the number of
10  * tuples we will keep track of at once.
11  *
12  * We are willing to use at most maintenance_work_mem (or perhaps
13  * autovacuum_work_mem) memory space to keep track of dead tuples. We
14  * initially allocate an array of TIDs of that size, with an upper limit that
15  * depends on table size (this limit ensures we don't allocate a huge area
16  * uselessly for vacuuming small tables). If the array threatens to overflow,
17  * we suspend the heap scan phase and perform a pass of index cleanup and page
18  * compaction, then resume the heap scan with an empty TID array.
19  *
20  * If we're processing a table with no indexes, we can just vacuum each page
21  * as we go; there's no need to save up multiple tuples to minimize the number
22  * of index scans performed. So we don't use maintenance_work_mem memory for
23  * the TID array, just enough to hold as many heap tuples as fit on one page.
24  *
25  * Lazy vacuum supports parallel execution with parallel worker processes. In
26  * a parallel vacuum, we perform both index vacuum and index cleanup with
27  * parallel worker processes. Individual indexes are processed by one vacuum
28  * process. At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
29  * the parallel context and initialize the DSM segment that contains shared
30  * information as well as the memory space for storing dead tuples. When
31  * starting either index vacuum or index cleanup, we launch parallel worker
32  * processes. Once all indexes are processed the parallel worker processes
33  * exit. After that, the leader process re-initializes the parallel context
34  * so that it can use the same DSM for multiple passes of index vacuum and
35  * for performing index cleanup. For updating the index statistics, we need
36  * to update the system table and since updates are not allowed during
37  * parallel mode we update the index statistics after exiting from the
38  * parallel mode.
39  *
40  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
41  * Portions Copyright (c) 1994, Regents of the University of California
42  *
43  *
44  * IDENTIFICATION
45  * src/backend/access/heap/vacuumlazy.c
46  *
47  *-------------------------------------------------------------------------
48  */
49 #include "postgres.h"
50 
51 #include <math.h>
52 
53 #include "access/amapi.h"
54 #include "access/genam.h"
55 #include "access/heapam.h"
56 #include "access/heapam_xlog.h"
57 #include "access/htup_details.h"
58 #include "access/multixact.h"
59 #include "access/parallel.h"
60 #include "access/transam.h"
61 #include "access/visibilitymap.h"
62 #include "access/xact.h"
63 #include "access/xlog.h"
64 #include "catalog/storage.h"
65 #include "commands/dbcommands.h"
66 #include "commands/progress.h"
67 #include "commands/vacuum.h"
68 #include "executor/instrument.h"
69 #include "miscadmin.h"
70 #include "optimizer/paths.h"
71 #include "pgstat.h"
72 #include "portability/instr_time.h"
73 #include "postmaster/autovacuum.h"
74 #include "storage/bufmgr.h"
75 #include "storage/freespace.h"
76 #include "storage/lmgr.h"
77 #include "tcop/tcopprot.h"
78 #include "utils/lsyscache.h"
79 #include "utils/memutils.h"
80 #include "utils/pg_rusage.h"
81 #include "utils/timestamp.h"
82 
83 
84 /*
85  * Space/time tradeoff parameters: do these need to be user-tunable?
86  *
87  * To consider truncating the relation, we want there to be at least
88  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
89  * is less) potentially-freeable pages.
90  */
91 #define REL_TRUNCATE_MINIMUM 1000
92 #define REL_TRUNCATE_FRACTION 16
93 
94 /*
95  * Timing parameters for truncate locking heuristics.
96  *
97  * These were not exposed as user tunable GUC values because it didn't seem
98  * that the potential for improvement was great enough to merit the cost of
99  * supporting them.
100  */
101 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
102 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
103 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
104 
105 /*
106  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
107  * (it won't be exact because we only vacuum FSM after processing a heap page
108  * that has some removable tuples). When there are indexes, this is ignored,
109  * and we vacuum FSM after each index/heap cleaning pass.
110  */
111 #define VACUUM_FSM_EVERY_PAGES \
112  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
113 
114 /*
115  * Guesstimation of number of dead tuples per page. This is used to
116  * provide an upper limit to memory allocated when vacuuming small
117  * tables.
118  */
119 #define LAZY_ALLOC_TUPLES MaxHeapTuplesPerPage
120 
121 /*
122  * Before we consider skipping a page that's marked as clean in the
123  * visibility map, we must've seen at least this many consecutive clean pages.
124  */
125 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
126 
127 /*
128  * Size of the prefetch window for lazy vacuum backwards truncation scan.
129  * Needs to be a power of 2.
130  */
131 #define PREFETCH_SIZE ((BlockNumber) 32)
132 
133 /*
134  * DSM keys for parallel vacuum. Unlike other parallel execution code, since
135  * we don't need to worry about DSM keys conflicting with plan_node_id we can
136  * use small integers.
137  */
138 #define PARALLEL_VACUUM_KEY_SHARED 1
139 #define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
140 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
141 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
142 #define PARALLEL_VACUUM_KEY_WAL_USAGE 5
143 
144 /*
145  * Macro to check if we are in a parallel vacuum. If true, we are in the
146  * parallel mode and the DSM segment is initialized.
147  */
148 #define ParallelVacuumIsActive(lps) PointerIsValid(lps)
149 
150 /* Phases of vacuum during which we report error context. */
151 typedef enum
152 {
159 } VacErrPhase;
160 
161 /*
162  * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
163  * This is allocated in the DSM segment in parallel mode and in local memory
164  * in non-parallel mode.
165  */
166 typedef struct LVDeadTuples
167 {
168  int max_tuples; /* # slots allocated in array */
169  int num_tuples; /* current # of entries */
170  /* List of TIDs of tuples we intend to delete */
171  /* NB: this list is ordered by TID address */
173  * ItemPointerData */
174 } LVDeadTuples;
175 
176 /* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
/*
 * SizeOfDeadTuples(cnt): number of bytes needed for the LVDeadTuples header
 * (everything that precedes itemptrs) followed by cnt ItemPointerData
 * entries.  The sum and product are formed with add_size()/mul_size().
 */
177 #define SizeOfDeadTuples(cnt) \
178  add_size(offsetof(LVDeadTuples, itemptrs), \
179  mul_size(sizeof(ItemPointerData), cnt))
/*
 * MAXDEADTUPLES(max_size): the inverse computation -- how many
 * ItemPointerData entries fit in max_size bytes once the header that
 * precedes itemptrs has been subtracted.
 *
 * NOTE(review): the subtraction involves offsetof(), so it is presumably
 * carried out in unsigned arithmetic; a max_size smaller than the header
 * would wrap around rather than yield 0 -- confirm callers never pass one.
 */
180 #define MAXDEADTUPLES(max_size) \
181  (((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
182 
183 /*
184  * Information shared among parallel workers; it is therefore allocated in
185  * the DSM segment.
186  */
187 typedef struct LVShared
188 {
189  /*
190  * Target table relid and log level. These fields are not modified during
191  * the lazy vacuum.
192  */
194  int elevel;
195 
196  /*
197  * An indication for vacuum workers to perform either index vacuum or
198  * index cleanup. first_time is true only if for_cleanup is true and
199  * bulk-deletion is not performed yet.
200  */
203 
204  /*
205  * Fields for both index vacuum and cleanup.
206  *
207  * reltuples is the total number of input heap tuples. We set either old
208  * live tuples in the index vacuum case or the new live tuples in the
209  * index cleanup case.
210  *
211  * estimated_count is true if reltuples is an estimated value. (Note that
212  * reltuples could be -1 in this case, indicating we have no idea.)
213  */
214  double reltuples;
216 
217  /*
218  * In single process lazy vacuum we could consume more memory during index
219  * vacuuming or cleanup apart from the memory for heap scanning. In
220  * parallel vacuum, since individual vacuum workers can consume memory
221  * equal to maintenance_work_mem, the new maintenance_work_mem for each
222  * worker is set such that the parallel operation doesn't consume more
223  * memory than single process lazy vacuum.
224  */
226 
227  /*
228  * Shared vacuum cost balance. During parallel vacuum,
229  * VacuumSharedCostBalance points to this value and it accumulates the
230  * balance of each parallel vacuum worker.
231  */
233 
234  /*
235  * Number of active parallel workers. This is used for computing the
236  * minimum threshold of the vacuum cost balance before a worker sleeps for
237  * cost-based delay.
238  */
240 
241  /*
242  * Variables to control parallel vacuum. We have a bitmap to indicate
243  * which index has stats in shared memory. A set bit in the map
244  * indicates that the corresponding index supports parallel vacuum.
245  */
246  pg_atomic_uint32 idx; /* counter for vacuuming and clean up */
247  uint32 offset; /* sizeof header incl. bitmap */
248  bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]; /* bit map of NULLs */
249 
250  /* Shared index statistics data follows at end of struct */
251 } LVShared;
252 
/*
 * SizeOfLVShared: size of the fixed part of LVShared, i.e. everything up to
 * the bitmap plus one bits8 element of the bitmap itself.
 */
253 #define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
/*
 * GetSharedIndStats(s): pointer to the LVSharedIndStats area, located
 * s->offset bytes past the start of the LVShared struct s.
 */
254 #define GetSharedIndStats(s) \
255  ((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
/*
 * IndStatsIsNull(s, i): true when bit i of s->bitmap is clear.  The bit
 * lives in byte (i >> 3) at position (i & 0x07).
 */
256 #define IndStatsIsNull(s, i) \
257  (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
258 
259 /*
260  * Struct for an index bulk-deletion statistic used for parallel vacuum. This
261  * is allocated in the DSM segment.
262  */
263 typedef struct LVSharedIndStats
264 {
265  bool updated; /* are the stats updated? */
268 
269 /* Struct for maintaining a parallel vacuum state. */
270 typedef struct LVParallelState
271 {
273 
274  /* Shared information among parallel vacuum workers */
276 
277  /* Points to buffer usage area in DSM */
279 
280  /* Points to WAL usage area in DSM */
282 
283  /*
284  * The number of indexes that support parallel index bulk-deletion and
285  * parallel index cleanup respectively.
286  */
291 
292 typedef struct LVRelStats
293 {
295  char *relname;
296  /* useindex = true means two-pass strategy; false means one-pass */
297  bool useindex;
298  /* Overall statistics about rel */
299  BlockNumber old_rel_pages; /* previous value of pg_class.relpages */
300  BlockNumber rel_pages; /* total number of pages */
301  BlockNumber scanned_pages; /* number of pages we examined */
302  BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */
303  BlockNumber frozenskipped_pages; /* # of frozen pages we skipped */
304  BlockNumber tupcount_pages; /* pages whose tuples we counted */
305  double old_live_tuples; /* previous value of pg_class.reltuples */
306  double new_rel_tuples; /* new estimated total # of tuples */
307  double new_live_tuples; /* new estimated total # of live tuples */
308  double new_dead_tuples; /* new estimated total # of dead tuples */
311  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
316 
317  /* Used for error callback */
318  char *indname;
319  BlockNumber blkno; /* used only for heap operations */
320  OffsetNumber offnum; /* used only for heap operations */
322 } LVRelStats;
323 
324 /* Struct for saving and restoring vacuum error information. */
325 typedef struct LVSavedErrInfo
326 {
331 
332 /* A few variables that don't seem worth passing around as parameters */
333 static int elevel = -1;
334 
338 
340 
341 
342 /* non-export function prototypes */
343 static void lazy_scan_heap(Relation onerel, VacuumParams *params,
344  LVRelStats *vacrelstats, Relation *Irel, int nindexes,
345  bool aggressive);
346 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
347 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup,
348  LVRelStats *vacrelstats);
349 static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
350  IndexBulkDeleteResult **stats,
351  LVRelStats *vacrelstats, LVParallelState *lps,
352  int nindexes);
353 static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
354  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats);
355 static void lazy_cleanup_index(Relation indrel,
356  IndexBulkDeleteResult **stats,
357  double reltuples, bool estimated_count, LVRelStats *vacrelstats);
358 static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
359  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
360 static bool should_attempt_truncation(VacuumParams *params,
361  LVRelStats *vacrelstats);
362 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
364  LVRelStats *vacrelstats);
365 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
366 static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples,
367  ItemPointer itemptr);
368 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
369 static int vac_cmp_itemptr(const void *left, const void *right);
371  LVRelStats *vacrelstats,
372  TransactionId *visibility_cutoff_xid, bool *all_frozen);
374  LVRelStats *vacrelstats, LVParallelState *lps,
375  int nindexes);
376 static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
377  LVShared *lvshared, LVDeadTuples *dead_tuples,
378  int nindexes, LVRelStats *vacrelstats);
379 static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
380  LVRelStats *vacrelstats, LVParallelState *lps,
381  int nindexes);
382 static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
383  LVShared *lvshared, LVSharedIndStats *shared_indstats,
384  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats);
385 static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
386  LVRelStats *vacrelstats, LVParallelState *lps,
387  int nindexes);
388 static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
389 static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
390  bool *can_parallel_vacuum);
391 static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
392  int nindexes);
393 static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
394  int nindexes);
396  LVRelStats *vacrelstats, BlockNumber nblocks,
397  int nindexes, int nrequested);
398 static void end_parallel_vacuum(IndexBulkDeleteResult **stats,
399  LVParallelState *lps, int nindexes);
400 static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
401 static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
402 static void vacuum_error_callback(void *arg);
403 static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info,
404  int phase, BlockNumber blkno,
405  OffsetNumber offnum);
406 static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info);
407 
408 
409 /*
410  * heap_vacuum_rel() -- perform VACUUM for one heap relation
411  *
412  * This routine vacuums a single heap, cleans out its indexes, and
413  * updates its relpages and reltuples statistics.
414  *
415  * At entry, we have already established a transaction and opened
416  * and locked the relation.
417  */
418 void
420  BufferAccessStrategy bstrategy)
421 {
422  LVRelStats *vacrelstats;
423  Relation *Irel;
424  int nindexes;
425  PGRUsage ru0;
426  TimestampTz starttime = 0;
427  WalUsage walusage_start = pgWalUsage;
428  WalUsage walusage = {0, 0, 0};
429  long secs;
430  int usecs;
431  double read_rate,
432  write_rate;
433  bool aggressive; /* should we scan all unfrozen pages? */
434  bool scanned_all_unfrozen; /* actually scanned all such pages? */
435  TransactionId xidFullScanLimit;
436  MultiXactId mxactFullScanLimit;
437  BlockNumber new_rel_pages;
438  BlockNumber new_rel_allvisible;
439  double new_live_tuples;
440  TransactionId new_frozen_xid;
441  MultiXactId new_min_multi;
442  ErrorContextCallback errcallback;
443 
444  Assert(params != NULL);
447 
448  /* not every AM requires these to be valid, but heap does */
449  Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid));
450  Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid));
451 
452  /* measure elapsed time iff autovacuum logging requires it */
453  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
454  {
455  pg_rusage_init(&ru0);
456  starttime = GetCurrentTimestamp();
457  }
458 
459  if (params->options & VACOPT_VERBOSE)
460  elevel = INFO;
461  else
462  elevel = DEBUG2;
463 
465  RelationGetRelid(onerel));
466 
467  vac_strategy = bstrategy;
468 
469  vacuum_set_xid_limits(onerel,
470  params->freeze_min_age,
471  params->freeze_table_age,
472  params->multixact_freeze_min_age,
474  true, /* we must be a top-level command */
475  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
476  &MultiXactCutoff, &mxactFullScanLimit);
477 
478  /*
479  * We request an aggressive scan if the table's frozen Xid is now older
480  * than or equal to the requested Xid full-table scan limit; or if the
481  * table's minimum MultiXactId is older than or equal to the requested
482  * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
483  */
484  aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
485  xidFullScanLimit);
486  aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
487  mxactFullScanLimit);
489  aggressive = true;
490 
491  vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
492 
493  vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel));
494  vacrelstats->relname = pstrdup(RelationGetRelationName(onerel));
495  vacrelstats->indname = NULL;
496  vacrelstats->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
497  vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
498  vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
499  vacrelstats->num_index_scans = 0;
500  vacrelstats->pages_removed = 0;
501  vacrelstats->lock_waiter_detected = false;
502 
503  /* Open all indexes of the relation */
504  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
505  vacrelstats->useindex = (nindexes > 0 &&
507 
508  /*
509  * Setup error traceback support for ereport(). The idea is to set up an
510  * error context callback to display additional information on any error
511  * during a vacuum. During different phases of vacuum (heap scan, heap
512  * vacuum, index vacuum, index clean up, heap truncate), we update the
513  * error context callback to display appropriate information.
514  *
515  * Note that the index vacuum and heap vacuum phases may be called
516  * multiple times in the middle of the heap scan phase. So the old phase
517  * information is restored at the end of those phases.
518  */
519  errcallback.callback = vacuum_error_callback;
520  errcallback.arg = vacrelstats;
521  errcallback.previous = error_context_stack;
522  error_context_stack = &errcallback;
523 
524  /* Do the vacuuming */
525  lazy_scan_heap(onerel, params, vacrelstats, Irel, nindexes, aggressive);
526 
527  /* Done with indexes */
528  vac_close_indexes(nindexes, Irel, NoLock);
529 
530  /*
531  * Compute whether we actually scanned all the unfrozen pages. If we did,
532  * we can adjust relfrozenxid and relminmxid.
533  *
534  * NB: We need to check this before truncating the relation, because that
535  * will change ->rel_pages.
536  */
537  if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
538  < vacrelstats->rel_pages)
539  {
540  Assert(!aggressive);
541  scanned_all_unfrozen = false;
542  }
543  else
544  scanned_all_unfrozen = true;
545 
546  /*
547  * Optionally truncate the relation.
548  */
549  if (should_attempt_truncation(params, vacrelstats))
550  {
551  /*
552  * Update error traceback information. This is the last phase during
553  * which we add context information to errors, so we don't need to
554  * revert to the previous phase.
555  */
557  vacrelstats->nonempty_pages,
559  lazy_truncate_heap(onerel, vacrelstats);
560  }
561 
562  /* Pop the error context stack */
563  error_context_stack = errcallback.previous;
564 
565  /* Report that we are now doing final cleanup */
568 
569  /*
570  * Update statistics in pg_class.
571  *
572  * In principle new_live_tuples could be -1 indicating that we (still)
573  * don't know the tuple count. In practice that probably can't happen,
574  * since we'd surely have scanned some pages if the table is new and
575  * nonempty.
576  *
577  * For safety, clamp relallvisible to be not more than what we're setting
578  * relpages to.
579  *
580  * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
581  * since then we don't know for certain that all tuples have a newer xmin.
582  */
583  new_rel_pages = vacrelstats->rel_pages;
584  new_live_tuples = vacrelstats->new_live_tuples;
585 
586  visibilitymap_count(onerel, &new_rel_allvisible, NULL);
587  if (new_rel_allvisible > new_rel_pages)
588  new_rel_allvisible = new_rel_pages;
589 
590  new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
591  new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
592 
593  vac_update_relstats(onerel,
594  new_rel_pages,
595  new_live_tuples,
596  new_rel_allvisible,
597  nindexes > 0,
598  new_frozen_xid,
599  new_min_multi,
600  false);
601 
602  /* report results to the stats collector, too */
604  onerel->rd_rel->relisshared,
605  Max(new_live_tuples, 0),
606  vacrelstats->new_dead_tuples);
608 
609  /* and log the action if appropriate */
610  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
611  {
612  TimestampTz endtime = GetCurrentTimestamp();
613 
614  if (params->log_min_duration == 0 ||
615  TimestampDifferenceExceeds(starttime, endtime,
616  params->log_min_duration))
617  {
619  char *msgfmt;
620 
621  TimestampDifference(starttime, endtime, &secs, &usecs);
622 
623  memset(&walusage, 0, sizeof(WalUsage));
624  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
625 
626  read_rate = 0;
627  write_rate = 0;
628  if ((secs > 0) || (usecs > 0))
629  {
630  read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
631  (secs + usecs / 1000000.0);
632  write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
633  (secs + usecs / 1000000.0);
634  }
635 
636  /*
637  * This is pretty messy, but we split it up so that we can skip
638  * emitting individual parts of the message when not applicable.
639  */
640  initStringInfo(&buf);
641  if (params->is_wraparound)
642  {
643  if (aggressive)
644  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
645  else
646  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
647  }
648  else
649  {
650  if (aggressive)
651  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
652  else
653  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
654  }
655  appendStringInfo(&buf, msgfmt,
657  vacrelstats->relnamespace,
658  vacrelstats->relname,
659  vacrelstats->num_index_scans);
660  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
661  vacrelstats->pages_removed,
662  vacrelstats->rel_pages,
663  vacrelstats->pinskipped_pages,
664  vacrelstats->frozenskipped_pages);
665  appendStringInfo(&buf,
666  _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
667  vacrelstats->tuples_deleted,
668  vacrelstats->new_rel_tuples,
669  vacrelstats->new_dead_tuples,
670  OldestXmin);
671  appendStringInfo(&buf,
672  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
673  (long long) VacuumPageHit,
674  (long long) VacuumPageMiss,
675  (long long) VacuumPageDirty);
676  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
677  read_rate, write_rate);
678  appendStringInfo(&buf, _("system usage: %s\n"), pg_rusage_show(&ru0));
679  appendStringInfo(&buf,
680  _("WAL usage: %ld records, %ld full page images, %llu bytes"),
681  walusage.wal_records,
682  walusage.wal_fpi,
683  (unsigned long long) walusage.wal_bytes);
684 
685  ereport(LOG,
686  (errmsg_internal("%s", buf.data)));
687  pfree(buf.data);
688  }
689  }
690 }
691 
692 /*
693  * For Hot Standby we need to know the highest transaction id that will
694  * be removed by any change. VACUUM proceeds in a number of passes so
695  * we need to consider how each pass operates. The first phase runs
696  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
697  * progresses - these will have a latestRemovedXid on each record.
698  * In some cases this removes all of the tuples to be removed, though
699  * often we have dead tuples with index pointers so we must remember them
700  * for removal in phase 3. Index records for those rows are removed
701  * in phase 2 and index blocks do not have MVCC information attached.
702  * So before we can allow removal of any index tuples we need to issue
703  * a WAL record containing the latestRemovedXid of rows that will be
704  * removed in phase three. This allows recovery queries to block at the
705  * correct place, i.e. before phase two, rather than during phase three
706  * which would be after the rows have become inaccessible.
707  */
708 static void
710 {
711  /*
712  * Skip this for relations for which no WAL is to be written, or if we're
713  * not trying to support archive recovery.
714  */
715  if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
716  return;
717 
718  /*
719  * No need to write the record at all unless it contains a valid value
720  */
721  if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
722  (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
723 }
724 
725 /*
726  * lazy_scan_heap() -- scan an open heap relation
727  *
728  * This routine prunes each page in the heap, which will among other
729  * things truncate dead tuples to dead line pointers, defragment the
730  * page, and set commit status bits (see heap_page_prune). It also builds
731  * lists of dead tuples and pages with free space, calculates statistics
732  * on the number of live tuples in the heap, and marks pages as
733  * all-visible if appropriate. When done, or when we run low on space for
734  * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
735  * to reclaim dead line pointers.
736  *
737  * If the table has at least two indexes, we execute both index vacuum
738  * and index cleanup with parallel workers unless parallel vacuum is
739  * disabled. In a parallel vacuum, we enter parallel mode and then
740  * create both the parallel context and the DSM segment before starting
741  * heap scan so that we can record dead tuples to the DSM segment. All
742  * parallel workers are launched at beginning of index vacuuming and
743  * index cleanup and they exit once done with all indexes. At the end of
744  * this function we exit from parallel mode. Index bulk-deletion results
745  * are stored in the DSM segment and we update index statistics for all
746  * the indexes after exiting from parallel mode since writes are not
747  * allowed during parallel mode.
748  *
749  * If there are no indexes then we can reclaim line pointers on the fly;
750  * dead line pointers need only be retained until all index pointers that
751  * reference them have been killed.
752  */
753 static void
754 lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
755  Relation *Irel, int nindexes, bool aggressive)
756 {
757  LVParallelState *lps = NULL;
758  LVDeadTuples *dead_tuples;
759  BlockNumber nblocks,
760  blkno;
761  HeapTupleData tuple;
762  TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
763  TransactionId relminmxid = onerel->rd_rel->relminmxid;
764  BlockNumber empty_pages,
765  vacuumed_pages,
766  next_fsm_block_to_vacuum;
767  double num_tuples, /* total number of nonremovable tuples */
768  live_tuples, /* live tuples (reltuples estimate) */
769  tups_vacuumed, /* tuples cleaned up by vacuum */
770  nkeep, /* dead-but-not-removable tuples */
771  nunused; /* unused line pointers */
772  IndexBulkDeleteResult **indstats;
773  int i;
774  PGRUsage ru0;
775  Buffer vmbuffer = InvalidBuffer;
776  BlockNumber next_unskippable_block;
777  bool skipping_blocks;
778  xl_heap_freeze_tuple *frozen;
780  const int initprog_index[] = {
784  };
785  int64 initprog_val[3];
786  GlobalVisState *vistest;
787 
788  pg_rusage_init(&ru0);
789 
790  if (aggressive)
791  ereport(elevel,
792  (errmsg("aggressively vacuuming \"%s.%s\"",
793  vacrelstats->relnamespace,
794  vacrelstats->relname)));
795  else
796  ereport(elevel,
797  (errmsg("vacuuming \"%s.%s\"",
798  vacrelstats->relnamespace,
799  vacrelstats->relname)));
800 
801  empty_pages = vacuumed_pages = 0;
802  next_fsm_block_to_vacuum = (BlockNumber) 0;
803  num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
804 
805  indstats = (IndexBulkDeleteResult **)
806  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
807 
808  nblocks = RelationGetNumberOfBlocks(onerel);
809  vacrelstats->rel_pages = nblocks;
810  vacrelstats->scanned_pages = 0;
811  vacrelstats->tupcount_pages = 0;
812  vacrelstats->nonempty_pages = 0;
813  vacrelstats->latestRemovedXid = InvalidTransactionId;
814 
815  vistest = GlobalVisTestFor(onerel);
816 
817  /*
818  * Initialize state for a parallel vacuum. As of now, only one worker can
819  * be used for an index, so we invoke parallelism only if there are at
820  * least two indexes on a table.
821  */
822  if (params->nworkers >= 0 && vacrelstats->useindex && nindexes > 1)
823  {
824  /*
825  * Since parallel workers cannot access data in temporary tables, we
826  * can't perform parallel vacuum on them.
827  */
828  if (RelationUsesLocalBuffers(onerel))
829  {
830  /*
831  * Give warning only if the user explicitly tries to perform a
832  * parallel vacuum on the temporary table.
833  */
834  if (params->nworkers > 0)
836  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
837  vacrelstats->relname)));
838  }
839  else
840  lps = begin_parallel_vacuum(RelationGetRelid(onerel), Irel,
841  vacrelstats, nblocks, nindexes,
842  params->nworkers);
843  }
844 
845  /*
846  * Allocate the space for dead tuples in case parallel vacuum is not
847  * initialized.
848  */
849  if (!ParallelVacuumIsActive(lps))
850  lazy_space_alloc(vacrelstats, nblocks);
851 
852  dead_tuples = vacrelstats->dead_tuples;
854 
855  /* Report that we're scanning the heap, advertising total # of blocks */
856  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
857  initprog_val[1] = nblocks;
858  initprog_val[2] = dead_tuples->max_tuples;
859  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
860 
861  /*
862  * Except when aggressive is set, we want to skip pages that are
863  * all-visible according to the visibility map, but only when we can skip
864  * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading
865  * sequentially, the OS should be doing readahead for us, so there's no
866  * gain in skipping a page now and then; that's likely to disable
867  * readahead and so be counterproductive. Also, skipping even a single
868  * page means that we can't update relfrozenxid, so we only want to do it
869  * if we can skip a goodly number of pages.
870  *
871  * When aggressive is set, we can't skip pages just because they are
872  * all-visible, but we can still skip pages that are all-frozen, since
873  * such pages do not need freezing and do not affect the value that we can
874  * safely set for relfrozenxid or relminmxid.
875  *
876  * Before entering the main loop, establish the invariant that
877  * next_unskippable_block is the next block number >= blkno that we can't
878  * skip based on the visibility map, either all-visible for a regular scan
879  * or all-frozen for an aggressive scan. We set it to nblocks if there's
880  * no such block. We also set up the skipping_blocks flag correctly at
881  * this stage.
882  *
883  * Note: The value returned by visibilitymap_get_status could be slightly
884  * out-of-date, since we make this test before reading the corresponding
885  * heap page or locking the buffer. This is OK. If we mistakenly think
886  * that the page is all-visible or all-frozen when in fact the flag's just
887  * been cleared, we might fail to vacuum the page. It's easy to see that
888  * skipping a page when aggressive is not set is not a very big deal; we
889  * might leave some dead tuples lying around, but the next vacuum will
890  * find them. But even when aggressive *is* set, it's still OK if we miss
891  * a page whose all-frozen marking has just been cleared. Any new XIDs
892  * just added to that page are necessarily newer than the GlobalXmin we
893  * computed, so they'll have no effect on the value to which we can safely
894  * set relfrozenxid. A similar argument applies for MXIDs and relminmxid.
895  *
896  * We will scan the table's last page, at least to the extent of
897  * determining whether it has tuples or not, even if it should be skipped
898  * according to the above rules; except when we've already determined that
899  * it's not worth trying to truncate the table. This avoids having
900  * lazy_truncate_heap() take access-exclusive lock on the table to attempt
901  * a truncation that just fails immediately because there are tuples in
902  * the last page. This is worth avoiding mainly because such a lock must
903  * be replayed on any hot standby, where it can be disruptive.
904  */
905  next_unskippable_block = 0;
906  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
907  {
908  while (next_unskippable_block < nblocks)
909  {
910  uint8 vmstatus;
911 
912  vmstatus = visibilitymap_get_status(onerel, next_unskippable_block,
913  &vmbuffer);
914  if (aggressive)
915  {
916  if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
917  break;
918  }
919  else
920  {
921  if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
922  break;
923  }
925  next_unskippable_block++;
926  }
927  }
928 
929  if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
930  skipping_blocks = true;
931  else
932  skipping_blocks = false;
933 
934  for (blkno = 0; blkno < nblocks; blkno++)
935  {
936  Buffer buf;
937  Page page;
938  OffsetNumber offnum,
939  maxoff;
940  bool tupgone,
941  hastup;
942  int prev_dead_count;
943  int nfrozen;
944  Size freespace;
945  bool all_visible_according_to_vm = false;
946  bool all_visible;
947  bool all_frozen = true; /* provided all_visible is also true */
948  bool has_dead_tuples;
949  TransactionId visibility_cutoff_xid = InvalidTransactionId;
950 
951  /* see note above about forcing scanning of last page */
952 #define FORCE_CHECK_PAGE() \
953  (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats))
954 
956 
958  blkno, InvalidOffsetNumber);
959 
960  if (blkno == next_unskippable_block)
961  {
962  /* Time to advance next_unskippable_block */
963  next_unskippable_block++;
964  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
965  {
966  while (next_unskippable_block < nblocks)
967  {
968  uint8 vmskipflags;
969 
970  vmskipflags = visibilitymap_get_status(onerel,
971  next_unskippable_block,
972  &vmbuffer);
973  if (aggressive)
974  {
975  if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
976  break;
977  }
978  else
979  {
980  if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
981  break;
982  }
984  next_unskippable_block++;
985  }
986  }
987 
988  /*
989  * We know we can't skip the current block. But set up
990  * skipping_blocks to do the right thing at the following blocks.
991  */
992  if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
993  skipping_blocks = true;
994  else
995  skipping_blocks = false;
996 
997  /*
998  * Normally, the fact that we can't skip this block must mean that
999  * it's not all-visible. But in an aggressive vacuum we know only
1000  * that it's not all-frozen, so it might still be all-visible.
1001  */
1002  if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
1003  all_visible_according_to_vm = true;
1004  }
1005  else
1006  {
1007  /*
1008  * The current block is potentially skippable; if we've seen a
1009  * long enough run of skippable blocks to justify skipping it, and
1010  * we're not forced to check it, then go ahead and skip.
1011  * Otherwise, the page must be at least all-visible if not
1012  * all-frozen, so we can set all_visible_according_to_vm = true.
1013  */
1014  if (skipping_blocks && !FORCE_CHECK_PAGE())
1015  {
1016  /*
1017  * Tricky, tricky. If this is in aggressive vacuum, the page
1018  * must have been all-frozen at the time we checked whether it
1019  * was skippable, but it might not be any more. We must be
1020  * careful to count it as a skipped all-frozen page in that
1021  * case, or else we'll think we can't update relfrozenxid and
1022  * relminmxid. If it's not an aggressive vacuum, we don't
1023  * know whether it was all-frozen, so we have to recheck; but
1024  * in this case an approximate answer is OK.
1025  */
1026  if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1027  vacrelstats->frozenskipped_pages++;
1028  continue;
1029  }
1030  all_visible_according_to_vm = true;
1031  }
1032 
1034 
1035  /*
1036  * If we are close to overrunning the available space for dead-tuple
1037  * TIDs, pause and do a cycle of vacuuming before we tackle this page.
1038  */
1039  if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
1040  dead_tuples->num_tuples > 0)
1041  {
1042  /*
1043  * Before beginning index vacuuming, we release any pin we may
1044  * hold on the visibility map page. This isn't necessary for
1045  * correctness, but we do it anyway to avoid holding the pin
1046  * across a lengthy, unrelated operation.
1047  */
1048  if (BufferIsValid(vmbuffer))
1049  {
1050  ReleaseBuffer(vmbuffer);
1051  vmbuffer = InvalidBuffer;
1052  }
1053 
1054  /* Work on all the indexes, then the heap */
1055  lazy_vacuum_all_indexes(onerel, Irel, indstats,
1056  vacrelstats, lps, nindexes);
1057 
1058  /* Remove tuples from heap */
1059  lazy_vacuum_heap(onerel, vacrelstats);
1060 
1061  /*
1062  * Forget the now-vacuumed tuples, and press on, but be careful
1063  * not to reset latestRemovedXid since we want that value to be
1064  * valid.
1065  */
1066  dead_tuples->num_tuples = 0;
1067 
1068  /*
1069  * Vacuum the Free Space Map to make newly-freed space visible on
1070  * upper-level FSM pages. Note we have not yet processed blkno.
1071  */
1072  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1073  next_fsm_block_to_vacuum = blkno;
1074 
1075  /* Report that we are once again scanning the heap */
1078  }
1079 
1080  /*
1081  * Pin the visibility map page in case we need to mark the page
1082  * all-visible. In most cases this will be very cheap, because we'll
1083  * already have the correct page pinned anyway. However, it's
1084  * possible that (a) next_unskippable_block is covered by a different
1085  * VM page than the current block or (b) we released our pin and did a
1086  * cycle of index vacuuming.
1087  *
1088  */
1089  visibilitymap_pin(onerel, blkno, &vmbuffer);
1090 
1091  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
1092  RBM_NORMAL, vac_strategy);
1093 
1094  /* We need buffer cleanup lock so that we can prune HOT chains. */
1096  {
1097  /*
1098  * If we're not performing an aggressive scan to guard against XID
1099  * wraparound, and we don't want to forcibly check the page, then
1100  * it's OK to skip vacuuming pages we get a lock conflict on. They
1101  * will be dealt with in some future vacuum.
1102  */
1103  if (!aggressive && !FORCE_CHECK_PAGE())
1104  {
1105  ReleaseBuffer(buf);
1106  vacrelstats->pinskipped_pages++;
1107  continue;
1108  }
1109 
1110  /*
1111  * Read the page with share lock to see if any xids on it need to
1112  * be frozen. If not we just skip the page, after updating our
1113  * scan statistics. If there are some, we wait for cleanup lock.
1114  *
1115  * We could defer the lock request further by remembering the page
1116  * and coming back to it later, or we could even register
1117  * ourselves for multiple buffers and then service whichever one
1118  * is received first. For now, this seems good enough.
1119  *
1120  * If we get here with aggressive false, then we're just forcibly
1121  * checking the page, and so we don't want to insist on getting
1122  * the lock; we only need to know if the page contains tuples, so
1123  * that we can update nonempty_pages correctly. It's convenient
1124  * to use lazy_check_needs_freeze() for both situations, though.
1125  */
1127  if (!lazy_check_needs_freeze(buf, &hastup, vacrelstats))
1128  {
1129  UnlockReleaseBuffer(buf);
1130  vacrelstats->scanned_pages++;
1131  vacrelstats->pinskipped_pages++;
1132  if (hastup)
1133  vacrelstats->nonempty_pages = blkno + 1;
1134  continue;
1135  }
1136  if (!aggressive)
1137  {
1138  /*
1139  * Here, we must not advance scanned_pages; that would amount
1140  * to claiming that the page contains no freezable tuples.
1141  */
1142  UnlockReleaseBuffer(buf);
1143  vacrelstats->pinskipped_pages++;
1144  if (hastup)
1145  vacrelstats->nonempty_pages = blkno + 1;
1146  continue;
1147  }
1149  LockBufferForCleanup(buf);
1150  /* drop through to normal processing */
1151  }
1152 
1153  vacrelstats->scanned_pages++;
1154  vacrelstats->tupcount_pages++;
1155 
1156  page = BufferGetPage(buf);
1157 
1158  if (PageIsNew(page))
1159  {
1160  /*
1161  * All-zeroes pages can be left over if either a backend extends
1162  * the relation by a single page, but crashes before the newly
1163  * initialized page has been written out, or when bulk-extending
1164  * the relation (which creates a number of empty pages at the tail
1165  * end of the relation, but enters them into the FSM).
1166  *
1167  * Note we do not enter the page into the visibilitymap. That has
1168  * the downside that we repeatedly visit this page in subsequent
1169  * vacuums, but otherwise we'll never not discover the space on a
1170  * promoted standby. The harm of repeated checking ought to
1171  * normally not be too bad - the space usually should be used at
1172  * some point, otherwise there wouldn't be any regular vacuums.
1173  *
1174  * Make sure these pages are in the FSM, to ensure they can be
1175  * reused. Do that by testing if there's any space recorded for
1176  * the page. If not, enter it. We do so after releasing the lock
1177  * on the heap page, the FSM is approximate, after all.
1178  */
1179  UnlockReleaseBuffer(buf);
1180 
1181  empty_pages++;
1182 
1183  if (GetRecordedFreeSpace(onerel, blkno) == 0)
1184  {
1185  Size freespace;
1186 
1187  freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
1188  RecordPageWithFreeSpace(onerel, blkno, freespace);
1189  }
1190  continue;
1191  }
1192 
1193  if (PageIsEmpty(page))
1194  {
1195  empty_pages++;
1196  freespace = PageGetHeapFreeSpace(page);
1197 
1198  /*
1199  * Empty pages are always all-visible and all-frozen (note that
1200  * the same is currently not true for new pages, see above).
1201  */
1202  if (!PageIsAllVisible(page))
1203  {
1205 
1206  /* mark buffer dirty before writing a WAL record */
1207  MarkBufferDirty(buf);
1208 
1209  /*
1210  * It's possible that another backend has extended the heap,
1211  * initialized the page, and then failed to WAL-log the page
1212  * due to an ERROR. Since heap extension is not WAL-logged,
1213  * recovery might try to replay our record setting the page
1214  * all-visible and find that the page isn't initialized, which
1215  * will cause a PANIC. To prevent that, check whether the
1216  * page has been previously WAL-logged, and if not, do that
1217  * now.
1218  */
1219  if (RelationNeedsWAL(onerel) &&
1220  PageGetLSN(page) == InvalidXLogRecPtr)
1221  log_newpage_buffer(buf, true);
1222 
1223  PageSetAllVisible(page);
1224  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1225  vmbuffer, InvalidTransactionId,
1227  END_CRIT_SECTION();
1228  }
1229 
1230  UnlockReleaseBuffer(buf);
1231  RecordPageWithFreeSpace(onerel, blkno, freespace);
1232  continue;
1233  }
1234 
1235  /*
1236  * Prune all HOT-update chains in this page.
1237  *
1238  * We count tuples removed by the pruning step as removed by VACUUM.
1239  */
1240  tups_vacuumed += heap_page_prune(onerel, buf, vistest, false,
1242  &vacrelstats->latestRemovedXid,
1243  &vacrelstats->offnum);
1244 
1245  /*
1246  * Now scan the page to collect vacuumable items and check for tuples
1247  * requiring freezing.
1248  */
1249  all_visible = true;
1250  has_dead_tuples = false;
1251  nfrozen = 0;
1252  hastup = false;
1253  prev_dead_count = dead_tuples->num_tuples;
1254  maxoff = PageGetMaxOffsetNumber(page);
1255 
1256  /*
1257  * Note: If you change anything in the loop below, also look at
1258  * heap_page_is_all_visible to see if that needs to be changed.
1259  */
1260  for (offnum = FirstOffsetNumber;
1261  offnum <= maxoff;
1262  offnum = OffsetNumberNext(offnum))
1263  {
1264  ItemId itemid;
1265 
1266  /*
1267  * Set the offset number so that we can display it along with any
1268  * error that occurred while processing this tuple.
1269  */
1270  vacrelstats->offnum = offnum;
1271  itemid = PageGetItemId(page, offnum);
1272 
1273  /* Unused items require no processing, but we count 'em */
1274  if (!ItemIdIsUsed(itemid))
1275  {
1276  nunused += 1;
1277  continue;
1278  }
1279 
1280  /* Redirect items mustn't be touched */
1281  if (ItemIdIsRedirected(itemid))
1282  {
1283  hastup = true; /* this page won't be truncatable */
1284  continue;
1285  }
1286 
1287  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1288 
1289  /*
1290  * DEAD line pointers are to be vacuumed normally; but we don't
1291  * count them in tups_vacuumed, else we'd be double-counting (at
1292  * least in the common case where heap_page_prune() just freed up
1293  * a non-HOT tuple).
1294  */
1295  if (ItemIdIsDead(itemid))
1296  {
1297  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1298  all_visible = false;
1299  continue;
1300  }
1301 
1302  Assert(ItemIdIsNormal(itemid));
1303 
1304  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1305  tuple.t_len = ItemIdGetLength(itemid);
1306  tuple.t_tableOid = RelationGetRelid(onerel);
1307 
1308  tupgone = false;
1309 
1310  /*
1311  * The criteria for counting a tuple as live in this block need to
1312  * match what analyze.c's acquire_sample_rows() does, otherwise
1313  * VACUUM and ANALYZE may produce wildly different reltuples
1314  * values, e.g. when there are many recently-dead tuples.
1315  *
1316  * The logic here is a bit simpler than acquire_sample_rows(), as
1317  * VACUUM can't run inside a transaction block, which makes some
1318  * cases impossible (e.g. in-progress insert from the same
1319  * transaction).
1320  */
1321  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
1322  {
1323  case HEAPTUPLE_DEAD:
1324 
1325  /*
1326  * Ordinarily, DEAD tuples would have been removed by
1327  * heap_page_prune(), but it's possible that the tuple
1328  * state changed since heap_page_prune() looked. In
1329  * particular an INSERT_IN_PROGRESS tuple could have
1330  * changed to DEAD if the inserter aborted. So this
1331  * cannot be considered an error condition.
1332  *
1333  * If the tuple is HOT-updated then it must only be
1334  * removed by a prune operation; so we keep it just as if
1335  * it were RECENTLY_DEAD. Also, if it's a heap-only
1336  * tuple, we choose to keep it, because it'll be a lot
1337  * cheaper to get rid of it in the next pruning pass than
1338  * to treat it like an indexed tuple. Finally, if index
1339  * cleanup is disabled, the second heap pass will not
1340  * execute, and the tuple will not get removed, so we must
1341  * treat it like any other dead tuple that we choose to
1342  * keep.
1343  *
1344  * If this were to happen for a tuple that actually needed
1345  * to be deleted, we'd be in trouble, because it'd
1346  * possibly leave a tuple below the relation's xmin
1347  * horizon alive. heap_prepare_freeze_tuple() is prepared
1348  * to detect that case and abort the transaction,
1349  * preventing corruption.
1350  */
1351  if (HeapTupleIsHotUpdated(&tuple) ||
1352  HeapTupleIsHeapOnly(&tuple) ||
1354  nkeep += 1;
1355  else
1356  tupgone = true; /* we can delete the tuple */
1357  all_visible = false;
1358  break;
1359  case HEAPTUPLE_LIVE:
1360 
1361  /*
1362  * Count it as live. Not only is this natural, but it's
1363  * also what acquire_sample_rows() does.
1364  */
1365  live_tuples += 1;
1366 
1367  /*
1368  * Is the tuple definitely visible to all transactions?
1369  *
1370  * NB: Like with per-tuple hint bits, we can't set the
1371  * PD_ALL_VISIBLE flag if the inserter committed
1372  * asynchronously. See SetHintBits for more info. Check
1373  * that the tuple is hinted xmin-committed because of
1374  * that.
1375  */
1376  if (all_visible)
1377  {
1378  TransactionId xmin;
1379 
1381  {
1382  all_visible = false;
1383  break;
1384  }
1385 
1386  /*
1387  * The inserter definitely committed. But is it old
1388  * enough that everyone sees it as committed?
1389  */
1390  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1391  if (!TransactionIdPrecedes(xmin, OldestXmin))
1392  {
1393  all_visible = false;
1394  break;
1395  }
1396 
1397  /* Track newest xmin on page. */
1398  if (TransactionIdFollows(xmin, visibility_cutoff_xid))
1399  visibility_cutoff_xid = xmin;
1400  }
1401  break;
1403 
1404  /*
1405  * If tuple is recently deleted then we must not remove it
1406  * from relation.
1407  */
1408  nkeep += 1;
1409  all_visible = false;
1410  break;
1412 
1413  /*
1414  * This is an expected case during concurrent vacuum.
1415  *
1416  * We do not count these rows as live, because we expect
1417  * the inserting transaction to update the counters at
1418  * commit, and we assume that will happen only after we
1419  * report our results. This assumption is a bit shaky,
1420  * but it is what acquire_sample_rows() does, so be
1421  * consistent.
1422  */
1423  all_visible = false;
1424  break;
1426  /* This is an expected case during concurrent vacuum */
1427  all_visible = false;
1428 
1429  /*
1430  * Count such rows as live. As above, we assume the
1431  * deleting transaction will commit and update the
1432  * counters after we report.
1433  */
1434  live_tuples += 1;
1435  break;
1436  default:
1437  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1438  break;
1439  }
1440 
1441  if (tupgone)
1442  {
1443  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1445  &vacrelstats->latestRemovedXid);
1446  tups_vacuumed += 1;
1447  has_dead_tuples = true;
1448  }
1449  else
1450  {
1451  bool tuple_totally_frozen;
1452 
1453  num_tuples += 1;
1454  hastup = true;
1455 
1456  /*
1457  * Each non-removable tuple must be checked to see if it needs
1458  * freezing. Note we already have exclusive buffer lock.
1459  */
1461  relfrozenxid, relminmxid,
1463  &frozen[nfrozen],
1464  &tuple_totally_frozen))
1465  frozen[nfrozen++].offset = offnum;
1466 
1467  if (!tuple_totally_frozen)
1468  all_frozen = false;
1469  }
1470  } /* scan along page */
1471 
1472  /*
1473  * Clear the offset information once we have processed all the tuples
1474  * on the page.
1475  */
1476  vacrelstats->offnum = InvalidOffsetNumber;
1477 
1478  /*
1479  * If we froze any tuples, mark the buffer dirty, and write a WAL
1480  * record recording the changes. We must log the changes to be
1481  * crash-safe against future truncation of CLOG.
1482  */
1483  if (nfrozen > 0)
1484  {
1486 
1487  MarkBufferDirty(buf);
1488 
1489  /* execute collected freezes */
1490  for (i = 0; i < nfrozen; i++)
1491  {
1492  ItemId itemid;
1493  HeapTupleHeader htup;
1494 
1495  itemid = PageGetItemId(page, frozen[i].offset);
1496  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1497 
1498  heap_execute_freeze_tuple(htup, &frozen[i]);
1499  }
1500 
1501  /* Now WAL-log freezing if necessary */
1502  if (RelationNeedsWAL(onerel))
1503  {
1504  XLogRecPtr recptr;
1505 
1506  recptr = log_heap_freeze(onerel, buf, FreezeLimit,
1507  frozen, nfrozen);
1508  PageSetLSN(page, recptr);
1509  }
1510 
1511  END_CRIT_SECTION();
1512  }
1513 
1514  /*
1515  * If there are no indexes we can vacuum the page right now instead of
1516  * doing a second scan. Also we don't do that but forget dead tuples
1517  * when index cleanup is disabled.
1518  */
1519  if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
1520  {
1521  if (nindexes == 0)
1522  {
1523  /* Remove tuples from heap if the table has no index */
1524  lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
1525  vacuumed_pages++;
1526  has_dead_tuples = false;
1527  }
1528  else
1529  {
1530  /*
1531  * Here, we have indexes but index cleanup is disabled.
1532  * Instead of vacuuming the dead tuples on the heap, we just
1533  * forget them.
1534  *
1535  * Note that vacrelstats->dead_tuples could have tuples which
1536  * became dead after HOT-pruning but are not marked dead yet.
1537  * We do not process them because it's a very rare condition,
1538  * and the next vacuum will process them anyway.
1539  */
1541  }
1542 
1543  /*
1544  * Forget the now-vacuumed tuples, and press on, but be careful
1545  * not to reset latestRemovedXid since we want that value to be
1546  * valid.
1547  */
1548  dead_tuples->num_tuples = 0;
1549 
1550  /*
1551  * Periodically do incremental FSM vacuuming to make newly-freed
1552  * space visible on upper FSM pages. Note: although we've cleaned
1553  * the current block, we haven't yet updated its FSM entry (that
1554  * happens further down), so passing end == blkno is correct.
1555  */
1556  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1557  {
1558  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
1559  blkno);
1560  next_fsm_block_to_vacuum = blkno;
1561  }
1562  }
1563 
1564  freespace = PageGetHeapFreeSpace(page);
1565 
1566  /* mark page all-visible, if appropriate */
1567  if (all_visible && !all_visible_according_to_vm)
1568  {
1570 
1571  if (all_frozen)
1572  flags |= VISIBILITYMAP_ALL_FROZEN;
1573 
1574  /*
1575  * It should never be the case that the visibility map page is set
1576  * while the page-level bit is clear, but the reverse is allowed
1577  * (if checksums are not enabled). Regardless, set both bits so
1578  * that we get back in sync.
1579  *
1580  * NB: If the heap page is all-visible but the VM bit is not set,
1581  * we don't need to dirty the heap page. However, if checksums
1582  * are enabled, we do need to make sure that the heap page is
1583  * dirtied before passing it to visibilitymap_set(), because it
1584  * may be logged. Given that this situation should only happen in
1585  * rare cases after a crash, it is not worth optimizing.
1586  */
1587  PageSetAllVisible(page);
1588  MarkBufferDirty(buf);
1589  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1590  vmbuffer, visibility_cutoff_xid, flags);
1591  }
1592 
1593  /*
1594  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1595  * the page-level bit is clear. However, it's possible that the bit
1596  * got cleared after we checked it and before we took the buffer
1597  * content lock, so we must recheck before jumping to the conclusion
1598  * that something bad has happened.
1599  */
1600  else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1601  && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
1602  {
1603  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1604  vacrelstats->relname, blkno);
1605  visibilitymap_clear(onerel, blkno, vmbuffer,
1607  }
1608 
1609  /*
1610  * It's possible for the value returned by
1611  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1612  * wrong for us to see tuples that appear to not be visible to
1613  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1614  * xmin value never moves backwards, but
1615  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1616  * returns a value that's unnecessarily small, so if we see that
1617  * contradiction it just means that the tuples that we think are not
1618  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1619  * is correct.
1620  *
1621  * There should never be dead tuples on a page with PD_ALL_VISIBLE
1622  * set, however.
1623  */
1624  else if (PageIsAllVisible(page) && has_dead_tuples)
1625  {
1626  elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1627  vacrelstats->relname, blkno);
1628  PageClearAllVisible(page);
1629  MarkBufferDirty(buf);
1630  visibilitymap_clear(onerel, blkno, vmbuffer,
1632  }
1633 
1634  /*
1635  * If the all-visible page is all-frozen but not marked as such yet,
1636  * mark it as all-frozen. Note that all_frozen is only valid if
1637  * all_visible is true, so we must check both.
1638  */
1639  else if (all_visible_according_to_vm && all_visible && all_frozen &&
1640  !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1641  {
1642  /*
1643  * We can pass InvalidTransactionId as the cutoff XID here,
1644  * because setting the all-frozen bit doesn't cause recovery
1645  * conflicts.
1646  */
1647  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1648  vmbuffer, InvalidTransactionId,
1650  }
1651 
1652  UnlockReleaseBuffer(buf);
1653 
1654  /* Remember the location of the last page with nonremovable tuples */
1655  if (hastup)
1656  vacrelstats->nonempty_pages = blkno + 1;
1657 
1658  /*
1659  * If we remembered any tuples for deletion, then the page will be
1660  * visited again by lazy_vacuum_heap, which will compute and record
1661  * its post-compaction free space. If not, then we're done with this
1662  * page, so remember its free space as-is. (This path will always be
1663  * taken if there are no indexes.)
1664  */
1665  if (dead_tuples->num_tuples == prev_dead_count)
1666  RecordPageWithFreeSpace(onerel, blkno, freespace);
1667  }
1668 
1669  /* report that everything is scanned and vacuumed */
1671 
1672  /* Clear the block number information */
1673  vacrelstats->blkno = InvalidBlockNumber;
1674 
1675  pfree(frozen);
1676 
1677  /* save stats for use later */
1678  vacrelstats->tuples_deleted = tups_vacuumed;
1679  vacrelstats->new_dead_tuples = nkeep;
1680 
1681  /* now we can compute the new value for pg_class.reltuples */
1682  vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
1683  nblocks,
1684  vacrelstats->tupcount_pages,
1685  live_tuples);
1686 
1687  /*
1688  * Also compute the total number of surviving heap entries. In the
1689  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1690  */
1691  vacrelstats->new_rel_tuples =
1692  Max(vacrelstats->new_live_tuples, 0) + vacrelstats->new_dead_tuples;
1693 
1694  /*
1695  * Release any remaining pin on visibility map page.
1696  */
1697  if (BufferIsValid(vmbuffer))
1698  {
1699  ReleaseBuffer(vmbuffer);
1700  vmbuffer = InvalidBuffer;
1701  }
1702 
1703  /* If any tuples need to be deleted, perform final vacuum cycle */
1704  /* XXX put a threshold on min number of tuples here? */
1705  if (dead_tuples->num_tuples > 0)
1706  {
1707  /* Work on all the indexes, and then the heap */
1708  lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats,
1709  lps, nindexes);
1710 
1711  /* Remove tuples from heap */
1712  lazy_vacuum_heap(onerel, vacrelstats);
1713  }
1714 
1715  /*
1716  * Vacuum the remainder of the Free Space Map. We must do this whether or
1717  * not there were indexes.
1718  */
1719  if (blkno > next_fsm_block_to_vacuum)
1720  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1721 
1722  /* report all blocks vacuumed */
1724 
1725  /* Do post-vacuum cleanup */
1726  if (vacrelstats->useindex)
1727  lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes);
1728 
1729  /*
1730  * End parallel mode before updating index statistics as we cannot write
1731  * during parallel mode.
1732  */
1733  if (ParallelVacuumIsActive(lps))
1734  end_parallel_vacuum(indstats, lps, nindexes);
1735 
1736  /* Update index statistics */
1737  update_index_statistics(Irel, indstats, nindexes);
1738 
1739  /* If no indexes, make log report that lazy_vacuum_heap would've made */
1740  if (vacuumed_pages)
1741  ereport(elevel,
1742  (errmsg("\"%s\": removed %.0f row versions in %u pages",
1743  vacrelstats->relname,
1744  tups_vacuumed, vacuumed_pages)));
1745 
1746  /*
1747  * This is pretty messy, but we split it up so that we can skip emitting
1748  * individual parts of the message when not applicable.
1749  */
1750  initStringInfo(&buf);
1751  appendStringInfo(&buf,
1752  _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"),
1753  nkeep, OldestXmin);
1754  appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"),
1755  nunused);
1756  appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
1757  "Skipped %u pages due to buffer pins, ",
1758  vacrelstats->pinskipped_pages),
1759  vacrelstats->pinskipped_pages);
1760  appendStringInfo(&buf, ngettext("%u frozen page.\n",
1761  "%u frozen pages.\n",
1762  vacrelstats->frozenskipped_pages),
1763  vacrelstats->frozenskipped_pages);
1764  appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
1765  "%u pages are entirely empty.\n",
1766  empty_pages),
1767  empty_pages);
1768  appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
1769 
1770  ereport(elevel,
1771  (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
1772  vacrelstats->relname,
1773  tups_vacuumed, num_tuples,
1774  vacrelstats->scanned_pages, nblocks),
1775  errdetail_internal("%s", buf.data)));
1776  pfree(buf.data);
1777 }
1778 
1779 /*
1780  * lazy_vacuum_all_indexes() -- vacuum all indexes of relation.
1781  *
1782  * We process the indexes serially unless we are doing parallel vacuum.
1783  */
1784 static void
1786  IndexBulkDeleteResult **stats,
1787  LVRelStats *vacrelstats, LVParallelState *lps,
1788  int nindexes)
1789 {
1791  Assert(nindexes > 0);
1792 
1793  /* Log cleanup info before we touch indexes */
1794  vacuum_log_cleanup_info(onerel, vacrelstats);
1795 
1796  /* Report that we are now vacuuming indexes */
1799 
1800  /* Perform index vacuuming with parallel workers for parallel vacuum. */
1801  if (ParallelVacuumIsActive(lps))
1802  {
1803  /* Tell parallel workers to do index vacuuming */
1804  lps->lvshared->for_cleanup = false;
1805  lps->lvshared->first_time = false;
1806 
1807  /*
1808  * We can only provide an approximate value of num_heap_tuples in
1809  * vacuum cases.
1810  */
1811  lps->lvshared->reltuples = vacrelstats->old_live_tuples;
1812  lps->lvshared->estimated_count = true;
1813 
1814  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
1815  }
1816  else
1817  {
1818  int idx;
1819 
1820  for (idx = 0; idx < nindexes; idx++)
1821  lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples,
1822  vacrelstats->old_live_tuples, vacrelstats);
1823  }
1824 
1825  /* Increase and report the number of index scans */
1826  vacrelstats->num_index_scans++;
1828  vacrelstats->num_index_scans);
1829 }
1830 
1831 
1832 /*
1833  * lazy_vacuum_heap() -- second pass over the heap
1834  *
1835  * This routine marks dead tuples as unused and compacts out free
1836  * space on their pages. Pages not having dead tuples recorded from
1837  * lazy_scan_heap are not visited at all.
1838  *
1839  * Note: the reason for doing this as a second pass is we cannot remove
1840  * the tuples until we've removed their index entries, and we want to
1841  * process index entry removal in batches as large as possible.
 *
 * The loop walks vacrelstats->dead_tuples by index.  For each entry it reads
 * the entry's block, hands the buffer to lazy_vacuum_page() (whose return
 * value becomes the next tupindex), records the page's free space and bumps
 * npages.  vacrelstats->blkno tracks the current block and is reset to
 * InvalidBlockNumber after the loop; the VM buffer pin, if any, is released.
 *
 * NOTE(review): the embedded listing numbers skip 1853-1854, 1858, 1871 and
 * 1877, so some statements (including the condition in front of the
 * "skip this TID" block) are absent from this text -- presumably lost in
 * extraction; confirm against the real file.
1842  */
1843 static void
1844 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
1845 {
1846  int tupindex;
1847  int npages;
1848  PGRUsage ru0;
1849  Buffer vmbuffer = InvalidBuffer;
1850  LVSavedErrInfo saved_err_info;
1851 
1852  /* Report that we are now vacuuming the heap */
1855 
1856  /* Update error traceback information */
1857  update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
1859 
1860  pg_rusage_init(&ru0);
1861  npages = 0;
1862 
1863  tupindex = 0;
1864  while (tupindex < vacrelstats->dead_tuples->num_tuples)
1865  {
1866  BlockNumber tblk;
1867  Buffer buf;
1868  Page page;
1869  Size freespace;
1870 
1872 
1873  tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
1874  vacrelstats->blkno = tblk;
1875  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
1876  vac_strategy);
 /*
  * NOTE(review): the condition guarding the following block (listing line
  * 1877) is not present in this text.  The block itself drops the pin and
  * advances past this single TID without vacuuming the page.
  */
1878  {
1879  ReleaseBuffer(buf);
1880  ++tupindex;
1881  continue;
1882  }
1883  tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
1884  &vmbuffer);
1885 
1886  /* Now that we've compacted the page, record its available space */
1887  page = BufferGetPage(buf);
1888  freespace = PageGetHeapFreeSpace(page);
1889 
1890  UnlockReleaseBuffer(buf);
1891  RecordPageWithFreeSpace(onerel, tblk, freespace);
1892  npages++;
1893  }
1894 
1895  /* Clear the block number information */
1896  vacrelstats->blkno = InvalidBlockNumber;
1897 
1898  if (BufferIsValid(vmbuffer))
1899  {
1900  ReleaseBuffer(vmbuffer);
1901  vmbuffer = InvalidBuffer;
1902  }
1903 
 /* tupindex is reported as the row-version count, npages as the page count */
1904  ereport(elevel,
1905  (errmsg("\"%s\": removed %d row versions in %d pages",
1906  vacrelstats->relname,
1907  tupindex, npages),
1908  errdetail_internal("%s", pg_rusage_show(&ru0))));
1909 
1910  /* Revert to the previous phase information for error traceback */
1911  restore_vacuum_error_info(vacrelstats, &saved_err_info);
1912 }
1913 
1914 /*
1915  * lazy_vacuum_page() -- free dead tuples on a page
1916  * and repair its fragmentation.
1917  *
1918  * Caller must hold pin and buffer cleanup lock on the buffer.
1919  *
1920  * tupindex is the index in vacrelstats->dead_tuples of the first dead
1921  * tuple for this page. We assume the rest follow sequentially.
1922  * The return value is the first tupindex after the tuples of this page.
 *
 * *vmbuffer is passed to visibilitymap_get_status() and is asserted to be a
 * valid buffer afterwards; it is then used for visibilitymap_set() when any
 * visibility-map bit still has to be set.
 *
 * NOTE(review): the embedded listing numbers skip 1925, 1936, 1942 and 1959,
 * so the function-name line and a few statements are absent from this text
 * (an END_CRIT_SECTION() is visible below without a matching start) --
 * presumably lost in extraction; confirm against the real file.
1923  */
1924 static int
1926  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
1927 {
1928  LVDeadTuples *dead_tuples = vacrelstats->dead_tuples;
1929  Page page = BufferGetPage(buffer);
1930  OffsetNumber unused[MaxOffsetNumber];
1931  int uncnt = 0;
1932  TransactionId visibility_cutoff_xid;
1933  bool all_frozen;
1934  LVSavedErrInfo saved_err_info;
1935 
1937 
1938  /* Update error traceback information */
1939  update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
1940  blkno, InvalidOffsetNumber);
1941 
1943 
 /*
  * Consume consecutive dead-tuple entries that belong to blkno: mark each
  * line pointer unused and remember its offset in unused[] for the WAL
  * record below.
  */
1944  for (; tupindex < dead_tuples->num_tuples; tupindex++)
1945  {
1946  BlockNumber tblk;
1947  OffsetNumber toff;
1948  ItemId itemid;
1949 
1950  tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
1951  if (tblk != blkno)
1952  break; /* past end of tuples for this block */
1953  toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
1954  itemid = PageGetItemId(page, toff);
1955  ItemIdSetUnused(itemid);
1956  unused[uncnt++] = toff;
1957  }
1958 
1960 
1961  /*
1962  * Mark buffer dirty before we write WAL.
1963  */
1964  MarkBufferDirty(buffer);
1965 
1966  /* XLOG stuff */
1967  if (RelationNeedsWAL(onerel))
1968  {
1969  XLogRecPtr recptr;
1970 
 /* Only the "now unused" offsets are logged; the other two lists are empty */
1971  recptr = log_heap_clean(onerel, buffer,
1972  NULL, 0, NULL, 0,
1973  unused, uncnt,
1974  vacrelstats->latestRemovedXid);
1975  PageSetLSN(page, recptr);
1976  }
1977 
1978  /*
1979  * End critical section, so we safely can do visibility tests (which
1980  * possibly need to perform IO and allocate memory!). If we crash now the
1981  * page (including the corresponding vm bit) might not be marked all
1982  * visible, but that's fine. A later vacuum will fix that.
1983  */
1984  END_CRIT_SECTION();
1985 
1986  /*
1987  * Now that we have removed the dead tuples from the page, once again
1988  * check if the page has become all-visible. The page is already marked
1989  * dirty, exclusively locked, and, if needed, a full page image has been
1990  * emitted in the log_heap_clean() above.
1991  */
1992  if (heap_page_is_all_visible(onerel, buffer, vacrelstats,
1993  &visibility_cutoff_xid,
1994  &all_frozen))
1995  PageSetAllVisible(page);
1996 
1997  /*
1998  * All the changes to the heap page have been done. If the all-visible
1999  * flag is now set, also set the VM all-visible bit (and, if possible, the
2000  * all-frozen bit) unless this has already been done previously.
2001  */
2002  if (PageIsAllVisible(page))
2003  {
2004  uint8 vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
2005  uint8 flags = 0;
2006 
2007  /* Set the VM all-frozen bit to flag, if needed */
 /* flags collects only those VM bits that are not already set in vm_status */
2008  if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
2009  flags |= VISIBILITYMAP_ALL_VISIBLE;
2010  if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
2011  flags |= VISIBILITYMAP_ALL_FROZEN;
2012 
2013  Assert(BufferIsValid(*vmbuffer));
2014  if (flags != 0)
2015  visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
2016  *vmbuffer, visibility_cutoff_xid, flags);
2017  }
2018 
2019  /* Revert to the previous phase information for error traceback */
2020  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2021  return tupindex;
2022 }
2023 
2024 /*
2025  * lazy_check_needs_freeze() -- scan page to see if any tuples
2026  * need to be cleaned to avoid wraparound
2027  *
2028  * Returns true if the page needs to be vacuumed using cleanup lock.
2029  * Also returns a flag indicating whether page contains any tuples at all.
2030  */
2031 static bool
2032 lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
2033 {
2034  Page page = BufferGetPage(buf);
2035  OffsetNumber offnum,
2036  maxoff;
2037  HeapTupleHeader tupleheader;
2038 
2039  *hastup = false;
2040 
2041  /*
2042  * New and empty pages, obviously, don't contain tuples. We could make
2043  * sure that the page is registered in the FSM, but it doesn't seem worth
2044  * waiting for a cleanup lock just for that, especially because it's
2045  * likely that the pin holder will do so.
2046  */
2047  if (PageIsNew(page) || PageIsEmpty(page))
2048  return false;
2049 
2050  maxoff = PageGetMaxOffsetNumber(page);
2051  for (offnum = FirstOffsetNumber;
2052  offnum <= maxoff;
2053  offnum = OffsetNumberNext(offnum))
2054  {
2055  ItemId itemid;
2056 
2057  /*
2058  * Set the offset number so that we can display it along with any
2059  * error that occurred while processing this tuple.
2060  */
2061  vacrelstats->offnum = offnum;
2062  itemid = PageGetItemId(page, offnum);
2063 
2064  /* this should match hastup test in count_nondeletable_pages() */
2065  if (ItemIdIsUsed(itemid))
2066  *hastup = true;
2067 
2068  /* dead and redirect items never need freezing */
2069  if (!ItemIdIsNormal(itemid))
2070  continue;
2071 
2072  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2073 
2074  if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
2075  MultiXactCutoff, buf))
2076  break;
2077  } /* scan along page */
2078 
2079  /* Clear the offset information once we have processed the given page. */
2080  vacrelstats->offnum = InvalidOffsetNumber;
2081 
2082  return (offnum <= maxoff);
2083 }
2084 
2085 /*
2086  * Perform index vacuum or index cleanup with parallel workers. This function
2087  * must be used by the parallel vacuum leader process. The caller must set
2088  * lps->lvshared->for_cleanup to indicate whether to perform vacuum or
2089  * cleanup.
 *
 * Outline of the visible code: pick a worker count for the current phase,
 * subtract one for the leader, cap it at lps->pcxt->nworkers, (re)launch
 * workers when that count is positive, let the leader process its own
 * indexes via vacuum_indexes_leader() and parallel_vacuum_index(), then
 * accumulate per-worker buffer/WAL usage and clear the shared-cost pointers.
 *
 * NOTE(review): the embedded listing numbers skip 2092, 2098-2099, 2107,
 * 2133, 2142-2143, 2151, 2160, 2163-2164, 2200, 2209 and 2211, so the
 * function-name line and several statements are absent from this text --
 * presumably lost in extraction; confirm against the real file.
2090  */
2091 static void
2093  LVRelStats *vacrelstats, LVParallelState *lps,
2094  int nindexes)
2095 {
2096  int nworkers;
2097 
2100  Assert(nindexes > 0);
2101 
2102  /* Determine the number of parallel workers to launch */
2103  if (lps->lvshared->for_cleanup)
2104  {
2105  if (lps->lvshared->first_time)
2106  nworkers = lps->nindexes_parallel_cleanup +
2108  else
2109  nworkers = lps->nindexes_parallel_cleanup;
2110  }
2111  else
2112  nworkers = lps->nindexes_parallel_bulkdel;
2113 
2114  /* The leader process will participate */
2115  nworkers--;
2116 
2117  /*
2118  * It is possible that parallel context is initialized with fewer workers
2119  * than the number of indexes that need a separate worker in the current
2120  * phase, so we need to consider it. See compute_parallel_vacuum_workers.
2121  */
2122  nworkers = Min(nworkers, lps->pcxt->nworkers);
2123 
2124  /* Setup the shared cost-based vacuum delay and launch workers */
2125  if (nworkers > 0)
2126  {
 /* Only a repeat pass (num_index_scans > 0) needs the counter reset */
2127  if (vacrelstats->num_index_scans > 0)
2128  {
2129  /* Reset the parallel index processing counter */
2130  pg_atomic_write_u32(&(lps->lvshared->idx), 0);
2131 
2132  /* Reinitialize the parallel context to relaunch parallel workers */
2134  }
2135 
2136  /*
2137  * Set up shared cost balance and the number of active workers for
2138  * vacuum delay. We need to do this before launching workers as
2139  * otherwise, they might not see the updated values for these
2140  * parameters.
2141  */
2144 
2145  /*
2146  * The number of workers can vary between bulkdelete and cleanup
2147  * phase.
2148  */
2149  ReinitializeParallelWorkers(lps->pcxt, nworkers);
2150 
2152 
2153  if (lps->pcxt->nworkers_launched > 0)
2154  {
2155  /*
2156  * Reset the local cost values for leader backend as we have
2157  * already accumulated the remaining balance of heap.
2158  */
2159  VacuumCostBalance = 0;
2161 
2162  /* Enable shared cost balance for leader backend */
2165  }
2166 
 /* The message reports launched workers against the planned nworkers */
2167  if (lps->lvshared->for_cleanup)
2168  ereport(elevel,
2169  (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
2170  "launched %d parallel vacuum workers for index cleanup (planned: %d)",
2171  lps->pcxt->nworkers_launched),
2172  lps->pcxt->nworkers_launched, nworkers)));
2173  else
2174  ereport(elevel,
2175  (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
2176  "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
2177  lps->pcxt->nworkers_launched),
2178  lps->pcxt->nworkers_launched, nworkers)));
2179  }
2180 
2181  /* Process the indexes that can be processed by only leader process */
2182  vacuum_indexes_leader(Irel, stats, vacrelstats, lps, nindexes);
2183 
2184  /*
2185  * Join as a parallel worker. The leader process alone processes all the
2186  * indexes in the case where no workers are launched.
2187  */
2188  parallel_vacuum_index(Irel, stats, lps->lvshared,
2189  vacrelstats->dead_tuples, nindexes, vacrelstats);
2190 
2191  /*
2192  * Next, accumulate buffer and WAL usage. (This must wait for the workers
2193  * to finish, or we might get incomplete data.)
2194  */
2195  if (nworkers > 0)
2196  {
2197  int i;
2198 
2199  /* Wait for all vacuum workers to finish */
2201 
2202  for (i = 0; i < lps->pcxt->nworkers_launched; i++)
2203  InstrAccumParallelQuery(&lps->buffer_usage[i], &lps->wal_usage[i]);
2204  }
2205 
2206  /*
2207  * Carry the shared balance value to heap scan and disable shared costing
2208  */
 /* NOTE(review): the condition for this block (listing line 2209) is not present in this text */
2210  {
2212  VacuumSharedCostBalance = NULL;
2213  VacuumActiveNWorkers = NULL;
2214  }
2215 }
2216 
2217 /*
2218  * Index vacuum/cleanup routine used by the leader process and parallel
2219  * vacuum worker processes to process the indexes in parallel.
 *
 * Each caller repeatedly claims the next index number by atomically
 * incrementing lvshared->idx and stops once the claimed number reaches
 * nindexes.  An index is skipped here when get_indstats() returns NULL for
 * it or when skip_parallel_vacuum_index() says so; every other claimed
 * index is handed to vacuum_one_index().
 *
 * NOTE(review): the embedded listing numbers skip 2222, 2229-2230 and
 * 2265-2266, so the function-name line and the statements under the two
 * "active worker count" comments are absent from this text -- presumably
 * lost in extraction; confirm against the real file.
2220  */
2221 static void
2223  LVShared *lvshared, LVDeadTuples *dead_tuples,
2224  int nindexes, LVRelStats *vacrelstats)
2225 {
2226  /*
2227  * Increment the active worker count if we are able to launch any worker.
2228  */
2231 
2232  /* Loop until all indexes are vacuumed */
2233  for (;;)
2234  {
2235  int idx;
2236  LVSharedIndStats *shared_indstats;
2237 
2238  /* Get an index number to process */
2239  idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
2240 
2241  /* Done for all indexes? */
2242  if (idx >= nindexes)
2243  break;
2244 
2245  /* Get the index statistics of this index from DSM */
2246  shared_indstats = get_indstats(lvshared, idx);
2247 
2248  /*
2249  * Skip processing indexes that don't participate in parallel
2250  * operation
2251  */
2252  if (shared_indstats == NULL ||
2253  skip_parallel_vacuum_index(Irel[idx], lvshared))
2254  continue;
2255 
2256  /* Do vacuum or cleanup of the index */
2257  vacuum_one_index(Irel[idx], &(stats[idx]), lvshared, shared_indstats,
2258  dead_tuples, vacrelstats);
2259  }
2260 
2261  /*
2262  * We have completed the index vacuum so decrement the active worker
2263  * count.
2264  */
2267 }
2268 
2269 /*
2270  * Vacuum or cleanup indexes that can be processed by only the leader process
2271  * because these indexes don't support parallel operation at that phase.
 *
 * The selection test is the same one parallel_vacuum_index() uses to skip an
 * index (get_indstats() returns NULL, or skip_parallel_vacuum_index()
 * returns true), so the two routines together cover every index once.
 *
 * NOTE(review): the embedded listing numbers skip 2274, 2280, 2285-2286 and
 * 2306-2307, so the function-name line and the statements under the two
 * "active worker count" comments are absent from this text -- presumably
 * lost in extraction; confirm against the real file.
2272  */
2273 static void
2275  LVRelStats *vacrelstats, LVParallelState *lps,
2276  int nindexes)
2277 {
2278  int i;
2279 
2281 
2282  /*
2283  * Increment the active worker count if we are able to launch any worker.
2284  */
2287 
2288  for (i = 0; i < nindexes; i++)
2289  {
2290  LVSharedIndStats *shared_indstats;
2291 
2292  shared_indstats = get_indstats(lps->lvshared, i);
2293 
2294  /* Process the indexes skipped by parallel workers */
2295  if (shared_indstats == NULL ||
2296  skip_parallel_vacuum_index(Irel[i], lps->lvshared))
2297  vacuum_one_index(Irel[i], &(stats[i]), lps->lvshared,
2298  shared_indstats, vacrelstats->dead_tuples,
2299  vacrelstats);
2300  }
2301 
2302  /*
2303  * We have completed the index vacuum so decrement the active worker
2304  * count.
2305  */
2308 }
2309 
2310 /*
2311  * Vacuum or cleanup index either by leader process or by one of the worker
2312  * process. After processing the index this function copies the index
2313  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
2314  * segment.
 *
 * shared_indstats may be NULL, in which case no copying or pointer fix-up is
 * done and *stats is simply whatever the vacuum/cleanup call left there.
 * lvshared->for_cleanup selects between lazy_cleanup_index() and
 * lazy_vacuum_index().
 *
 * NOTE(review): the embedded listing numbers skip 2317, so the line carrying
 * the function name and its first parameters (indrel and stats are used
 * below) is absent from this text -- presumably lost in extraction.
2315  */
2316 static void
2318  LVShared *lvshared, LVSharedIndStats *shared_indstats,
2319  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
2320 {
2321  IndexBulkDeleteResult *bulkdelete_res = NULL;
2322 
2323  if (shared_indstats)
2324  {
2325  /* Get the space for IndexBulkDeleteResult */
2326  bulkdelete_res = &(shared_indstats->stats);
2327 
2328  /*
2329  * Update the pointer to the corresponding bulk-deletion result if
2330  * someone has already updated it.
2331  */
2332  if (shared_indstats->updated && *stats == NULL)
2333  *stats = bulkdelete_res;
2334  }
2335 
2336  /* Do vacuum or cleanup of the index */
2337  if (lvshared->for_cleanup)
2338  lazy_cleanup_index(indrel, stats, lvshared->reltuples,
2339  lvshared->estimated_count, vacrelstats);
2340  else
2341  lazy_vacuum_index(indrel, stats, dead_tuples,
2342  lvshared->reltuples, vacrelstats);
2343 
2344  /*
2345  * Copy the index bulk-deletion result returned from ambulkdelete and
2346  * amvacuumcleanup to the DSM segment if it's the first cycle because they
2347  * allocate locally and it's possible that an index will be vacuumed by a
2348  * different vacuum process the next cycle. Copying the result normally
2349  * happens only the first time an index is vacuumed. For any additional
2350  * vacuum pass, we directly point to the result on the DSM segment and
2351  * pass it to vacuum index APIs so that workers can update it directly.
2352  *
2353  * Since all vacuum workers write the bulk-deletion result at different
2354  * slots we can write them without locking.
2355  */
2356  if (shared_indstats && !shared_indstats->updated && *stats != NULL)
2357  {
2358  memcpy(bulkdelete_res, *stats, sizeof(IndexBulkDeleteResult));
2359  shared_indstats->updated = true;
2360 
2361  /*
2362  * Now that stats[idx] points to the DSM segment, we don't need the
2363  * locally allocated results.
2364  */
2365  pfree(*stats);
2366  *stats = bulkdelete_res;
2367  }
2368 }
2369 
2370 /*
2371  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2372  *
2373  * Cleanup indexes. We process the indexes serially unless we are doing
2374  * parallel vacuum.
 *
 * Both paths report new_rel_tuples as the tuple count and flag it as an
 * estimate exactly when tupcount_pages < rel_pages.  In the parallel path
 * first_time is set to true only when no index scan has happened yet
 * (num_index_scans == 0).
 *
 * NOTE(review): the embedded listing numbers skip 2377, 2383 and 2387-2388,
 * so the function-name line and a few statements are absent from this text
 * -- presumably lost in extraction; confirm against the real file.
2375  */
2376 static void
2378  LVRelStats *vacrelstats, LVParallelState *lps,
2379  int nindexes)
2380 {
2381  int idx;
2382 
2384  Assert(nindexes > 0);
2385 
2386  /* Report that we are now cleaning up indexes */
2389 
2390  /*
2391  * If parallel vacuum is active we perform index cleanup with parallel
2392  * workers.
2393  */
2394  if (ParallelVacuumIsActive(lps))
2395  {
2396  /* Tell parallel workers to do index cleanup */
2397  lps->lvshared->for_cleanup = true;
2398  lps->lvshared->first_time =
2399  (vacrelstats->num_index_scans == 0);
2400 
2401  /*
2402  * Now we can provide a better estimate of total number of surviving
2403  * tuples (we assume indexes are more interested in that than in the
2404  * number of nominally live tuples).
2405  */
2406  lps->lvshared->reltuples = vacrelstats->new_rel_tuples;
2407  lps->lvshared->estimated_count =
2408  (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
2409 
2410  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
2411  }
2412  else
2413  {
 /* Serial path: same tuple count and estimate flag as the parallel path */
2414  for (idx = 0; idx < nindexes; idx++)
2415  lazy_cleanup_index(Irel[idx], &stats[idx],
2416  vacrelstats->new_rel_tuples,
2417  vacrelstats->tupcount_pages < vacrelstats->rel_pages,
2418  vacrelstats);
2419  }
2420 }
2421 
2422 /*
2423  * lazy_vacuum_index() -- vacuum one index relation.
2424  *
2425  * Delete all the index entries pointing to tuples listed in
2426  * dead_tuples, and update running statistics.
2427  *
2428  * reltuples is the number of heap tuples to be passed to the
2429  * bulkdelete callback. It's always assumed to be estimated.
 *
 * *stats is passed to index_bulk_delete() and overwritten with its result.
 * lazy_tid_reaped() is the per-TID callback, with dead_tuples as its state.
 * vacrelstats->indname must be NULL on entry; it holds a copy of the index
 * name for the duration of the call and is freed and reset before return.
 *
 * NOTE(review): the embedded listing numbers skip 2432 and 2459-2460, so the
 * function-name line and the tail of the update_vacuum_error_info() call are
 * absent from this text -- presumably lost in extraction.
2430  */
2431 static void
2433  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
2434 {
2435  IndexVacuumInfo ivinfo;
2436  const char *msg;
2437  PGRUsage ru0;
2438  LVSavedErrInfo saved_err_info;
2439 
2440  pg_rusage_init(&ru0);
2441 
2442  ivinfo.index = indrel;
2443  ivinfo.analyze_only = false;
2444  ivinfo.report_progress = false;
2445  ivinfo.estimated_count = true;
2446  ivinfo.message_level = elevel;
2447  ivinfo.num_heap_tuples = reltuples;
2448  ivinfo.strategy = vac_strategy;
2449 
2450  /*
2451  * Update error traceback information.
2452  *
2453  * The index name is saved during this phase and restored immediately
2454  * after this phase. See vacuum_error_callback.
2455  */
2456  Assert(vacrelstats->indname == NULL);
2457  vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
2458  update_vacuum_error_info(vacrelstats, &saved_err_info,
2461 
2462  /* Do bulk deletion */
2463  *stats = index_bulk_delete(&ivinfo, *stats,
2464  lazy_tid_reaped, (void *) dead_tuples);
2465 
 /* Message wording differs depending on whether a parallel worker ran this */
2466  if (IsParallelWorker())
2467  msg = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker");
2468  else
2469  msg = gettext_noop("scanned index \"%s\" to remove %d row versions");
2470 
2471  ereport(elevel,
2472  (errmsg(msg,
2473  vacrelstats->indname,
2474  dead_tuples->num_tuples),
2475  errdetail_internal("%s", pg_rusage_show(&ru0))));
2476 
2477  /* Revert to the previous phase information for error traceback */
2478  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2479  pfree(vacrelstats->indname);
2480  vacrelstats->indname = NULL;
2481 }
2482 
2483 /*
2484  * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
2485  *
2486  * reltuples is the number of heap tuples and estimated_count is true
2487  * if reltuples is an estimated value.
 *
 * *stats is passed to index_vacuum_cleanup() and overwritten with its
 * result; the summary message is emitted only when that result is non-NULL.
 * vacrelstats->indname must be NULL on entry; it holds a copy of the index
 * name for the duration of the call and is freed and reset before return.
 *
 * NOTE(review): the embedded listing numbers skip 2490 and 2519-2520, so the
 * function-name line and the tail of the update_vacuum_error_info() call are
 * absent from this text -- presumably lost in extraction.
2488  */
2489 static void
2491  IndexBulkDeleteResult **stats,
2492  double reltuples, bool estimated_count, LVRelStats *vacrelstats)
2493 {
2494  IndexVacuumInfo ivinfo;
2495  const char *msg;
2496  PGRUsage ru0;
2497  LVSavedErrInfo saved_err_info;
2498 
2499  pg_rusage_init(&ru0);
2500 
2501  ivinfo.index = indrel;
2502  ivinfo.analyze_only = false;
2503  ivinfo.report_progress = false;
2504  ivinfo.estimated_count = estimated_count;
2505  ivinfo.message_level = elevel;
2506 
2507  ivinfo.num_heap_tuples = reltuples;
2508  ivinfo.strategy = vac_strategy;
2509 
2510  /*
2511  * Update error traceback information.
2512  *
2513  * The index name is saved during this phase and restored immediately
2514  * after this phase. See vacuum_error_callback.
2515  */
2516  Assert(vacrelstats->indname == NULL);
2517  vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
2518  update_vacuum_error_info(vacrelstats, &saved_err_info,
2521 
2522  *stats = index_vacuum_cleanup(&ivinfo, *stats);
2523 
 /* Nothing is logged when the cleanup call returned no statistics */
2524  if (*stats)
2525  {
2526  if (IsParallelWorker())
2527  msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages as reported by parallel vacuum worker");
2528  else
2529  msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages");
2530 
2531  ereport(elevel,
2532  (errmsg(msg,
2533  RelationGetRelationName(indrel),
2534  (*stats)->num_index_tuples,
2535  (*stats)->num_pages),
2536  errdetail("%.0f index row versions were removed.\n"
2537  "%u index pages have been deleted, %u are currently reusable.\n"
2538  "%s.",
2539  (*stats)->tuples_removed,
2540  (*stats)->pages_deleted, (*stats)->pages_free,
2541  pg_rusage_show(&ru0))));
2542  }
2543 
2544  /* Revert to the previous phase information for error traceback */
2545  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2546  pfree(vacrelstats->indname);
2547  vacrelstats->indname = NULL;
2548 }
2549 
2550 /*
2551  * should_attempt_truncation - should we attempt to truncate the heap?
2552  *
2553  * Don't even think about it unless we have a shot at releasing a goodly
2554  * number of pages. Otherwise, the time taken isn't worth it.
2555  *
2556  * Also don't attempt it if we are doing early pruning/vacuuming, because a
2557  * scan which cannot find a truncated heap page cannot determine that the
2558  * snapshot is too old to read that page. We might be able to get away with
2559  * truncating all except one of the pages, setting its LSN to (at least) the
2560  * maximum of the truncated range if we also treated an index leaf tuple
2561  * pointing to a missing heap page as something to trigger the "snapshot too
2562  * old" error, but that seems fragile and seems like it deserves its own patch
2563  * if we consider it.
2564  *
2565  * This is split out so that we can test whether truncation is going to be
2566  * called for before we actually do it. If you change the logic here, be
2567  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
 *
 * Visible logic: returns false outright when params->truncate is
 * VACOPT_TERNARY_DISABLED.  Otherwise the candidate page count is
 * rel_pages - nonempty_pages, which must be non-zero and reach either
 * REL_TRUNCATE_MINIMUM or rel_pages / REL_TRUNCATE_FRACTION.
 *
 * NOTE(review): the embedded listing numbers skip 2570 and 2581, so the
 * function-name line and the final operand of the && chain are absent from
 * this text -- presumably lost in extraction; confirm against the real file.
2568  */
2569 static bool
2571 {
2572  BlockNumber possibly_freeable;
2573 
2574  if (params->truncate == VACOPT_TERNARY_DISABLED)
2575  return false;
2576 
2577  possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
2578  if (possibly_freeable > 0 &&
2579  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2580  possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) &&
2582  return true;
2583  else
2584  return false;
2585 }
2586 
2587 /*
2588  * lazy_truncate_heap - try to truncate off any empty pages at the end
 *
 * Each pass of the outer loop: obtain the exclusive lock (giving up after a
 * bounded number of retries), bail out if the relation's block count no
 * longer equals old_rel_pages, re-verify the tail pages with
 * count_nondeletable_pages(), truncate, and update pages_removed and
 * rel_pages.  The loop repeats only while pages above nonempty_pages remain
 * and lock_waiter_detected was set during the pass.
 *
 * NOTE(review): the embedded listing numbers skip 2591, 2598-2599, 2621,
 * 2628, 2631, 2644, 2662, 2678 and 2694, so the function-name line and the
 * lock acquire/release and interrupt-check statements are absent from this
 * text -- presumably lost in extraction; confirm against the real file.
2589  */
2590 static void
2592 {
2593  BlockNumber old_rel_pages = vacrelstats->rel_pages;
2594  BlockNumber new_rel_pages;
2595  int lock_retry;
2596 
2597  /* Report that we are now truncating */
2600 
2601  /*
2602  * Loop until no more truncating can be done.
2603  */
2604  do
2605  {
2606  PGRUsage ru0;
2607 
2608  pg_rusage_init(&ru0);
2609 
2610  /*
2611  * We need full exclusive lock on the relation in order to do
2612  * truncation. If we can't get it, give up rather than waiting --- we
2613  * don't want to block other backends, and we don't want to deadlock
2614  * (which is quite possible considering we already hold a lower-grade
2615  * lock).
2616  */
2617  vacrelstats->lock_waiter_detected = false;
2618  lock_retry = 0;
2619  while (true)
2620  {
 /* NOTE(review): the test that leads to this break (listing line 2621) is not present in this text */
2622  break;
2623 
2624  /*
2625  * Check for interrupts while trying to (re-)acquire the exclusive
2626  * lock.
2627  */
2629 
2630  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2632  {
2633  /*
2634  * We failed to establish the lock in the specified number of
2635  * retries. This means we give up truncating.
2636  */
2637  vacrelstats->lock_waiter_detected = true;
2638  ereport(elevel,
2639  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2640  vacrelstats->relname)));
2641  return;
2642  }
2643 
2645  }
2646 
2647  /*
2648  * Now that we have exclusive lock, look to see if the rel has grown
2649  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2650  * the newly added pages presumably contain non-deletable tuples.
2651  */
2652  new_rel_pages = RelationGetNumberOfBlocks(onerel);
2653  if (new_rel_pages != old_rel_pages)
2654  {
2655  /*
2656  * Note: we intentionally don't update vacrelstats->rel_pages with
2657  * the new rel size here. If we did, it would amount to assuming
2658  * that the new pages are empty, which is unlikely. Leaving the
2659  * numbers alone amounts to assuming that the new pages have the
2660  * same tuple density as existing ones, which is less unlikely.
2661  */
2663  return;
2664  }
2665 
2666  /*
2667  * Scan backwards from the end to verify that the end pages actually
2668  * contain no tuples. This is *necessary*, not optional, because
2669  * other backends could have added tuples to these pages whilst we
2670  * were vacuuming.
2671  */
2672  new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
2673  vacrelstats->blkno = new_rel_pages;
2674 
2675  if (new_rel_pages >= old_rel_pages)
2676  {
2677  /* can't do anything after all */
2679  return;
2680  }
2681 
2682  /*
2683  * Okay to truncate.
2684  */
2685  RelationTruncate(onerel, new_rel_pages);
2686 
2687  /*
2688  * We can release the exclusive lock as soon as we have truncated.
2689  * Other backends can't safely access the relation until they have
2690  * processed the smgr invalidation that smgrtruncate sent out ... but
2691  * that should happen as part of standard invalidation processing once
2692  * they acquire lock on the relation.
2693  */
2695 
2696  /*
2697  * Update statistics. Here, it *is* correct to adjust rel_pages
2698  * without also touching reltuples, since the tuple count wasn't
2699  * changed by the truncation.
2700  */
2701  vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
2702  vacrelstats->rel_pages = new_rel_pages;
2703 
2704  ereport(elevel,
2705  (errmsg("\"%s\": truncated %u to %u pages",
2706  vacrelstats->relname,
2707  old_rel_pages, new_rel_pages),
2708  errdetail_internal("%s",
2709  pg_rusage_show(&ru0))));
 /* The next pass compares against the size we just truncated to */
2710  old_rel_pages = new_rel_pages;
2711  } while (new_rel_pages > vacrelstats->nonempty_pages &&
2712  vacrelstats->lock_waiter_detected);
2713 }
2714 
2715 /*
2716  * Rescan end pages to verify that they are (still) empty of tuples.
2717  *
2718  * Returns number of nondeletable pages (last nonempty page + 1).
 *
 * The scan runs from rel_pages down to nonempty_pages.  It returns early
 * with the current block number (and lock_waiter_detected set) when a
 * conflicting lock request is noticed, returns blkno + 1 for the first page
 * found to hold a used line pointer, and returns nonempty_pages when every
 * page examined was new, empty, or had only unused line pointers.
 *
 * NOTE(review): the embedded listing numbers skip 2721, 2737, 2765, 2767,
 * 2785, 2799 and 2808, so the function-name line, the head of the static
 * assertion, the lock-conflict tests, the interrupt checks and the buffer
 * lock call are absent from this text -- presumably lost in extraction.
2719  */
2720 static BlockNumber
2722 {
2723  BlockNumber blkno;
2724  BlockNumber prefetchedUntil;
2725  instr_time starttime;
2726 
2727  /* Initialize the starttime if we check for conflicting lock requests */
2728  INSTR_TIME_SET_CURRENT(starttime);
2729 
2730  /*
2731  * Start checking blocks at what we believe relation end to be and move
2732  * backwards. (Strange coding of loop control is needed because blkno is
2733  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2734  * in forward direction, so that OS-level readahead can kick in.
2735  */
2736  blkno = vacrelstats->rel_pages;
2738  "prefetch size must be power of 2");
2739  prefetchedUntil = InvalidBlockNumber;
2740  while (blkno > vacrelstats->nonempty_pages)
2741  {
2742  Buffer buf;
2743  Page page;
2744  OffsetNumber offnum,
2745  maxoff;
2746  bool hastup;
2747 
2748  /*
2749  * Check if another process requests a lock on our relation. We are
2750  * holding an AccessExclusiveLock here, so they will be waiting. We
2751  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2752  * only check if that interval has elapsed once every 32 blocks to
2753  * keep the number of system calls and actual shared lock table
2754  * lookups to a minimum.
2755  */
2756  if ((blkno % 32) == 0)
2757  {
2758  instr_time currenttime;
2759  instr_time elapsed;
2760 
2761  INSTR_TIME_SET_CURRENT(currenttime);
2762  elapsed = currenttime;
2763  INSTR_TIME_SUBTRACT(elapsed, starttime);
 /* Elapsed time is converted from microseconds to milliseconds here */
2764  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2766  {
2768  {
2769  ereport(elevel,
2770  (errmsg("\"%s\": suspending truncate due to conflicting lock request",
2771  vacrelstats->relname)));
2772 
2773  vacrelstats->lock_waiter_detected = true;
2774  return blkno;
2775  }
2776  starttime = currenttime;
2777  }
2778  }
2779 
2780  /*
2781  * We don't insert a vacuum delay point here, because we have an
2782  * exclusive lock on the table which we want to hold for as short a
2783  * time as possible. We still need to check for interrupts however.
2784  */
2786 
 /* Step down first: blkno now names the page examined in this iteration */
2787  blkno--;
2788 
2789  /* If we haven't prefetched this lot yet, do so now. */
2790  if (prefetchedUntil > blkno)
2791  {
2792  BlockNumber prefetchStart;
2793  BlockNumber pblkno;
2794 
 /* Round blkno down to a multiple of PREFETCH_SIZE */
2795  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2796  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2797  {
2798  PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);
2800  }
2801  prefetchedUntil = prefetchStart;
2802  }
2803 
2804  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
2805  RBM_NORMAL, vac_strategy);
2806 
2807  /* In this phase we only need shared access to the buffer */
2809 
2810  page = BufferGetPage(buf);
2811 
2812  if (PageIsNew(page) || PageIsEmpty(page))
2813  {
2814  UnlockReleaseBuffer(buf);
2815  continue;
2816  }
2817 
2818  hastup = false;
2819  maxoff = PageGetMaxOffsetNumber(page);
2820  for (offnum = FirstOffsetNumber;
2821  offnum <= maxoff;
2822  offnum = OffsetNumberNext(offnum))
2823  {
2824  ItemId itemid;
2825 
2826  itemid = PageGetItemId(page, offnum);
2827 
2828  /*
2829  * Note: any non-unused item should be taken as a reason to keep
2830  * this page. We formerly thought that DEAD tuples could be
2831  * thrown away, but that's not so, because we'd not have cleaned
2832  * out their index entries.
2833  */
2834  if (ItemIdIsUsed(itemid))
2835  {
2836  hastup = true;
2837  break; /* can stop scanning */
2838  }
2839  } /* scan along page */
2840 
2841  UnlockReleaseBuffer(buf);
2842 
2843  /* Done scanning if we found a tuple here */
2844  if (hastup)
2845  return blkno + 1;
2846  }
2847 
2848  /*
2849  * If we fall out of the loop, all the previously-thought-to-be-empty
2850  * pages still are; we need not bother to look at the last known-nonempty
2851  * page.
2852  */
2853  return vacrelstats->nonempty_pages;
2854 }
2855 
2856 /*
2857  * Return the maximum number of dead tuples we can record.
 *
 * With useindex the limit starts from MAXDEADTUPLES(vac_work_mem * 1024L),
 * is capped at INT_MAX and at MAXDEADTUPLES(MaxAllocSize), is reduced to
 * relblocks * LAZY_ALLOC_TUPLES when that is smaller, and is finally raised
 * to at least MaxHeapTuplesPerPage.  Without useindex the result is simply
 * MaxHeapTuplesPerPage.
 *
 * NOTE(review): the embedded listing numbers skip 2865, so the two result
 * arms of the ?: expression that initialises vac_work_mem are absent from
 * this text -- presumably lost in extraction; confirm against the real file.
2858  */
2859 static long
2860 compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
2861 {
2862  long maxtuples;
2863  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
2864  autovacuum_work_mem != -1 ?
2866 
2867  if (useindex)
2868  {
2869  maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
2870  maxtuples = Min(maxtuples, INT_MAX);
2871  maxtuples = Min(maxtuples, MAXDEADTUPLES(MaxAllocSize));
2872 
2873  /* curious coding here to ensure the multiplication can't overflow */
2874  if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
2875  maxtuples = relblocks * LAZY_ALLOC_TUPLES;
2876 
2877  /* stay sane if small maintenance_work_mem */
2878  maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
2879  }
2880  else
2881  maxtuples = MaxHeapTuplesPerPage;
2882 
2883  return maxtuples;
2884 }
2885 
2886 /*
2887  * lazy_space_alloc - space allocation decisions for lazy vacuum
2888  *
2889  * See the comments at the head of this file for rationale.
2890  */
2891 static void
2892 lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
2893 {
2894  LVDeadTuples *dead_tuples = NULL;
2895  long maxtuples;
2896 
2897  maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
2898 
2899  dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
2900  dead_tuples->num_tuples = 0;
2901  dead_tuples->max_tuples = (int) maxtuples;
2902 
2903  vacrelstats->dead_tuples = dead_tuples;
2904 }
2905 
2906 /*
2907  * lazy_record_dead_tuple - remember one deletable tuple
2908  */
2909 static void
2911 {
2912  /*
2913  * The array shouldn't overflow under normal behavior, but perhaps it
2914  * could if we are given a really small maintenance_work_mem. In that
2915  * case, just forget the last few tuples (we'll get 'em next time).
2916  */
2917  if (dead_tuples->num_tuples < dead_tuples->max_tuples)
2918  {
2919  dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr;
2920  dead_tuples->num_tuples++;
2922  dead_tuples->num_tuples);
2923  }
2924 }
2925 
2926 /*
2927  * lazy_tid_reaped() -- is a particular tid deletable?
2928  *
2929  * This has the right signature to be an IndexBulkDeleteCallback.
2930  *
2931  * Assumes dead_tuples array is in sorted order.
2932  */
2933 static bool
2935 {
2936  LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
2937  ItemPointer res;
2938 
2939  res = (ItemPointer) bsearch((void *) itemptr,
2940  (void *) dead_tuples->itemptrs,
2941  dead_tuples->num_tuples,
2942  sizeof(ItemPointerData),
2943  vac_cmp_itemptr);
2944 
2945  return (res != NULL);
2946 }
2947 
2948 /*
2949  * Comparator routines for use with qsort() and bsearch().
2950  */
2951 static int
2952 vac_cmp_itemptr(const void *left, const void *right)
2953 {
2954  BlockNumber lblk,
2955  rblk;
2956  OffsetNumber loff,
2957  roff;
2958 
2959  lblk = ItemPointerGetBlockNumber((ItemPointer) left);
2960  rblk = ItemPointerGetBlockNumber((ItemPointer) right);
2961 
2962  if (lblk < rblk)
2963  return -1;
2964  if (lblk > rblk)
2965  return 1;
2966 
2967  loff = ItemPointerGetOffsetNumber((ItemPointer) left);
2968  roff = ItemPointerGetOffsetNumber((ItemPointer) right);
2969 
2970  if (loff < roff)
2971  return -1;
2972  if (loff > roff)
2973  return 1;
2974 
2975  return 0;
2976 }
2977 
2978 /*
2979  * Check if every tuple in the given page is visible to all current and future
2980  * transactions. Also return the visibility_cutoff_xid which is the highest
2981  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
2982  * on this page is frozen.
2983  */
2984 static bool
2986  LVRelStats *vacrelstats,
2987  TransactionId *visibility_cutoff_xid,
2988  bool *all_frozen)
2989 {
2990  Page page = BufferGetPage(buf);
2991  BlockNumber blockno = BufferGetBlockNumber(buf);
2992  OffsetNumber offnum,
2993  maxoff;
2994  bool all_visible = true;
2995 
2996  *visibility_cutoff_xid = InvalidTransactionId;
2997  *all_frozen = true;
2998 
2999  /*
3000  * This is a stripped down version of the line pointer scan in
3001  * lazy_scan_heap(). So if you change anything here, also check that code.
3002  */
3003  maxoff = PageGetMaxOffsetNumber(page);
3004  for (offnum = FirstOffsetNumber;
3005  offnum <= maxoff && all_visible;
3006  offnum = OffsetNumberNext(offnum))
3007  {
3008  ItemId itemid;
3009  HeapTupleData tuple;
3010 
3011  /*
3012  * Set the offset number so that we can display it along with any
3013  * error that occurred while processing this tuple.
3014  */
3015  vacrelstats->offnum = offnum;
3016  itemid = PageGetItemId(page, offnum);
3017 
3018  /* Unused or redirect line pointers are of no interest */
3019  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3020  continue;
3021 
3022  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3023 
3024  /*
3025  * Dead line pointers can have index pointers pointing to them. So
3026  * they can't be treated as visible
3027  */
3028  if (ItemIdIsDead(itemid))
3029  {
3030  all_visible = false;
3031  *all_frozen = false;
3032  break;
3033  }
3034 
3035  Assert(ItemIdIsNormal(itemid));
3036 
3037  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3038  tuple.t_len = ItemIdGetLength(itemid);
3039  tuple.t_tableOid = RelationGetRelid(rel);
3040 
3041  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3042  {
3043  case HEAPTUPLE_LIVE:
3044  {
3045  TransactionId xmin;
3046 
3047  /* Check comments in lazy_scan_heap. */
3049  {
3050  all_visible = false;
3051  *all_frozen = false;
3052  break;
3053  }
3054 
3055  /*
3056  * The inserter definitely committed. But is it old enough
3057  * that everyone sees it as committed?
3058  */
3059  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3060  if (!TransactionIdPrecedes(xmin, OldestXmin))
3061  {
3062  all_visible = false;
3063  *all_frozen = false;
3064  break;
3065  }
3066 
3067  /* Track newest xmin on page. */
3068  if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
3069  *visibility_cutoff_xid = xmin;
3070 
3071  /* Check whether this tuple is already frozen or not */
3072  if (all_visible && *all_frozen &&
3074  *all_frozen = false;
3075  }
3076  break;
3077 
3078  case HEAPTUPLE_DEAD:
3082  {
3083  all_visible = false;
3084  *all_frozen = false;
3085  break;
3086  }
3087  default:
3088  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3089  break;
3090  }
3091  } /* scan along page */
3092 
3093  /* Clear the offset information once we have processed the given page. */
3094  vacrelstats->offnum = InvalidOffsetNumber;
3095 
3096  return all_visible;
3097 }
3098 
3099 /*
3100  * Compute the number of parallel worker processes to request. Both index
3101  * vacuum and index cleanup can be executed with parallel workers. The index
3102  * is eligible for parallel vacuum iff its size is greater than
3103  * min_parallel_index_scan_size as invoking workers for very small indexes
3104  * can hurt performance.
3105  *
3106  * nrequested is the number of parallel workers that user requested. If
3107  * nrequested is 0, we compute the parallel degree based on nindexes, that is
3108  * the number of indexes that support parallel vacuum. This function also
3109  * sets can_parallel_vacuum to remember indexes that participate in parallel
3110  * vacuum.
3111  */
3112 static int
3113 compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
3114  bool *can_parallel_vacuum)
3115 {
3116  int nindexes_parallel = 0;
3117  int nindexes_parallel_bulkdel = 0;
3118  int nindexes_parallel_cleanup = 0;
3119  int parallel_workers;
3120  int i;
3121 
3122  /*
3123  * We don't allow performing parallel operation in standalone backend or
3124  * when parallelism is disabled.
3125  */
3127  return 0;
3128 
3129  /*
3130  * Compute the number of indexes that can participate in parallel vacuum.
3131  */
3132  for (i = 0; i < nindexes; i++)
3133  {
3134  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3135 
3136  if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
3138  continue;
3139 
3140  can_parallel_vacuum[i] = true;
3141 
3142  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3143  nindexes_parallel_bulkdel++;
3144  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
3145  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3146  nindexes_parallel_cleanup++;
3147  }
3148 
3149  nindexes_parallel = Max(nindexes_parallel_bulkdel,
3150  nindexes_parallel_cleanup);
3151 
3152  /* The leader process takes one index */
3153  nindexes_parallel--;
3154 
3155  /* No index supports parallel vacuum */
3156  if (nindexes_parallel <= 0)
3157  return 0;
3158 
3159  /* Compute the parallel degree */
3160  parallel_workers = (nrequested > 0) ?
3161  Min(nrequested, nindexes_parallel) : nindexes_parallel;
3162 
3163  /* Cap by max_parallel_maintenance_workers */
3164  parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
3165 
3166  return parallel_workers;
3167 }
3168 
3169 /*
3170  * Initialize variables for shared index statistics, set NULL bitmap and the
3171  * size of stats for each index.
3172  */
3173 static void
3174 prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
3175  int nindexes)
3176 {
3177  int i;
3178 
3179  /* Currently, we don't support parallel vacuum for autovacuum */
3181 
3182  /* Set NULL for all indexes */
3183  memset(lvshared->bitmap, 0x00, BITMAPLEN(nindexes));
3184 
3185  for (i = 0; i < nindexes; i++)
3186  {
3187  if (!can_parallel_vacuum[i])
3188  continue;
3189 
3190  /* Set NOT NULL as this index does support parallelism */
3191  lvshared->bitmap[i >> 3] |= 1 << (i & 0x07);
3192  }
3193 }
3194 
3195 /*
3196  * Update index statistics in pg_class if the statistics are accurate.
3197  */
3198 static void
3200  int nindexes)
3201 {
3202  int i;
3203 
3205 
3206  for (i = 0; i < nindexes; i++)
3207  {
3208  if (stats[i] == NULL || stats[i]->estimated_count)
3209  continue;
3210 
3211  /* Update index statistics */
3212  vac_update_relstats(Irel[i],
3213  stats[i]->num_pages,
3214  stats[i]->num_index_tuples,
3215  0,
3216  false,
3219  false);
3220  pfree(stats[i]);
3221  }
3222 }
3223 
3224 /*
3225  * This function prepares and returns parallel vacuum state if we can launch
3226  * even one worker. This function is responsible for entering parallel mode,
3227  * create a parallel context, and then initialize the DSM segment.
3228  */
3229 static LVParallelState *
3230 begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
3231  BlockNumber nblocks, int nindexes, int nrequested)
3232 {
3233  LVParallelState *lps = NULL;
3234  ParallelContext *pcxt;
3235  LVShared *shared;
3236  LVDeadTuples *dead_tuples;
3237  BufferUsage *buffer_usage;
3238  WalUsage *wal_usage;
3239  bool *can_parallel_vacuum;
3240  long maxtuples;
3241  char *sharedquery;
3242  Size est_shared;
3243  Size est_deadtuples;
3244  int nindexes_mwm = 0;
3245  int parallel_workers = 0;
3246  int querylen;
3247  int i;
3248 
3249  /*
3250  * A parallel vacuum must be requested and there must be indexes on the
3251  * relation
3252  */
3253  Assert(nrequested >= 0);
3254  Assert(nindexes > 0);
3255 
3256  /*
3257  * Compute the number of parallel vacuum workers to launch
3258  */
3259  can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
3260  parallel_workers = compute_parallel_vacuum_workers(Irel, nindexes,
3261  nrequested,
3262  can_parallel_vacuum);
3263 
3264  /* Can't perform vacuum in parallel */
3265  if (parallel_workers <= 0)
3266  {
3267  pfree(can_parallel_vacuum);
3268  return lps;
3269  }
3270 
3271  lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
3272 
3274  pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
3275  parallel_workers);
3276  Assert(pcxt->nworkers > 0);
3277  lps->pcxt = pcxt;
3278 
3279  /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
3280  est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3281  for (i = 0; i < nindexes; i++)
3282  {
3283  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3284 
3285  /*
3286  * Cleanup option should be either disabled, always performing in
3287  * parallel or conditionally performing in parallel.
3288  */
3289  Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
3290  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
3291  Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
3292 
3293  /* Skip indexes that don't participate in parallel vacuum */
3294  if (!can_parallel_vacuum[i])
3295  continue;
3296 
3297  if (Irel[i]->rd_indam->amusemaintenanceworkmem)
3298  nindexes_mwm++;
3299 
3300  est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
3301 
3302  /*
3303  * Remember the number of indexes that support parallel operation for
3304  * each phase.
3305  */
3306  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3308  if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
3310  if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
3312  }
3313  shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
3314  shm_toc_estimate_keys(&pcxt->estimator, 1);
3315 
3316  /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
3317  maxtuples = compute_max_dead_tuples(nblocks, true);
3318  est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
3319  shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
3320  shm_toc_estimate_keys(&pcxt->estimator, 1);
3321 
3322  /*
3323  * Estimate space for BufferUsage and WalUsage --
3324  * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
3325  *
3326  * If there are no extensions loaded that care, we could skip this. We
3327  * have no way of knowing whether anyone's looking at pgBufferUsage or
3328  * pgWalUsage, so do it unconditionally.
3329  */
3331  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3332  shm_toc_estimate_keys(&pcxt->estimator, 1);
3334  mul_size(sizeof(WalUsage), pcxt->nworkers));
3335  shm_toc_estimate_keys(&pcxt->estimator, 1);
3336 
3337  /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
3338  querylen = strlen(debug_query_string);
3339  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
3340  shm_toc_estimate_keys(&pcxt->estimator, 1);
3341 
3342  InitializeParallelDSM(pcxt);
3343 
3344  /* Prepare shared information */
3345  shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
3346  MemSet(shared, 0, est_shared);
3347  shared->relid = relid;
3348  shared->elevel = elevel;
3349  shared->maintenance_work_mem_worker =
3350  (nindexes_mwm > 0) ?
3351  maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
3353 
3354  pg_atomic_init_u32(&(shared->cost_balance), 0);
3355  pg_atomic_init_u32(&(shared->active_nworkers), 0);
3356  pg_atomic_init_u32(&(shared->idx), 0);
3357  shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3358  prepare_index_statistics(shared, can_parallel_vacuum, nindexes);
3359 
3361  lps->lvshared = shared;
3362 
3363  /* Prepare the dead tuple space */
3364  dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
3365  dead_tuples->max_tuples = maxtuples;
3366  dead_tuples->num_tuples = 0;
3367  MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
3368  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
3369  vacrelstats->dead_tuples = dead_tuples;
3370 
3371  /*
3372  * Allocate space for each worker's BufferUsage and WalUsage; no need to
3373  * initialize
3374  */
3375  buffer_usage = shm_toc_allocate(pcxt->toc,
3376  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3377  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
3378  lps->buffer_usage = buffer_usage;
3379  wal_usage = shm_toc_allocate(pcxt->toc,
3380  mul_size(sizeof(WalUsage), pcxt->nworkers));
3382  lps->wal_usage = wal_usage;
3383 
3384  /* Store query string for workers */
3385  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
3386  memcpy(sharedquery, debug_query_string, querylen + 1);
3387  sharedquery[querylen] = '\0';
3388  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
3389 
3390  pfree(can_parallel_vacuum);
3391  return lps;
3392 }
3393 
3394 /*
3395  * Destroy the parallel context, and end parallel mode.
3396  *
3397  * Since writes are not allowed during parallel mode, copy the
3398  * updated index statistics from DSM into local memory and then later use that
3399  * to update the index statistics. One might think that we can exit from
3400  * parallel mode, update the index statistics and then destroy parallel
3401  * context, but that won't be safe (see ExitParallelMode).
3402  */
3403 static void
3405  int nindexes)
3406 {
3407  int i;
3408 
3410 
3411  /* Copy the updated statistics */
3412  for (i = 0; i < nindexes; i++)
3413  {
3414  LVSharedIndStats *indstats = get_indstats(lps->lvshared, i);
3415 
3416  /*
3417  * Skip unused slot. The statistics of this index are already stored
3418  * in local memory.
3419  */
3420  if (indstats == NULL)
3421  continue;
3422 
3423  if (indstats->updated)
3424  {
3425  stats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
3426  memcpy(stats[i], &(indstats->stats), sizeof(IndexBulkDeleteResult));
3427  }
3428  else
3429  stats[i] = NULL;
3430  }
3431 
3433  ExitParallelMode();
3434 
3435  /* Deactivate parallel vacuum */
3436  pfree(lps);
3437  lps = NULL;
3438 }
3439 
3440 /* Return the Nth index statistics or NULL */
3441 static LVSharedIndStats *
3442 get_indstats(LVShared *lvshared, int n)
3443 {
3444  int i;
3445  char *p;
3446 
3447  if (IndStatsIsNull(lvshared, n))
3448  return NULL;
3449 
3450  p = (char *) GetSharedIndStats(lvshared);
3451  for (i = 0; i < n; i++)
3452  {
3453  if (IndStatsIsNull(lvshared, i))
3454  continue;
3455 
3456  p += sizeof(LVSharedIndStats);
3457  }
3458 
3459  return (LVSharedIndStats *) p;
3460 }
3461 
3462 /*
3463  * Returns true, if the given index can't participate in parallel index vacuum
3464  * or parallel index cleanup, false, otherwise.
3465  */
3466 static bool
3468 {
3469  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3470 
3471  /* first_time must be true only if for_cleanup is true */
3472  Assert(lvshared->for_cleanup || !lvshared->first_time);
3473 
3474  if (lvshared->for_cleanup)
3475  {
3476  /* Skip, if the index does not support parallel cleanup */
3477  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
3478  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
3479  return true;
3480 
3481  /*
3482  * Skip, if the index supports parallel cleanup conditionally, but we
3483  * have already processed the index (for bulkdelete). See the
3484  * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
3485  * when indexes support parallel cleanup conditionally.
3486  */
3487  if (!lvshared->first_time &&
3488  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3489  return true;
3490  }
3491  else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
3492  {
3493  /* Skip if the index does not support parallel bulk deletion */
3494  return true;
3495  }
3496 
3497  return false;
3498 }
3499 
3500 /*
3501  * Perform work within a launched parallel process.
3502  *
3503  * Since parallel vacuum workers perform only index vacuum or index cleanup,
3504  * we don't need to report progress information.
3505  */
3506 void
3508 {
3509  Relation onerel;
3510  Relation *indrels;
3511  LVShared *lvshared;
3512  LVDeadTuples *dead_tuples;
3513  BufferUsage *buffer_usage;
3514  WalUsage *wal_usage;
3515  int nindexes;
3516  char *sharedquery;
3517  IndexBulkDeleteResult **stats;
3518  LVRelStats vacrelstats;
3519  ErrorContextCallback errcallback;
3520 
3522  false);
3523  elevel = lvshared->elevel;
3524 
3525  if (lvshared->for_cleanup)
3526  elog(DEBUG1, "starting parallel vacuum worker for cleanup");
3527  else
3528  elog(DEBUG1, "starting parallel vacuum worker for bulk delete");
3529 
3530  /* Set debug_query_string for individual workers */
3531  sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, false);
3532  debug_query_string = sharedquery;
3534 
3535  /*
3536  * Open table. The lock mode is the same as the leader process. It's
3537  * okay because the lock mode does not conflict among the parallel
3538  * workers.
3539  */
3540  onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
3541 
3542  /*
3543  * Open all indexes. indrels are sorted in order by OID, which should be
3544  * matched to the leader's one.
3545  */
3546  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels);
3547  Assert(nindexes > 0);
3548 
3549  /* Set dead tuple space */
3550  dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
3552  false);
3553 
3554  /* Set cost-based vacuum delay */
3556  VacuumCostBalance = 0;
3557  VacuumPageHit = 0;
3558  VacuumPageMiss = 0;
3559  VacuumPageDirty = 0;
3561  VacuumSharedCostBalance = &(lvshared->cost_balance);
3562  VacuumActiveNWorkers = &(lvshared->active_nworkers);
3563 
3564  stats = (IndexBulkDeleteResult **)
3565  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
3566 
3567  if (lvshared->maintenance_work_mem_worker > 0)
3569 
3570  /*
3571  * Initialize vacrelstats for use as error callback arg by parallel
3572  * worker.
3573  */
3574  vacrelstats.relnamespace = get_namespace_name(RelationGetNamespace(onerel));
3575  vacrelstats.relname = pstrdup(RelationGetRelationName(onerel));
3576  vacrelstats.indname = NULL;
3577  vacrelstats.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
3578 
3579  /* Setup error traceback support for ereport() */
3580  errcallback.callback = vacuum_error_callback;
3581  errcallback.arg = &vacrelstats;
3582  errcallback.previous = error_context_stack;
3583  error_context_stack = &errcallback;
3584 
3585  /* Prepare to track buffer usage during parallel execution */
3587 
3588  /* Process indexes to perform vacuum/cleanup */
3589  parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes,
3590  &vacrelstats);
3591 
3592  /* Report buffer/WAL usage during parallel execution */
3593  buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
3594  wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
3596  &wal_usage[ParallelWorkerNumber]);
3597 
3598  /* Pop the error context stack */
3599  error_context_stack = errcallback.previous;
3600 
3601  vac_close_indexes(nindexes, indrels, RowExclusiveLock);
3603  pfree(stats);
3604 }
3605 
3606 /*
3607  * Error context callback for errors occurring during vacuum.
3608  */
3609 static void
3611 {
3612  LVRelStats *errinfo = arg;
3613 
3614  switch (errinfo->phase)
3615  {
3617  if (BlockNumberIsValid(errinfo->blkno))
3618  {
3619  if (OffsetNumberIsValid(errinfo->offnum))
3620  errcontext("while scanning block %u and offset %u of relation \"%s.%s\"",
3621  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3622  else
3623  errcontext("while scanning block %u of relation \"%s.%s\"",
3624  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3625  }
3626  else
3627  errcontext("while scanning relation \"%s.%s\"",
3628  errinfo->relnamespace, errinfo->relname);
3629  break;
3630 
3632  if (BlockNumberIsValid(errinfo->blkno))
3633  {
3634  if (OffsetNumberIsValid(errinfo->offnum))
3635  errcontext("while vacuuming block %u and offset %u of relation \"%s.%s\"",
3636  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3637  else
3638  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3639  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3640  }
3641  else
3642  errcontext("while vacuuming relation \"%s.%s\"",
3643  errinfo->relnamespace, errinfo->relname);
3644  break;
3645 
3647  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3648  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3649  break;
3650 
3652  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3653  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3654  break;
3655 
3657  if (BlockNumberIsValid(errinfo->blkno))
3658  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3659  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3660  break;
3661 
3663  default:
3664  return; /* do nothing; the errinfo may not be
3665  * initialized */
3666  }
3667 }
3668 
3669 /*
3670  * Updates the information required for vacuum error callback. This also saves
3671  * the current information which can be later restored via restore_vacuum_error_info.
3672  */
3673 static void
3674 update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase,
3675  BlockNumber blkno, OffsetNumber offnum)
3676 {
3677  if (saved_err_info)
3678  {
3679  saved_err_info->offnum = errinfo->offnum;
3680  saved_err_info->blkno = errinfo->blkno;
3681  saved_err_info->phase = errinfo->phase;
3682  }
3683 
3684  errinfo->blkno = blkno;
3685  errinfo->offnum = offnum;
3686  errinfo->phase = phase;
3687 }
3688 
3689 /*
3690  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3691  */
3692 static void
3693 restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info)
3694 {
3695  errinfo->blkno = saved_err_info->blkno;
3696  errinfo->offnum = saved_err_info->offnum;
3697  errinfo->phase = saved_err_info->phase;
3698 }
int autovacuum_work_mem
Definition: autovacuum.c:116
double new_rel_tuples
Definition: vacuumlazy.c:306
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:6903
#define GetSharedIndStats(s)
Definition: vacuumlazy.c:254
uint8 amparallelvacuumoptions
Definition: amapi.h:247
XLogRecPtr log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
Definition: heapam.c:7144
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3610
struct IndexAmRoutine * rd_indam
Definition: rel.h:188
int multixact_freeze_table_age
Definition: vacuum.h:215
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2030
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
Definition: vacuumlazy.c:1925
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3807
XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples)
Definition: heapam.c:7224
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
WalUsage * wal_usage
Definition: vacuumlazy.c:281
#define PageIsEmpty(page)
Definition: bufpage.h:222
int64 VacuumPageMiss
Definition: globals.c:145
#define DEBUG1
Definition: elog.h:25
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2721
int heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, bool report_stats, TransactionId *latestRemovedXid, OffsetNumber *off_loc)
Definition: pruneheap.c:223
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1090
BlockNumber rel_pages
Definition: vacuumlazy.c:300
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:77
static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested, bool *can_parallel_vacuum)
Definition: vacuumlazy.c:3113
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1163
OffsetNumber offset
Definition: heapam_xlog.h:320
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:1844
int VacuumCostBalance
Definition: globals.c:148
ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:172
bool estimated_count
Definition: vacuumlazy.c:215
static bool lazy_tid_reaped(ItemPointer itemptr, void *state)
Definition: vacuumlazy.c:2934
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, bool isTopLevel, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:932
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:164
LVDeadTuples * dead_tuples
Definition: vacuumlazy.c:312
#define PageIsAllVisible(page)
Definition: bufpage.h:385
uint32 TransactionId
Definition: c.h:520
#define IndStatsIsNull(s, i)
Definition: vacuumlazy.c:256
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:282
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
#define PARALLEL_VACUUM_KEY_DEAD_TUPLES
Definition: vacuumlazy.c:139
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
Definition: pgstat.c:3210
Oid relid
Definition: vacuumlazy.c:193
#define PROGRESS_VACUUM_MAX_DEAD_TUPLES
Definition: progress.h:26
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:31
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1574
double tuples_deleted
Definition: vacuumlazy.c:310
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: pgstat.c:3132
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
int64 TimestampTz
Definition: timestamp.h:39
WalUsage pgWalUsage
Definition: instrument.c:22
#define SizeOfDeadTuples(cnt)
Definition: vacuumlazy.c:177
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
#define MaxOffsetNumber
Definition: off.h:28
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3231
BufferUsage * buffer_usage
Definition: vacuumlazy.c:278
#define VISIBILITYMAP_ALL_FROZEN
Definition: visibilitymap.h:27
char * pstrdup(const char *in)
Definition: mcxt.c:1187
static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr)
Definition: vacuumlazy.c:2910
shm_toc_estimator estimator
Definition: parallel.h:42
bool analyze_only
Definition: genam.h:47
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:653
bool useindex
Definition: vacuumlazy.c:297
#define XLogIsNeeded()
Definition: xlog.h:191
struct timeval instr_time
Definition: instr_time.h:150
int64 VacuumPageHit
Definition: globals.c:144
#define Min(x, y)
Definition: c.h:927
bool report_progress
Definition: genam.h:48
BlockNumber tupcount_pages
Definition: vacuumlazy.c:304
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BufferAccessStrategy strategy
Definition: genam.h:52
struct LVSharedIndStats LVSharedIndStats
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:255
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:34
unsigned char uint8
Definition: c.h:372
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define InvalidBuffer
Definition: buf.h:25
#define gettext_noop(x)
Definition: c.h:1137
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:283
char * relnamespace
Definition: vacuumlazy.c:294
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define VACUUM_OPTION_MAX_VALID_VALUE
Definition: vacuum.h:63
Relation index
Definition: genam.h:46
BlockNumber scanned_pages
Definition: vacuumlazy.c:301
VacErrPhase phase
Definition: vacuumlazy.c:321
#define MemSet(start, val, len)
Definition: c.h:949
#define INFO
Definition: elog.h:33
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:103
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int64 VacuumPageDirty
Definition: globals.c:146
static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
Definition: vacuumlazy.c:2860
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3511
#define BITMAPLEN(NATTS)
Definition: htup_details.h:547
static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared)
Definition: vacuumlazy.c:3467
int nindexes_parallel_bulkdel
Definition: vacuumlazy.c:287
OffsetNumber offnum
Definition: vacuumlazy.c:328
BlockNumber pinskipped_pages
Definition: vacuumlazy.c:302
int maintenance_work_mem_worker
Definition: vacuumlazy.c:225
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:109
unsigned int Oid
Definition: postgres_ext.h:31
static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info)
Definition: vacuumlazy.c:3693
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
int nindexes_parallel_condcleanup
Definition: vacuumlazy.c:289
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:189
void(* callback)(void *arg)
Definition: elog.h:229
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1673
struct ErrorContextCallback * previous
Definition: elog.h:228
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
int freeze_table_age
Definition: vacuum.h:212
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:513
int errdetail_internal(const char *fmt,...)
Definition: elog.c:984
#define PARALLEL_VACUUM_KEY_QUERY_TEXT
Definition: vacuumlazy.c:140
static LVSharedIndStats * get_indstats(LVShared *lvshared, int n)
Definition: vacuumlazy.c:3442
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define VISIBILITYMAP_VALID_BITS
Definition: visibilitymap.h:28
HeapTupleHeader t_data
Definition: htup.h:68
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen_p)
Definition: heapam.c:6126
#define FORCE_CHECK_PAGE()
ErrorContextCallback * error_context_stack
Definition: elog.c:92
ParallelContext * pcxt
Definition: vacuumlazy.c:272
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:34
#define SizeOfLVShared
Definition: vacuumlazy.c:253
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:676
pg_atomic_uint32 cost_balance
Definition: vacuumlazy.c:232
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:859
BlockNumber blkno
Definition: vacuumlazy.c:327
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:750
BlockNumber old_rel_pages
Definition: vacuumlazy.c:299
void pg_usleep(long microsec)
Definition: signal.c:53
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:904
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3866
#define PREFETCH_SIZE
Definition: vacuumlazy.c:131
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:6771
VacErrPhase phase
Definition: vacuumlazy.c:329
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
void pfree(void *pointer)
Definition: mcxt.c:1057
bool IsInParallelMode(void)
Definition: xact.c:1012
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:111
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3534
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3944
#define ERROR
Definition: elog.h:43
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:91
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:866
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:170
int freeze_min_age
Definition: vacuum.h:211
ItemPointerData t_self
Definition: htup.h:65
void ExitParallelMode(void)
Definition: xact.c:992
bool is_wraparound
Definition: vacuum.h:217
long wal_records
Definition: instrument.h:37
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define DEBUG2
Definition: elog.h:24
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:324
static TransactionId FreezeLimit
Definition: vacuumlazy.c:336
IndexBulkDeleteResult stats
Definition: vacuumlazy.c:266
static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:1785
uint32 t_len
Definition: htup.h:64
void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
Definition: heapam.c:6355
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3191
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:68
#define PageSetAllVisible(page)
Definition: bufpage.h:387
bool IsUnderPostmaster
Definition: globals.c:109
#define FirstOffsetNumber
Definition: off.h:27
#define RowExclusiveLock
Definition: lockdefs.h:38
struct LVDeadTuples LVDeadTuples
int errdetail(const char *fmt,...)
Definition: elog.c:957
int elevel
Definition: vacuumlazy.c:194
int ParallelWorkerNumber
Definition: parallel.c:112
static MultiXactId MultiXactCutoff
Definition: vacuumlazy.c:337
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:247
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:490
static TransactionId OldestXmin
Definition: vacuumlazy.c:335
pg_atomic_uint32 idx
Definition: vacuumlazy.c:246
unsigned int uint32
Definition: c.h:374
Oid t_tableOid
Definition: htup.h:66
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
int min_parallel_index_scan_size
Definition: allpaths.c:65
int nworkers_launched
Definition: parallel.h:38
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2432
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:527
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3325
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:199
static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2274
bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf)
Definition: heapam.c:6824
#define PROGRESS_VACUUM_NUM_DEAD_TUPLES
Definition: progress.h:27
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2032
#define IsParallelWorker()
Definition: parallel.h:61
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
double new_live_tuples
Definition: vacuumlazy.c:307
bool first_time
Definition: vacuumlazy.c:202
VacOptTernaryValue index_cleanup
Definition: vacuum.h:221
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:125
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:102
#define MaxAllocSize
Definition: memutils.h:40
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define WARNING
Definition: elog.h:40
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:1987
const char * debug_query_string
Definition: postgres.c:88
static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum, int nindexes)
Definition: vacuumlazy.c:3174
double reltuples
Definition: vacuumlazy.c:214
#define VACUUM_OPTION_NO_PARALLEL
Definition: vacuum.h:39
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:202
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int VacuumCostBalanceLocal
Definition: vacuum.c:78
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:76
void InstrStartParallelQuery(void)
Definition: instrument.c:181
static int elevel
Definition: vacuumlazy.c:333
uint8 bits8
Definition: c.h:381
#define ngettext(s, p, n)
Definition: c.h:1123
static void lazy_cleanup_index(Relation indrel, IndexBulkDeleteResult **stats, double reltuples, bool estimated_count, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2490
int nindexes_parallel_cleanup
Definition: vacuumlazy.c:288
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2092
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:230
#define ParallelVacuumIsActive(lps)
Definition: vacuumlazy.c:148
void * palloc0(Size size)
Definition: mcxt.c:981
char * indname
Definition: vacuumlazy.c:318
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:101
void pgstat_progress_end_command(void)
Definition: pgstat.c:3282
char * relname
Definition: vacuumlazy.c:295
IndexBulkDeleteResult * index_bulk_delete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: indexam.c:689
void heap_vacuum_rel(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:419
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
Oid MyDatabaseId
Definition: globals.c:85
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3750
#define PARALLEL_VACUUM_KEY_SHARED
Definition: vacuumlazy.c:138
int max_parallel_maintenance_workers
Definition: globals.c:124
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:463
#define InvalidMultiXactId
Definition: multixact.h:24
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats, int nindexes)
Definition: vacuumlazy.c:3199
static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2377
static bool should_attempt_truncation(VacuumParams *params, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2570
pg_atomic_uint32 active_nworkers
Definition: vacuumlazy.c:239
#define InvalidOffsetNumber
Definition: off.h:26
VacOptTernaryValue truncate
Definition: vacuum.h:223
static LVParallelState * begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats, BlockNumber nblocks, int nindexes, int nrequested)
Definition: vacuumlazy.c:3230
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:35
#define ereport(elevel,...)
Definition: elog.h:144
int num_index_scans
Definition: vacuumlazy.c:313
int maintenance_work_mem
Definition: globals.c:123
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:345
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
double old_live_tuples
Definition: vacuumlazy.c:305
static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase, BlockNumber blkno, OffsetNumber offnum)
Definition: vacuumlazy.c:3674
static void vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:709
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:328
OffsetNumber offnum
Definition: vacuumlazy.c:320
int message_level
Definition: genam.h:50
TransactionId MultiXactId
Definition: c.h:530
RelFileNode rd_node
Definition: rel.h:55
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
double num_heap_tuples
Definition: genam.h:51
#define Max(x, y)
Definition: c.h:921
static BufferAccessStrategy vac_strategy
Definition: vacuumlazy.c:339
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define PARALLEL_VACUUM_KEY_BUFFER_USAGE
Definition: vacuumlazy.c:141
#define PageClearAllVisible(page)
Definition: bufpage.h:389
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct LVShared LVShared
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:685
#define Assert(condition)
Definition: c.h:745
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:52
double new_dead_tuples
Definition: vacuumlazy.c:308
Definition: regguts.h:298
bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:248
#define PARALLEL_VACUUM_KEY_WAL_USAGE
Definition: vacuumlazy.c:142
TransactionId latestRemovedXid
Definition: vacuumlazy.c:314
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:205
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:33
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:32
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
Definition: pgstat.c:3253
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:473
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:30
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Definition: vacuumlazy.c:3507
int nworkers
Definition: vacuum.h:231
#define InvalidBlockNumber
Definition: block.h:33
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
long wal_fpi
Definition: instrument.h:38
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7173
#define MAXALIGN(LEN)
Definition: c.h:698
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
int log_min_duration
Definition: vacuum.h:218
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
void EnterParallelMode(void)
Definition: xact.c:979
LVShared * lvshared
Definition: vacuumlazy.c:275
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:45
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
#define RelationNeedsWAL(relation)
Definition: rel.h:562
IndexBulkDeleteResult * index_vacuum_cleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: indexam.c:710
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
struct LVRelStats LVRelStats
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:156
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
Definition: pgstat.c:1463
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:572
#define PageGetLSN(page)
Definition: bufpage.h:366
static void end_parallel_vacuum(IndexBulkDeleteResult **stats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:3404
bool for_cleanup
Definition: vacuumlazy.c:201
#define AccessExclusiveLock
Definition: lockdefs.h:45
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2661
BlockNumber pages_removed
Definition: vacuumlazy.c:309
BlockNumber nonempty_pages
Definition: vacuumlazy.c:311
void PageRepairFragmentation(Page page)
Definition: bufpage.c:674
#define PageIsNew(page)
Definition: bufpage.h:229
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:824
static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive)
Definition: vacuumlazy.c:754
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
BlockNumber frozenskipped_pages
Definition: vacuumlazy.c:303
double VacuumCostDelay
Definition: globals.c:142
#define elog(elevel,...)
Definition: elog.h:214
int old_snapshot_threshold
Definition: snapmgr.c:78
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3141
int i
int options
Definition: vacuum.h:210
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:575
#define errcontext
Definition: elog.h:185
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
void * arg
struct LVParallelState LVParallelState
static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVSharedIndStats *shared_indstats, LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2317
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:223
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
uint32 offset
Definition: vacuumlazy.c:247
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
static int vac_cmp_itemptr(const void *left, const void *right)
Definition: vacuumlazy.c:2952
uint64 wal_bytes
Definition: instrument.h:39
void vacuum_delay_point(void)
Definition: vacuum.c:2051
#define MAXDEADTUPLES(max_size)
Definition: vacuumlazy.c:180
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1648
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
BlockNumber blkno
Definition: vacuumlazy.c:319
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:60
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact)
Definition: vacuum.c:1246
static bool heap_page_is_all_visible(Relation rel, Buffer buf, LVRelStats *vacrelstats, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:2985
VacErrPhase
Definition: vacuumlazy.c:151
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define LAZY_ALLOC_TUPLES
Definition: vacuumlazy.c:119
int Buffer
Definition: buf.h:23
#define _(x)
Definition: elog.c:88
#define RelationGetRelid(relation)
Definition: rel.h:456
int multixact_freeze_min_age
Definition: vacuum.h:213
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2591
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:32
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:92
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
shm_toc * toc
Definition: parallel.h:45
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
Definition: vacuumlazy.c:2892
bool VacuumCostActive
Definition: globals.c:149
bool estimated_count
Definition: genam.h:49
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
struct LVSavedErrInfo LVSavedErrInfo
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:277
#define RelationGetNamespace(relation)
Definition: rel.h:497
static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVDeadTuples *dead_tuples, int nindexes, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2222
bool lock_waiter_detected
Definition: vacuumlazy.c:315