PostgreSQL Source Code  git master
vacuumlazy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  *
7  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
8  * TIDs. We want to ensure we can vacuum even the very largest relations with
9  * finite memory space usage. To do that, we set upper bounds on the number of
10  * tuples we will keep track of at once.
11  *
12  * We are willing to use at most maintenance_work_mem (or perhaps
13  * autovacuum_work_mem) memory space to keep track of dead tuples. We
14  * initially allocate an array of TIDs of that size, with an upper limit that
15  * depends on table size (this limit ensures we don't allocate a huge area
16  * uselessly for vacuuming small tables). If the array threatens to overflow,
17  * we suspend the heap scan phase and perform a pass of index cleanup and page
18  * compaction, then resume the heap scan with an empty TID array.
19  *
20  * If we're processing a table with no indexes, we can just vacuum each page
21  * as we go; there's no need to save up multiple tuples to minimize the number
22  * of index scans performed. So we don't use maintenance_work_mem memory for
23  * the TID array, just enough to hold as many heap tuples as fit on one page.
24  *
25  * Lazy vacuum supports parallel execution with parallel worker processes. In
26  * a parallel vacuum, we perform both index vacuum and index cleanup with
27  * parallel worker processes. Individual indexes are processed by one vacuum
28  * process. At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
29  * the parallel context and initialize the DSM segment that contains shared
30  * information as well as the memory space for storing dead tuples. When
31  * starting either index vacuum or index cleanup, we launch parallel worker
32  * processes. Once all indexes are processed the parallel worker processes
33  * exit. After that, the leader process re-initializes the parallel context
34  * so that it can use the same DSM for multiple passes of index vacuum and
35  * for performing index cleanup. For updating the index statistics, we need
36  * to update the system table and since updates are not allowed during
37  * parallel mode we update the index statistics after exiting from the
38  * parallel mode.
39  *
40  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
41  * Portions Copyright (c) 1994, Regents of the University of California
42  *
43  *
44  * IDENTIFICATION
45  * src/backend/access/heap/vacuumlazy.c
46  *
47  *-------------------------------------------------------------------------
48  */
49 #include "postgres.h"
50 
51 #include <math.h>
52 
53 #include "access/amapi.h"
54 #include "access/genam.h"
55 #include "access/heapam.h"
56 #include "access/heapam_xlog.h"
57 #include "access/htup_details.h"
58 #include "access/multixact.h"
59 #include "access/parallel.h"
60 #include "access/transam.h"
61 #include "access/visibilitymap.h"
62 #include "access/xact.h"
63 #include "access/xlog.h"
64 #include "catalog/storage.h"
65 #include "commands/dbcommands.h"
66 #include "commands/progress.h"
67 #include "commands/vacuum.h"
68 #include "executor/instrument.h"
69 #include "miscadmin.h"
70 #include "optimizer/paths.h"
71 #include "pgstat.h"
72 #include "portability/instr_time.h"
73 #include "postmaster/autovacuum.h"
74 #include "storage/bufmgr.h"
75 #include "storage/freespace.h"
76 #include "storage/lmgr.h"
77 #include "tcop/tcopprot.h"
78 #include "utils/lsyscache.h"
79 #include "utils/memutils.h"
80 #include "utils/pg_rusage.h"
81 #include "utils/timestamp.h"
82 
83 
84 /*
85  * Space/time tradeoff parameters: do these need to be user-tunable?
86  *
87  * To consider truncating the relation, we want there to be at least
88  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
89  * is less) potentially-freeable pages.
90  */
91 #define REL_TRUNCATE_MINIMUM 1000
92 #define REL_TRUNCATE_FRACTION 16
93 
94 /*
95  * Timing parameters for truncate locking heuristics.
96  *
97  * These were not exposed as user tunable GUC values because it didn't seem
98  * that the potential for improvement was great enough to merit the cost of
99  * supporting them.
100  */
101 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
102 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
103 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
104 
105 /*
106  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
107  * (it won't be exact because we only vacuum FSM after processing a heap page
108  * that has some removable tuples). When there are indexes, this is ignored,
109  * and we vacuum FSM after each index/heap cleaning pass.
110  */
111 #define VACUUM_FSM_EVERY_PAGES \
112  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
113 
114 /*
115  * Guesstimation of number of dead tuples per page. This is used to
116  * provide an upper limit to memory allocated when vacuuming small
117  * tables.
118  */
119 #define LAZY_ALLOC_TUPLES MaxHeapTuplesPerPage
120 
121 /*
122  * Before we consider skipping a page that's marked as clean in
123  * visibility map, we must've seen at least this many clean pages.
124  */
125 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
126 
127 /*
128  * Size of the prefetch window for lazy vacuum backwards truncation scan.
129  * Needs to be a power of 2.
130  */
131 #define PREFETCH_SIZE ((BlockNumber) 32)
132 
133 /*
134  * DSM keys for parallel vacuum. Unlike other parallel execution code, since
135  * we don't need to worry about DSM keys conflicting with plan_node_id we can
136  * use small integers.
137  */
138 #define PARALLEL_VACUUM_KEY_SHARED 1
139 #define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
140 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
141 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
142 #define PARALLEL_VACUUM_KEY_WAL_USAGE 5
143 
144 /*
145  * Macro to check if we are in a parallel vacuum. If true, we are in the
146  * parallel mode and the DSM segment is initialized.
147  */
148 #define ParallelVacuumIsActive(lps) PointerIsValid(lps)
149 
150 /* Phases of vacuum during which we report error context. */
151 typedef enum
152 {
159 } VacErrPhase;
160 
161 /*
162  * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
163  * This is allocated in the DSM segment in parallel mode and in local memory
164  * in non-parallel mode.
165  */
166 typedef struct LVDeadTuples
167 {
168  int max_tuples; /* # slots allocated in array */
169  int num_tuples; /* current # of entries */
170  /* List of TIDs of tuples we intend to delete */
171  /* NB: this list is ordered by TID address */
173  * ItemPointerData */
174 } LVDeadTuples;
175 
176 /* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
177 #define SizeOfDeadTuples(cnt) \
178  add_size(offsetof(LVDeadTuples, itemptrs), \
179  mul_size(sizeof(ItemPointerData), cnt))
180 #define MAXDEADTUPLES(max_size) \
181  (((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
182 
183 /*
184  * Shared information among parallel workers. So this is allocated in the DSM
185  * segment.
186  */
187 typedef struct LVShared
188 {
189  /*
190  * Target table relid and log level. These fields are not modified during
191  * the lazy vacuum.
192  */
194  int elevel;
195 
196  /*
197  * An indication for vacuum workers to perform either index vacuum or
198  * index cleanup. first_time is true only if for_cleanup is true and
199  * bulk-deletion is not performed yet.
200  */
203 
204  /*
205  * Fields for both index vacuum and cleanup.
206  *
207  * reltuples is the total number of input heap tuples. We set either old
208  * live tuples in the index vacuum case or the new live tuples in the
209  * index cleanup case.
210  *
211  * estimated_count is true if reltuples is an estimated value. (Note that
212  * reltuples could be -1 in this case, indicating we have no idea.)
213  */
214  double reltuples;
216 
217  /*
218  * In single process lazy vacuum we could consume more memory during index
219  * vacuuming or cleanup apart from the memory for heap scanning. In
220  * parallel vacuum, since individual vacuum workers can consume memory
221  * equal to maintenance_work_mem, the new maintenance_work_mem for each
222  * worker is set such that the parallel operation doesn't consume more
223  * memory than single process lazy vacuum.
224  */
226 
227  /*
228  * Shared vacuum cost balance. During parallel vacuum,
229  * VacuumSharedCostBalance points to this value and it accumulates the
230  * balance of each parallel vacuum worker.
231  */
233 
234  /*
235  * Number of active parallel workers. This is used for computing the
236  * minimum threshold of the vacuum cost balance before a worker sleeps for
237  * cost-based delay.
238  */
240 
241  /*
242  * Variables to control parallel vacuum. We have a bitmap to indicate
243  * which index has stats in shared memory. The set bit in the map
244  * indicates that the particular index supports a parallel vacuum.
245  */
246  pg_atomic_uint32 idx; /* counter for vacuuming and clean up */
247  uint32 offset; /* sizeof header incl. bitmap */
248  bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]; /* bit map of NULLs */
249 
250  /* Shared index statistics data follows at end of struct */
251 } LVShared;
252 
253 #define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
254 #define GetSharedIndStats(s) \
255  ((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
256 #define IndStatsIsNull(s, i) \
257  (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
258 
259 /*
260  * Struct for an index bulk-deletion statistic used for parallel vacuum. This
261  * is allocated in the DSM segment.
262  */
263 typedef struct LVSharedIndStats
264 {
265  bool updated; /* are the stats updated? */
268 
269 /* Struct for maintaining a parallel vacuum state. */
270 typedef struct LVParallelState
271 {
273 
274  /* Shared information among parallel vacuum workers */
276 
277  /* Points to buffer usage area in DSM */
279 
280  /* Points to WAL usage area in DSM */
282 
283  /*
284  * The number of indexes that support parallel index bulk-deletion and
285  * parallel index cleanup respectively.
286  */
291 
292 typedef struct LVRelStats
293 {
295  char *relname;
296  /* useindex = true means two-pass strategy; false means one-pass */
297  bool useindex;
298  /* Overall statistics about rel */
299  BlockNumber old_rel_pages; /* previous value of pg_class.relpages */
300  BlockNumber rel_pages; /* total number of pages */
301  BlockNumber scanned_pages; /* number of pages we examined */
302  BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */
303  BlockNumber frozenskipped_pages; /* # of frozen pages we skipped */
304  BlockNumber tupcount_pages; /* pages whose tuples we counted */
305  double old_live_tuples; /* previous value of pg_class.reltuples */
306  double new_rel_tuples; /* new estimated total # of tuples */
307  double new_live_tuples; /* new estimated total # of live tuples */
308  double new_dead_tuples; /* new estimated total # of dead tuples */
311  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
316 
317  /* Used for error callback */
318  char *indname;
319  BlockNumber blkno; /* used only for heap operations */
320  OffsetNumber offnum; /* used only for heap operations */
322 } LVRelStats;
323 
324 /* Struct for saving and restoring vacuum error information. */
325 typedef struct LVSavedErrInfo
326 {
331 
332 /* A few variables that don't seem worth passing around as parameters */
333 static int elevel = -1;
334 
338 
340 
341 
342 /* non-export function prototypes */
343 static void lazy_scan_heap(Relation onerel, VacuumParams *params,
344  LVRelStats *vacrelstats, Relation *Irel, int nindexes,
345  bool aggressive);
346 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
347 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup,
348  LVRelStats *vacrelstats);
349 static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
350  IndexBulkDeleteResult **stats,
351  LVRelStats *vacrelstats, LVParallelState *lps,
352  int nindexes);
353 static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
354  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats);
355 static void lazy_cleanup_index(Relation indrel,
356  IndexBulkDeleteResult **stats,
357  double reltuples, bool estimated_count, LVRelStats *vacrelstats);
358 static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
359  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
360 static bool should_attempt_truncation(VacuumParams *params,
361  LVRelStats *vacrelstats);
362 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
364  LVRelStats *vacrelstats);
365 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
366 static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples,
367  ItemPointer itemptr);
368 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
369 static int vac_cmp_itemptr(const void *left, const void *right);
371  LVRelStats *vacrelstats,
372  TransactionId *visibility_cutoff_xid, bool *all_frozen);
374  LVRelStats *vacrelstats, LVParallelState *lps,
375  int nindexes);
376 static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
377  LVShared *lvshared, LVDeadTuples *dead_tuples,
378  int nindexes, LVRelStats *vacrelstats);
379 static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
380  LVRelStats *vacrelstats, LVParallelState *lps,
381  int nindexes);
382 static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
383  LVShared *lvshared, LVSharedIndStats *shared_indstats,
384  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats);
385 static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
386  LVRelStats *vacrelstats, LVParallelState *lps,
387  int nindexes);
388 static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
389 static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
390  bool *can_parallel_vacuum);
391 static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
392  int nindexes);
393 static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
394  int nindexes);
396  LVRelStats *vacrelstats, BlockNumber nblocks,
397  int nindexes, int nrequested);
398 static void end_parallel_vacuum(IndexBulkDeleteResult **stats,
399  LVParallelState *lps, int nindexes);
400 static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
401 static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
402 static void vacuum_error_callback(void *arg);
403 static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info,
404  int phase, BlockNumber blkno,
405  OffsetNumber offnum);
406 static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info);
407 
408 
409 /*
410  * heap_vacuum_rel() -- perform VACUUM for one heap relation
411  *
412  * This routine vacuums a single heap, cleans out its indexes, and
413  * updates its relpages and reltuples statistics.
414  *
415  * At entry, we have already established a transaction and opened
416  * and locked the relation.
417  */
418 void
420  BufferAccessStrategy bstrategy)
421 {
422  LVRelStats *vacrelstats;
423  Relation *Irel;
424  int nindexes;
425  PGRUsage ru0;
426  TimestampTz starttime = 0;
427  WalUsage walusage_start = pgWalUsage;
428  WalUsage walusage = {0, 0, 0};
429  long secs;
430  int usecs;
431  double read_rate,
432  write_rate;
433  bool aggressive; /* should we scan all unfrozen pages? */
434  bool scanned_all_unfrozen; /* actually scanned all such pages? */
435  TransactionId xidFullScanLimit;
436  MultiXactId mxactFullScanLimit;
437  BlockNumber new_rel_pages;
438  BlockNumber new_rel_allvisible;
439  double new_live_tuples;
440  TransactionId new_frozen_xid;
441  MultiXactId new_min_multi;
442  ErrorContextCallback errcallback;
443 
444  Assert(params != NULL);
447 
448  /* not every AM requires these to be valid, but heap does */
449  Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid));
450  Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid));
451 
452  /* measure elapsed time iff autovacuum logging requires it */
453  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
454  {
455  pg_rusage_init(&ru0);
456  starttime = GetCurrentTimestamp();
457  }
458 
459  if (params->options & VACOPT_VERBOSE)
460  elevel = INFO;
461  else
462  elevel = DEBUG2;
463 
465  RelationGetRelid(onerel));
466 
467  vac_strategy = bstrategy;
468 
469  vacuum_set_xid_limits(onerel,
470  params->freeze_min_age,
471  params->freeze_table_age,
472  params->multixact_freeze_min_age,
474  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
475  &MultiXactCutoff, &mxactFullScanLimit);
476 
477  /*
478  * We request an aggressive scan if the table's frozen Xid is now older
479  * than or equal to the requested Xid full-table scan limit; or if the
480  * table's minimum MultiXactId is older than or equal to the requested
481  * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
482  */
483  aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
484  xidFullScanLimit);
485  aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
486  mxactFullScanLimit);
488  aggressive = true;
489 
490  vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
491 
492  vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel));
493  vacrelstats->relname = pstrdup(RelationGetRelationName(onerel));
494  vacrelstats->indname = NULL;
495  vacrelstats->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
496  vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
497  vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
498  vacrelstats->num_index_scans = 0;
499  vacrelstats->pages_removed = 0;
500  vacrelstats->lock_waiter_detected = false;
501 
502  /* Open all indexes of the relation */
503  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
504  vacrelstats->useindex = (nindexes > 0 &&
506 
507  /*
508  * Setup error traceback support for ereport(). The idea is to set up an
509  * error context callback to display additional information on any error
510  * during a vacuum. During different phases of vacuum (heap scan, heap
511  * vacuum, index vacuum, index clean up, heap truncate), we update the
512  * error context callback to display appropriate information.
513  *
514  * Note that the index vacuum and heap vacuum phases may be called
515  * multiple times in the middle of the heap scan phase. So the old phase
516  * information is restored at the end of those phases.
517  */
518  errcallback.callback = vacuum_error_callback;
519  errcallback.arg = vacrelstats;
520  errcallback.previous = error_context_stack;
521  error_context_stack = &errcallback;
522 
523  /* Do the vacuuming */
524  lazy_scan_heap(onerel, params, vacrelstats, Irel, nindexes, aggressive);
525 
526  /* Done with indexes */
527  vac_close_indexes(nindexes, Irel, NoLock);
528 
529  /*
530  * Compute whether we actually scanned the all unfrozen pages. If we did,
531  * we can adjust relfrozenxid and relminmxid.
532  *
533  * NB: We need to check this before truncating the relation, because that
534  * will change ->rel_pages.
535  */
536  if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
537  < vacrelstats->rel_pages)
538  {
539  Assert(!aggressive);
540  scanned_all_unfrozen = false;
541  }
542  else
543  scanned_all_unfrozen = true;
544 
545  /*
546  * Optionally truncate the relation.
547  */
548  if (should_attempt_truncation(params, vacrelstats))
549  {
550  /*
551  * Update error traceback information. This is the last phase during
552  * which we add context information to errors, so we don't need to
553  * revert to the previous phase.
554  */
556  vacrelstats->nonempty_pages,
558  lazy_truncate_heap(onerel, vacrelstats);
559  }
560 
561  /* Pop the error context stack */
562  error_context_stack = errcallback.previous;
563 
564  /* Report that we are now doing final cleanup */
567 
568  /*
569  * Update statistics in pg_class.
570  *
571  * In principle new_live_tuples could be -1 indicating that we (still)
572  * don't know the tuple count. In practice that probably can't happen,
573  * since we'd surely have scanned some pages if the table is new and
574  * nonempty.
575  *
576  * For safety, clamp relallvisible to be not more than what we're setting
577  * relpages to.
578  *
579  * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
580  * since then we don't know for certain that all tuples have a newer xmin.
581  */
582  new_rel_pages = vacrelstats->rel_pages;
583  new_live_tuples = vacrelstats->new_live_tuples;
584 
585  visibilitymap_count(onerel, &new_rel_allvisible, NULL);
586  if (new_rel_allvisible > new_rel_pages)
587  new_rel_allvisible = new_rel_pages;
588 
589  new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
590  new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
591 
592  vac_update_relstats(onerel,
593  new_rel_pages,
594  new_live_tuples,
595  new_rel_allvisible,
596  nindexes > 0,
597  new_frozen_xid,
598  new_min_multi,
599  false);
600 
601  /* report results to the stats collector, too */
603  onerel->rd_rel->relisshared,
604  Max(new_live_tuples, 0),
605  vacrelstats->new_dead_tuples);
607 
608  /* and log the action if appropriate */
609  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
610  {
611  TimestampTz endtime = GetCurrentTimestamp();
612 
613  if (params->log_min_duration == 0 ||
614  TimestampDifferenceExceeds(starttime, endtime,
615  params->log_min_duration))
616  {
618  char *msgfmt;
619 
620  TimestampDifference(starttime, endtime, &secs, &usecs);
621 
622  memset(&walusage, 0, sizeof(WalUsage));
623  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
624 
625  read_rate = 0;
626  write_rate = 0;
627  if ((secs > 0) || (usecs > 0))
628  {
629  read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
630  (secs + usecs / 1000000.0);
631  write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
632  (secs + usecs / 1000000.0);
633  }
634 
635  /*
636  * This is pretty messy, but we split it up so that we can skip
637  * emitting individual parts of the message when not applicable.
638  */
639  initStringInfo(&buf);
640  if (params->is_wraparound)
641  {
642  if (aggressive)
643  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
644  else
645  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
646  }
647  else
648  {
649  if (aggressive)
650  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
651  else
652  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
653  }
654  appendStringInfo(&buf, msgfmt,
656  vacrelstats->relnamespace,
657  vacrelstats->relname,
658  vacrelstats->num_index_scans);
659  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
660  vacrelstats->pages_removed,
661  vacrelstats->rel_pages,
662  vacrelstats->pinskipped_pages,
663  vacrelstats->frozenskipped_pages);
664  appendStringInfo(&buf,
665  _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
666  vacrelstats->tuples_deleted,
667  vacrelstats->new_rel_tuples,
668  vacrelstats->new_dead_tuples,
669  OldestXmin);
670  appendStringInfo(&buf,
671  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
672  (long long) VacuumPageHit,
673  (long long) VacuumPageMiss,
674  (long long) VacuumPageDirty);
675  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
676  read_rate, write_rate);
677  appendStringInfo(&buf, _("system usage: %s\n"), pg_rusage_show(&ru0));
678  appendStringInfo(&buf,
679  _("WAL usage: %ld records, %ld full page images, %llu bytes"),
680  walusage.wal_records,
681  walusage.wal_fpi,
682  (unsigned long long) walusage.wal_bytes);
683 
684  ereport(LOG,
685  (errmsg_internal("%s", buf.data)));
686  pfree(buf.data);
687  }
688  }
689 }
690 
691 /*
692  * For Hot Standby we need to know the highest transaction id that will
693  * be removed by any change. VACUUM proceeds in a number of passes so
694  * we need to consider how each pass operates. The first phase runs
695  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
696  * progresses - these will have a latestRemovedXid on each record.
697  * In some cases this removes all of the tuples to be removed, though
698  * often we have dead tuples with index pointers so we must remember them
699  * for removal in phase 3. Index records for those rows are removed
700  * in phase 2 and index blocks do not have MVCC information attached.
701  * So before we can allow removal of any index tuples we need to issue
702  * a WAL record containing the latestRemovedXid of rows that will be
703  * removed in phase three. This allows recovery queries to block at the
704  * correct place, i.e. before phase two, rather than during phase three
705  * which would be after the rows have become inaccessible.
706  */
707 static void
709 {
710  /*
711  * Skip this for relations for which no WAL is to be written, or if we're
712  * not trying to support archive recovery.
713  */
714  if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
715  return;
716 
717  /*
718  * No need to write the record at all unless it contains a valid value
719  */
720  if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
721  (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
722 }
723 
724 /*
725  * lazy_scan_heap() -- scan an open heap relation
726  *
727  * This routine prunes each page in the heap, which will among other
728  * things truncate dead tuples to dead line pointers, defragment the
729  * page, and set commit status bits (see heap_page_prune). It also builds
730  * lists of dead tuples and pages with free space, calculates statistics
731  * on the number of live tuples in the heap, and marks pages as
732  * all-visible if appropriate. When done, or when we run low on space for
733  * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
734  * to reclaim dead line pointers.
735  *
736  * If the table has at least two indexes, we execute both index vacuum
737  * and index cleanup with parallel workers unless parallel vacuum is
738  * disabled. In a parallel vacuum, we enter parallel mode and then
739  * create both the parallel context and the DSM segment before starting
740  * heap scan so that we can record dead tuples to the DSM segment. All
741  * parallel workers are launched at beginning of index vacuuming and
742  * index cleanup and they exit once done with all indexes. At the end of
743  * this function we exit from parallel mode. Index bulk-deletion results
744  * are stored in the DSM segment and we update index statistics for all
745  * the indexes after exiting from parallel mode since writes are not
746  * allowed during parallel mode.
747  *
748  * If there are no indexes then we can reclaim line pointers on the fly;
749  * dead line pointers need only be retained until all index pointers that
750  * reference them have been killed.
751  */
752 static void
753 lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
754  Relation *Irel, int nindexes, bool aggressive)
755 {
756  LVParallelState *lps = NULL;
757  LVDeadTuples *dead_tuples;
758  BlockNumber nblocks,
759  blkno;
760  HeapTupleData tuple;
761  TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
762  TransactionId relminmxid = onerel->rd_rel->relminmxid;
763  BlockNumber empty_pages,
764  vacuumed_pages,
765  next_fsm_block_to_vacuum;
766  double num_tuples, /* total number of nonremovable tuples */
767  live_tuples, /* live tuples (reltuples estimate) */
768  tups_vacuumed, /* tuples cleaned up by vacuum */
769  nkeep, /* dead-but-not-removable tuples */
770  nunused; /* unused line pointers */
771  IndexBulkDeleteResult **indstats;
772  int i;
773  PGRUsage ru0;
774  Buffer vmbuffer = InvalidBuffer;
775  BlockNumber next_unskippable_block;
776  bool skipping_blocks;
777  xl_heap_freeze_tuple *frozen;
779  const int initprog_index[] = {
783  };
784  int64 initprog_val[3];
785  GlobalVisState *vistest;
786 
787  pg_rusage_init(&ru0);
788 
789  if (aggressive)
790  ereport(elevel,
791  (errmsg("aggressively vacuuming \"%s.%s\"",
792  vacrelstats->relnamespace,
793  vacrelstats->relname)));
794  else
795  ereport(elevel,
796  (errmsg("vacuuming \"%s.%s\"",
797  vacrelstats->relnamespace,
798  vacrelstats->relname)));
799 
800  empty_pages = vacuumed_pages = 0;
801  next_fsm_block_to_vacuum = (BlockNumber) 0;
802  num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
803 
804  indstats = (IndexBulkDeleteResult **)
805  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
806 
807  nblocks = RelationGetNumberOfBlocks(onerel);
808  vacrelstats->rel_pages = nblocks;
809  vacrelstats->scanned_pages = 0;
810  vacrelstats->tupcount_pages = 0;
811  vacrelstats->nonempty_pages = 0;
812  vacrelstats->latestRemovedXid = InvalidTransactionId;
813 
814  vistest = GlobalVisTestFor(onerel);
815 
816  /*
817  * Initialize state for a parallel vacuum. As of now, only one worker can
818  * be used for an index, so we invoke parallelism only if there are at
819  * least two indexes on a table.
820  */
821  if (params->nworkers >= 0 && vacrelstats->useindex && nindexes > 1)
822  {
823  /*
824  * Since parallel workers cannot access data in temporary tables, we
825  * can't perform parallel vacuum on them.
826  */
827  if (RelationUsesLocalBuffers(onerel))
828  {
829  /*
830  * Give warning only if the user explicitly tries to perform a
831  * parallel vacuum on the temporary table.
832  */
833  if (params->nworkers > 0)
835  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
836  vacrelstats->relname)));
837  }
838  else
839  lps = begin_parallel_vacuum(RelationGetRelid(onerel), Irel,
840  vacrelstats, nblocks, nindexes,
841  params->nworkers);
842  }
843 
844  /*
845  * Allocate the space for dead tuples in case parallel vacuum is not
846  * initialized.
847  */
848  if (!ParallelVacuumIsActive(lps))
849  lazy_space_alloc(vacrelstats, nblocks);
850 
851  dead_tuples = vacrelstats->dead_tuples;
853 
854  /* Report that we're scanning the heap, advertising total # of blocks */
855  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
856  initprog_val[1] = nblocks;
857  initprog_val[2] = dead_tuples->max_tuples;
858  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
859 
860  /*
861  * Except when aggressive is set, we want to skip pages that are
862  * all-visible according to the visibility map, but only when we can skip
863  * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading
864  * sequentially, the OS should be doing readahead for us, so there's no
865  * gain in skipping a page now and then; that's likely to disable
866  * readahead and so be counterproductive. Also, skipping even a single
867  * page means that we can't update relfrozenxid, so we only want to do it
868  * if we can skip a goodly number of pages.
869  *
870  * When aggressive is set, we can't skip pages just because they are
871  * all-visible, but we can still skip pages that are all-frozen, since
872  * such pages do not need freezing and do not affect the value that we can
873  * safely set for relfrozenxid or relminmxid.
874  *
875  * Before entering the main loop, establish the invariant that
876  * next_unskippable_block is the next block number >= blkno that we can't
877  * skip based on the visibility map, either all-visible for a regular scan
878  * or all-frozen for an aggressive scan. We set it to nblocks if there's
879  * no such block. We also set up the skipping_blocks flag correctly at
880  * this stage.
881  *
882  * Note: The value returned by visibilitymap_get_status could be slightly
883  * out-of-date, since we make this test before reading the corresponding
884  * heap page or locking the buffer. This is OK. If we mistakenly think
885  * that the page is all-visible or all-frozen when in fact the flag's just
886  * been cleared, we might fail to vacuum the page. It's easy to see that
887  * skipping a page when aggressive is not set is not a very big deal; we
888  * might leave some dead tuples lying around, but the next vacuum will
889  * find them. But even when aggressive *is* set, it's still OK if we miss
890  * a page whose all-frozen marking has just been cleared. Any new XIDs
891  * just added to that page are necessarily newer than the GlobalXmin we
892  * computed, so they'll have no effect on the value to which we can safely
893  * set relfrozenxid. A similar argument applies for MXIDs and relminmxid.
894  *
895  * We will scan the table's last page, at least to the extent of
896  * determining whether it has tuples or not, even if it should be skipped
897  * according to the above rules; except when we've already determined that
898  * it's not worth trying to truncate the table. This avoids having
899  * lazy_truncate_heap() take access-exclusive lock on the table to attempt
900  * a truncation that just fails immediately because there are tuples in
901  * the last page. This is worth avoiding mainly because such a lock must
902  * be replayed on any hot standby, where it can be disruptive.
903  */
904  next_unskippable_block = 0;
905  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
906  {
907  while (next_unskippable_block < nblocks)
908  {
909  uint8 vmstatus;
910 
911  vmstatus = visibilitymap_get_status(onerel, next_unskippable_block,
912  &vmbuffer);
913  if (aggressive)
914  {
915  if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
916  break;
917  }
918  else
919  {
920  if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
921  break;
922  }
924  next_unskippable_block++;
925  }
926  }
927 
928  if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
929  skipping_blocks = true;
930  else
931  skipping_blocks = false;
932 
933  for (blkno = 0; blkno < nblocks; blkno++)
934  {
935  Buffer buf;
936  Page page;
937  OffsetNumber offnum,
938  maxoff;
939  bool tupgone,
940  hastup;
941  int prev_dead_count;
942  int nfrozen;
943  Size freespace;
944  bool all_visible_according_to_vm = false;
945  bool all_visible;
946  bool all_frozen = true; /* provided all_visible is also true */
947  bool has_dead_tuples;
948  TransactionId visibility_cutoff_xid = InvalidTransactionId;
949 
950  /* see note above about forcing scanning of last page */
951 #define FORCE_CHECK_PAGE() \
952  (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats))
953 
955 
957  blkno, InvalidOffsetNumber);
958 
959  if (blkno == next_unskippable_block)
960  {
961  /* Time to advance next_unskippable_block */
962  next_unskippable_block++;
963  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
964  {
965  while (next_unskippable_block < nblocks)
966  {
967  uint8 vmskipflags;
968 
969  vmskipflags = visibilitymap_get_status(onerel,
970  next_unskippable_block,
971  &vmbuffer);
972  if (aggressive)
973  {
974  if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
975  break;
976  }
977  else
978  {
979  if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
980  break;
981  }
983  next_unskippable_block++;
984  }
985  }
986 
987  /*
988  * We know we can't skip the current block. But set up
989  * skipping_blocks to do the right thing at the following blocks.
990  */
991  if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
992  skipping_blocks = true;
993  else
994  skipping_blocks = false;
995 
996  /*
997  * Normally, the fact that we can't skip this block must mean that
998  * it's not all-visible. But in an aggressive vacuum we know only
999  * that it's not all-frozen, so it might still be all-visible.
1000  */
1001  if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
1002  all_visible_according_to_vm = true;
1003  }
1004  else
1005  {
1006  /*
1007  * The current block is potentially skippable; if we've seen a
1008  * long enough run of skippable blocks to justify skipping it, and
1009  * we're not forced to check it, then go ahead and skip.
1010  * Otherwise, the page must be at least all-visible if not
1011  * all-frozen, so we can set all_visible_according_to_vm = true.
1012  */
1013  if (skipping_blocks && !FORCE_CHECK_PAGE())
1014  {
1015  /*
1016  * Tricky, tricky. If this is in aggressive vacuum, the page
1017  * must have been all-frozen at the time we checked whether it
1018  * was skippable, but it might not be any more. We must be
1019  * careful to count it as a skipped all-frozen page in that
1020  * case, or else we'll think we can't update relfrozenxid and
1021  * relminmxid. If it's not an aggressive vacuum, we don't
1022  * know whether it was all-frozen, so we have to recheck; but
1023  * in this case an approximate answer is OK.
1024  */
1025  if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1026  vacrelstats->frozenskipped_pages++;
1027  continue;
1028  }
1029  all_visible_according_to_vm = true;
1030  }
1031 
1033 
1034  /*
1035  * If we are close to overrunning the available space for dead-tuple
1036  * TIDs, pause and do a cycle of vacuuming before we tackle this page.
1037  */
1038  if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
1039  dead_tuples->num_tuples > 0)
1040  {
1041  /*
1042  * Before beginning index vacuuming, we release any pin we may
1043  * hold on the visibility map page. This isn't necessary for
1044  * correctness, but we do it anyway to avoid holding the pin
1045  * across a lengthy, unrelated operation.
1046  */
1047  if (BufferIsValid(vmbuffer))
1048  {
1049  ReleaseBuffer(vmbuffer);
1050  vmbuffer = InvalidBuffer;
1051  }
1052 
1053  /* Work on all the indexes, then the heap */
1054  lazy_vacuum_all_indexes(onerel, Irel, indstats,
1055  vacrelstats, lps, nindexes);
1056 
1057  /* Remove tuples from heap */
1058  lazy_vacuum_heap(onerel, vacrelstats);
1059 
1060  /*
1061  * Forget the now-vacuumed tuples, and press on, but be careful
1062  * not to reset latestRemovedXid since we want that value to be
1063  * valid.
1064  */
1065  dead_tuples->num_tuples = 0;
1066 
1067  /*
1068  * Vacuum the Free Space Map to make newly-freed space visible on
1069  * upper-level FSM pages. Note we have not yet processed blkno.
1070  */
1071  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1072  next_fsm_block_to_vacuum = blkno;
1073 
1074  /* Report that we are once again scanning the heap */
1077  }
1078 
1079  /*
1080  * Pin the visibility map page in case we need to mark the page
1081  * all-visible. In most cases this will be very cheap, because we'll
1082  * already have the correct page pinned anyway. However, it's
1083  * possible that (a) next_unskippable_block is covered by a different
1084  * VM page than the current block or (b) we released our pin and did a
1085  * cycle of index vacuuming.
1086  *
1087  */
1088  visibilitymap_pin(onerel, blkno, &vmbuffer);
1089 
1090  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
1091  RBM_NORMAL, vac_strategy);
1092 
1093  /* We need buffer cleanup lock so that we can prune HOT chains. */
1095  {
1096  /*
1097  * If we're not performing an aggressive scan to guard against XID
1098  * wraparound, and we don't want to forcibly check the page, then
1099  * it's OK to skip vacuuming pages we get a lock conflict on. They
1100  * will be dealt with in some future vacuum.
1101  */
1102  if (!aggressive && !FORCE_CHECK_PAGE())
1103  {
1104  ReleaseBuffer(buf);
1105  vacrelstats->pinskipped_pages++;
1106  continue;
1107  }
1108 
1109  /*
1110  * Read the page with share lock to see if any xids on it need to
1111  * be frozen. If not we just skip the page, after updating our
1112  * scan statistics. If there are some, we wait for cleanup lock.
1113  *
1114  * We could defer the lock request further by remembering the page
1115  * and coming back to it later, or we could even register
1116  * ourselves for multiple buffers and then service whichever one
1117  * is received first. For now, this seems good enough.
1118  *
1119  * If we get here with aggressive false, then we're just forcibly
1120  * checking the page, and so we don't want to insist on getting
1121  * the lock; we only need to know if the page contains tuples, so
1122  * that we can update nonempty_pages correctly. It's convenient
1123  * to use lazy_check_needs_freeze() for both situations, though.
1124  */
1126  if (!lazy_check_needs_freeze(buf, &hastup, vacrelstats))
1127  {
1128  UnlockReleaseBuffer(buf);
1129  vacrelstats->scanned_pages++;
1130  vacrelstats->pinskipped_pages++;
1131  if (hastup)
1132  vacrelstats->nonempty_pages = blkno + 1;
1133  continue;
1134  }
1135  if (!aggressive)
1136  {
1137  /*
1138  * Here, we must not advance scanned_pages; that would amount
1139  * to claiming that the page contains no freezable tuples.
1140  */
1141  UnlockReleaseBuffer(buf);
1142  vacrelstats->pinskipped_pages++;
1143  if (hastup)
1144  vacrelstats->nonempty_pages = blkno + 1;
1145  continue;
1146  }
1148  LockBufferForCleanup(buf);
1149  /* drop through to normal processing */
1150  }
1151 
1152  vacrelstats->scanned_pages++;
1153  vacrelstats->tupcount_pages++;
1154 
1155  page = BufferGetPage(buf);
1156 
1157  if (PageIsNew(page))
1158  {
1159  /*
1160  * All-zeroes pages can be left over if either a backend extends
1161  * the relation by a single page, but crashes before the newly
1162  * initialized page has been written out, or when bulk-extending
1163  * the relation (which creates a number of empty pages at the tail
1164  * end of the relation, but enters them into the FSM).
1165  *
1166  * Note we do not enter the page into the visibilitymap. That has
1167  * the downside that we repeatedly visit this page in subsequent
1168  * vacuums, but otherwise we'll never not discover the space on a
1169  * promoted standby. The harm of repeated checking ought to
1170  * normally not be too bad - the space usually should be used at
1171  * some point, otherwise there wouldn't be any regular vacuums.
1172  *
1173  * Make sure these pages are in the FSM, to ensure they can be
1174  * reused. Do that by testing if there's any space recorded for
1175  * the page. If not, enter it. We do so after releasing the lock
1176  * on the heap page, the FSM is approximate, after all.
1177  */
1178  UnlockReleaseBuffer(buf);
1179 
1180  empty_pages++;
1181 
1182  if (GetRecordedFreeSpace(onerel, blkno) == 0)
1183  {
1184  Size freespace;
1185 
1186  freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
1187  RecordPageWithFreeSpace(onerel, blkno, freespace);
1188  }
1189  continue;
1190  }
1191 
1192  if (PageIsEmpty(page))
1193  {
1194  empty_pages++;
1195  freespace = PageGetHeapFreeSpace(page);
1196 
1197  /*
1198  * Empty pages are always all-visible and all-frozen (note that
1199  * the same is currently not true for new pages, see above).
1200  */
1201  if (!PageIsAllVisible(page))
1202  {
1204 
1205  /* mark buffer dirty before writing a WAL record */
1206  MarkBufferDirty(buf);
1207 
1208  /*
1209  * It's possible that another backend has extended the heap,
1210  * initialized the page, and then failed to WAL-log the page
1211  * due to an ERROR. Since heap extension is not WAL-logged,
1212  * recovery might try to replay our record setting the page
1213  * all-visible and find that the page isn't initialized, which
1214  * will cause a PANIC. To prevent that, check whether the
1215  * page has been previously WAL-logged, and if not, do that
1216  * now.
1217  */
1218  if (RelationNeedsWAL(onerel) &&
1219  PageGetLSN(page) == InvalidXLogRecPtr)
1220  log_newpage_buffer(buf, true);
1221 
1222  PageSetAllVisible(page);
1223  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1224  vmbuffer, InvalidTransactionId,
1226  END_CRIT_SECTION();
1227  }
1228 
1229  UnlockReleaseBuffer(buf);
1230  RecordPageWithFreeSpace(onerel, blkno, freespace);
1231  continue;
1232  }
1233 
1234  /*
1235  * Prune all HOT-update chains in this page.
1236  *
1237  * We count tuples removed by the pruning step as removed by VACUUM.
1238  */
1239  tups_vacuumed += heap_page_prune(onerel, buf, vistest, false,
1241  &vacrelstats->latestRemovedXid,
1242  &vacrelstats->offnum);
1243 
1244  /*
1245  * Now scan the page to collect vacuumable items and check for tuples
1246  * requiring freezing.
1247  */
1248  all_visible = true;
1249  has_dead_tuples = false;
1250  nfrozen = 0;
1251  hastup = false;
1252  prev_dead_count = dead_tuples->num_tuples;
1253  maxoff = PageGetMaxOffsetNumber(page);
1254 
1255  /*
1256  * Note: If you change anything in the loop below, also look at
1257  * heap_page_is_all_visible to see if that needs to be changed.
1258  */
1259  for (offnum = FirstOffsetNumber;
1260  offnum <= maxoff;
1261  offnum = OffsetNumberNext(offnum))
1262  {
1263  ItemId itemid;
1264 
1265  /*
1266  * Set the offset number so that we can display it along with any
1267  * error that occurred while processing this tuple.
1268  */
1269  vacrelstats->offnum = offnum;
1270  itemid = PageGetItemId(page, offnum);
1271 
1272  /* Unused items require no processing, but we count 'em */
1273  if (!ItemIdIsUsed(itemid))
1274  {
1275  nunused += 1;
1276  continue;
1277  }
1278 
1279  /* Redirect items mustn't be touched */
1280  if (ItemIdIsRedirected(itemid))
1281  {
1282  hastup = true; /* this page won't be truncatable */
1283  continue;
1284  }
1285 
1286  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1287 
1288  /*
1289  * DEAD line pointers are to be vacuumed normally; but we don't
1290  * count them in tups_vacuumed, else we'd be double-counting (at
1291  * least in the common case where heap_page_prune() just freed up
1292  * a non-HOT tuple).
1293  */
1294  if (ItemIdIsDead(itemid))
1295  {
1296  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1297  all_visible = false;
1298  continue;
1299  }
1300 
1301  Assert(ItemIdIsNormal(itemid));
1302 
1303  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1304  tuple.t_len = ItemIdGetLength(itemid);
1305  tuple.t_tableOid = RelationGetRelid(onerel);
1306 
1307  tupgone = false;
1308 
1309  /*
1310  * The criteria for counting a tuple as live in this block need to
1311  * match what analyze.c's acquire_sample_rows() does, otherwise
1312  * VACUUM and ANALYZE may produce wildly different reltuples
1313  * values, e.g. when there are many recently-dead tuples.
1314  *
1315  * The logic here is a bit simpler than acquire_sample_rows(), as
1316  * VACUUM can't run inside a transaction block, which makes some
1317  * cases impossible (e.g. in-progress insert from the same
1318  * transaction).
1319  */
1320  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
1321  {
1322  case HEAPTUPLE_DEAD:
1323 
1324  /*
1325  * Ordinarily, DEAD tuples would have been removed by
1326  * heap_page_prune(), but it's possible that the tuple
1327  * state changed since heap_page_prune() looked. In
1328  * particular an INSERT_IN_PROGRESS tuple could have
1329  * changed to DEAD if the inserter aborted. So this
1330  * cannot be considered an error condition.
1331  *
1332  * If the tuple is HOT-updated then it must only be
1333  * removed by a prune operation; so we keep it just as if
1334  * it were RECENTLY_DEAD. Also, if it's a heap-only
1335  * tuple, we choose to keep it, because it'll be a lot
1336  * cheaper to get rid of it in the next pruning pass than
1337  * to treat it like an indexed tuple. Finally, if index
1338  * cleanup is disabled, the second heap pass will not
1339  * execute, and the tuple will not get removed, so we must
1340  * treat it like any other dead tuple that we choose to
1341  * keep.
1342  *
1343  * If this were to happen for a tuple that actually needed
1344  * to be deleted, we'd be in trouble, because it'd
1345  * possibly leave a tuple below the relation's xmin
1346  * horizon alive. heap_prepare_freeze_tuple() is prepared
1347  * to detect that case and abort the transaction,
1348  * preventing corruption.
1349  */
1350  if (HeapTupleIsHotUpdated(&tuple) ||
1351  HeapTupleIsHeapOnly(&tuple) ||
1353  nkeep += 1;
1354  else
1355  tupgone = true; /* we can delete the tuple */
1356  all_visible = false;
1357  break;
1358  case HEAPTUPLE_LIVE:
1359 
1360  /*
1361  * Count it as live. Not only is this natural, but it's
1362  * also what acquire_sample_rows() does.
1363  */
1364  live_tuples += 1;
1365 
1366  /*
1367  * Is the tuple definitely visible to all transactions?
1368  *
1369  * NB: Like with per-tuple hint bits, we can't set the
1370  * PD_ALL_VISIBLE flag if the inserter committed
1371  * asynchronously. See SetHintBits for more info. Check
1372  * that the tuple is hinted xmin-committed because of
1373  * that.
1374  */
1375  if (all_visible)
1376  {
1377  TransactionId xmin;
1378 
1380  {
1381  all_visible = false;
1382  break;
1383  }
1384 
1385  /*
1386  * The inserter definitely committed. But is it old
1387  * enough that everyone sees it as committed?
1388  */
1389  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1390  if (!TransactionIdPrecedes(xmin, OldestXmin))
1391  {
1392  all_visible = false;
1393  break;
1394  }
1395 
1396  /* Track newest xmin on page. */
1397  if (TransactionIdFollows(xmin, visibility_cutoff_xid))
1398  visibility_cutoff_xid = xmin;
1399  }
1400  break;
1402 
1403  /*
1404  * If tuple is recently deleted then we must not remove it
1405  * from relation.
1406  */
1407  nkeep += 1;
1408  all_visible = false;
1409  break;
1411 
1412  /*
1413  * This is an expected case during concurrent vacuum.
1414  *
1415  * We do not count these rows as live, because we expect
1416  * the inserting transaction to update the counters at
1417  * commit, and we assume that will happen only after we
1418  * report our results. This assumption is a bit shaky,
1419  * but it is what acquire_sample_rows() does, so be
1420  * consistent.
1421  */
1422  all_visible = false;
1423  break;
1425  /* This is an expected case during concurrent vacuum */
1426  all_visible = false;
1427 
1428  /*
1429  * Count such rows as live. As above, we assume the
1430  * deleting transaction will commit and update the
1431  * counters after we report.
1432  */
1433  live_tuples += 1;
1434  break;
1435  default:
1436  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1437  break;
1438  }
1439 
1440  if (tupgone)
1441  {
1442  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1444  &vacrelstats->latestRemovedXid);
1445  tups_vacuumed += 1;
1446  has_dead_tuples = true;
1447  }
1448  else
1449  {
1450  bool tuple_totally_frozen;
1451 
1452  num_tuples += 1;
1453  hastup = true;
1454 
1455  /*
1456  * Each non-removable tuple must be checked to see if it needs
1457  * freezing. Note we already have exclusive buffer lock.
1458  */
1460  relfrozenxid, relminmxid,
1462  &frozen[nfrozen],
1463  &tuple_totally_frozen))
1464  frozen[nfrozen++].offset = offnum;
1465 
1466  if (!tuple_totally_frozen)
1467  all_frozen = false;
1468  }
1469  } /* scan along page */
1470 
1471  /*
1472  * Clear the offset information once we have processed all the tuples
1473  * on the page.
1474  */
1475  vacrelstats->offnum = InvalidOffsetNumber;
1476 
1477  /*
1478  * If we froze any tuples, mark the buffer dirty, and write a WAL
1479  * record recording the changes. We must log the changes to be
1480  * crash-safe against future truncation of CLOG.
1481  */
1482  if (nfrozen > 0)
1483  {
1485 
1486  MarkBufferDirty(buf);
1487 
1488  /* execute collected freezes */
1489  for (i = 0; i < nfrozen; i++)
1490  {
1491  ItemId itemid;
1492  HeapTupleHeader htup;
1493 
1494  itemid = PageGetItemId(page, frozen[i].offset);
1495  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1496 
1497  heap_execute_freeze_tuple(htup, &frozen[i]);
1498  }
1499 
1500  /* Now WAL-log freezing if necessary */
1501  if (RelationNeedsWAL(onerel))
1502  {
1503  XLogRecPtr recptr;
1504 
1505  recptr = log_heap_freeze(onerel, buf, FreezeLimit,
1506  frozen, nfrozen);
1507  PageSetLSN(page, recptr);
1508  }
1509 
1510  END_CRIT_SECTION();
1511  }
1512 
1513  /*
1514  * If there are no indexes we can vacuum the page right now instead of
1515  * doing a second scan. Also we don't do that but forget dead tuples
1516  * when index cleanup is disabled.
1517  */
1518  if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
1519  {
1520  if (nindexes == 0)
1521  {
1522  /* Remove tuples from heap if the table has no index */
1523  lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
1524  vacuumed_pages++;
1525  has_dead_tuples = false;
1526  }
1527  else
1528  {
1529  /*
1530  * Here, we have indexes but index cleanup is disabled.
1531  * Instead of vacuuming the dead tuples on the heap, we just
1532  * forget them.
1533  *
1534  * Note that vacrelstats->dead_tuples could have tuples which
1535  * became dead after HOT-pruning but are not marked dead yet.
1536  * We do not process them because it's a very rare condition,
1537  * and the next vacuum will process them anyway.
1538  */
1540  }
1541 
1542  /*
1543  * Forget the now-vacuumed tuples, and press on, but be careful
1544  * not to reset latestRemovedXid since we want that value to be
1545  * valid.
1546  */
1547  dead_tuples->num_tuples = 0;
1548 
1549  /*
1550  * Periodically do incremental FSM vacuuming to make newly-freed
1551  * space visible on upper FSM pages. Note: although we've cleaned
1552  * the current block, we haven't yet updated its FSM entry (that
1553  * happens further down), so passing end == blkno is correct.
1554  */
1555  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1556  {
1557  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
1558  blkno);
1559  next_fsm_block_to_vacuum = blkno;
1560  }
1561  }
1562 
1563  freespace = PageGetHeapFreeSpace(page);
1564 
1565  /* mark page all-visible, if appropriate */
1566  if (all_visible && !all_visible_according_to_vm)
1567  {
1569 
1570  if (all_frozen)
1571  flags |= VISIBILITYMAP_ALL_FROZEN;
1572 
1573  /*
1574  * It should never be the case that the visibility map page is set
1575  * while the page-level bit is clear, but the reverse is allowed
1576  * (if checksums are not enabled). Regardless, set both bits so
1577  * that we get back in sync.
1578  *
1579  * NB: If the heap page is all-visible but the VM bit is not set,
1580  * we don't need to dirty the heap page. However, if checksums
1581  * are enabled, we do need to make sure that the heap page is
1582  * dirtied before passing it to visibilitymap_set(), because it
1583  * may be logged. Given that this situation should only happen in
1584  * rare cases after a crash, it is not worth optimizing.
1585  */
1586  PageSetAllVisible(page);
1587  MarkBufferDirty(buf);
1588  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1589  vmbuffer, visibility_cutoff_xid, flags);
1590  }
1591 
1592  /*
1593  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1594  * the page-level bit is clear. However, it's possible that the bit
1595  * got cleared after we checked it and before we took the buffer
1596  * content lock, so we must recheck before jumping to the conclusion
1597  * that something bad has happened.
1598  */
1599  else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1600  && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
1601  {
1602  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1603  vacrelstats->relname, blkno);
1604  visibilitymap_clear(onerel, blkno, vmbuffer,
1606  }
1607 
1608  /*
1609  * It's possible for the value returned by
1610  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1611  * wrong for us to see tuples that appear to not be visible to
1612  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1613  * xmin value never moves backwards, but
1614  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1615  * returns a value that's unnecessarily small, so if we see that
1616  * contradiction it just means that the tuples that we think are not
1617  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1618  * is correct.
1619  *
1620  * There should never be dead tuples on a page with PD_ALL_VISIBLE
1621  * set, however.
1622  */
1623  else if (PageIsAllVisible(page) && has_dead_tuples)
1624  {
1625  elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1626  vacrelstats->relname, blkno);
1627  PageClearAllVisible(page);
1628  MarkBufferDirty(buf);
1629  visibilitymap_clear(onerel, blkno, vmbuffer,
1631  }
1632 
1633  /*
1634  * If the all-visible page is all-frozen but not marked as such yet,
1635  * mark it as all-frozen. Note that all_frozen is only valid if
1636  * all_visible is true, so we must check both.
1637  */
1638  else if (all_visible_according_to_vm && all_visible && all_frozen &&
1639  !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1640  {
1641  /*
1642  * We can pass InvalidTransactionId as the cutoff XID here,
1643  * because setting the all-frozen bit doesn't cause recovery
1644  * conflicts.
1645  */
1646  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1647  vmbuffer, InvalidTransactionId,
1649  }
1650 
1651  UnlockReleaseBuffer(buf);
1652 
1653  /* Remember the location of the last page with nonremovable tuples */
1654  if (hastup)
1655  vacrelstats->nonempty_pages = blkno + 1;
1656 
1657  /*
1658  * If we remembered any tuples for deletion, then the page will be
1659  * visited again by lazy_vacuum_heap, which will compute and record
1660  * its post-compaction free space. If not, then we're done with this
1661  * page, so remember its free space as-is. (This path will always be
1662  * taken if there are no indexes.)
1663  */
1664  if (dead_tuples->num_tuples == prev_dead_count)
1665  RecordPageWithFreeSpace(onerel, blkno, freespace);
1666  }
1667 
1668  /* report that everything is scanned and vacuumed */
1670 
1671  /* Clear the block number information */
1672  vacrelstats->blkno = InvalidBlockNumber;
1673 
1674  pfree(frozen);
1675 
1676  /* save stats for use later */
1677  vacrelstats->tuples_deleted = tups_vacuumed;
1678  vacrelstats->new_dead_tuples = nkeep;
1679 
1680  /* now we can compute the new value for pg_class.reltuples */
1681  vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
1682  nblocks,
1683  vacrelstats->tupcount_pages,
1684  live_tuples);
1685 
1686  /*
1687  * Also compute the total number of surviving heap entries. In the
1688  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1689  */
1690  vacrelstats->new_rel_tuples =
1691  Max(vacrelstats->new_live_tuples, 0) + vacrelstats->new_dead_tuples;
1692 
1693  /*
1694  * Release any remaining pin on visibility map page.
1695  */
1696  if (BufferIsValid(vmbuffer))
1697  {
1698  ReleaseBuffer(vmbuffer);
1699  vmbuffer = InvalidBuffer;
1700  }
1701 
1702  /* If any tuples need to be deleted, perform final vacuum cycle */
1703  /* XXX put a threshold on min number of tuples here? */
1704  if (dead_tuples->num_tuples > 0)
1705  {
1706  /* Work on all the indexes, and then the heap */
1707  lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats,
1708  lps, nindexes);
1709 
1710  /* Remove tuples from heap */
1711  lazy_vacuum_heap(onerel, vacrelstats);
1712  }
1713 
1714  /*
1715  * Vacuum the remainder of the Free Space Map. We must do this whether or
1716  * not there were indexes.
1717  */
1718  if (blkno > next_fsm_block_to_vacuum)
1719  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1720 
1721  /* report all blocks vacuumed */
1723 
1724  /* Do post-vacuum cleanup */
1725  if (vacrelstats->useindex)
1726  lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes);
1727 
1728  /*
1729  * End parallel mode before updating index statistics as we cannot write
1730  * during parallel mode.
1731  */
1732  if (ParallelVacuumIsActive(lps))
1733  end_parallel_vacuum(indstats, lps, nindexes);
1734 
1735  /* Update index statistics */
1736  update_index_statistics(Irel, indstats, nindexes);
1737 
1738  /* If no indexes, make log report that lazy_vacuum_heap would've made */
1739  if (vacuumed_pages)
1740  ereport(elevel,
1741  (errmsg("\"%s\": removed %.0f row versions in %u pages",
1742  vacrelstats->relname,
1743  tups_vacuumed, vacuumed_pages)));
1744 
1745  /*
1746  * This is pretty messy, but we split it up so that we can skip emitting
1747  * individual parts of the message when not applicable.
1748  */
1749  initStringInfo(&buf);
1750  appendStringInfo(&buf,
1751  _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"),
1752  nkeep, OldestXmin);
1753  appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"),
1754  nunused);
1755  appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
1756  "Skipped %u pages due to buffer pins, ",
1757  vacrelstats->pinskipped_pages),
1758  vacrelstats->pinskipped_pages);
1759  appendStringInfo(&buf, ngettext("%u frozen page.\n",
1760  "%u frozen pages.\n",
1761  vacrelstats->frozenskipped_pages),
1762  vacrelstats->frozenskipped_pages);
1763  appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
1764  "%u pages are entirely empty.\n",
1765  empty_pages),
1766  empty_pages);
1767  appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
1768 
1769  ereport(elevel,
1770  (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
1771  vacrelstats->relname,
1772  tups_vacuumed, num_tuples,
1773  vacrelstats->scanned_pages, nblocks),
1774  errdetail_internal("%s", buf.data)));
1775  pfree(buf.data);
1776 }
1777 
1778 /*
1779  * lazy_vacuum_all_indexes() -- vacuum all indexes of relation.
1780  *
1781  * We process the indexes serially unless we are doing parallel vacuum.
1782  */
1783 static void
1785  IndexBulkDeleteResult **stats,
1786  LVRelStats *vacrelstats, LVParallelState *lps,
1787  int nindexes)
1788 {
1790  Assert(nindexes > 0);
1791 
1792  /* Log cleanup info before we touch indexes */
1793  vacuum_log_cleanup_info(onerel, vacrelstats);
1794 
1795  /* Report that we are now vacuuming indexes */
1798 
1799  /* Perform index vacuuming with parallel workers for parallel vacuum. */
1800  if (ParallelVacuumIsActive(lps))
1801  {
1802  /* Tell parallel workers to do index vacuuming */
1803  lps->lvshared->for_cleanup = false;
1804  lps->lvshared->first_time = false;
1805 
1806  /*
1807  * We can only provide an approximate value of num_heap_tuples in
1808  * vacuum cases.
1809  */
1810  lps->lvshared->reltuples = vacrelstats->old_live_tuples;
1811  lps->lvshared->estimated_count = true;
1812 
1813  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
1814  }
1815  else
1816  {
1817  int idx;
1818 
1819  for (idx = 0; idx < nindexes; idx++)
1820  lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples,
1821  vacrelstats->old_live_tuples, vacrelstats);
1822  }
1823 
1824  /* Increase and report the number of index scans */
1825  vacrelstats->num_index_scans++;
1827  vacrelstats->num_index_scans);
1828 }
1829 
1830 
1831 /*
1832  * lazy_vacuum_heap() -- second pass over the heap
1833  *
1834  * This routine marks dead tuples as unused and compacts out free
1835  * space on their pages. Pages not having dead tuples recorded from
1836  * lazy_scan_heap are not visited at all.
1837  *
1838  * Note: the reason for doing this as a second pass is we cannot remove
1839  * the tuples until we've removed their index entries, and we want to
1840  * process index entry removal in batches as large as possible.
1841  */
1842 static void
1843 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
1844 {
1845  int tupindex;
1846  int npages;
1847  PGRUsage ru0;
1848  Buffer vmbuffer = InvalidBuffer;
1849  LVSavedErrInfo saved_err_info;
1850 
1851  /* Report that we are now vacuuming the heap */
1854 
1855  /* Update error traceback information */
1856  update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
1858 
1859  pg_rusage_init(&ru0);
1860  npages = 0;
1861 
1862  tupindex = 0;
1863  while (tupindex < vacrelstats->dead_tuples->num_tuples)
1864  {
1865  BlockNumber tblk;
1866  Buffer buf;
1867  Page page;
1868  Size freespace;
1869 
1871 
1872  tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
1873  vacrelstats->blkno = tblk;
1874  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
1875  vac_strategy);
1877  {
1878  ReleaseBuffer(buf);
1879  ++tupindex;
1880  continue;
1881  }
1882  tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
1883  &vmbuffer);
1884 
1885  /* Now that we've compacted the page, record its available space */
1886  page = BufferGetPage(buf);
1887  freespace = PageGetHeapFreeSpace(page);
1888 
1889  UnlockReleaseBuffer(buf);
1890  RecordPageWithFreeSpace(onerel, tblk, freespace);
1891  npages++;
1892  }
1893 
1894  /* Clear the block number information */
1895  vacrelstats->blkno = InvalidBlockNumber;
1896 
1897  if (BufferIsValid(vmbuffer))
1898  {
1899  ReleaseBuffer(vmbuffer);
1900  vmbuffer = InvalidBuffer;
1901  }
1902 
1903  ereport(elevel,
1904  (errmsg("\"%s\": removed %d row versions in %d pages",
1905  vacrelstats->relname,
1906  tupindex, npages),
1907  errdetail_internal("%s", pg_rusage_show(&ru0))));
1908 
1909  /* Revert to the previous phase information for error traceback */
1910  restore_vacuum_error_info(vacrelstats, &saved_err_info);
1911 }
1912 
1913 /*
1914  * lazy_vacuum_page() -- free dead tuples on a page
1915  * and repair its fragmentation.
1916  *
1917  * Caller must hold pin and buffer cleanup lock on the buffer.
1918  *
1919  * tupindex is the index in vacrelstats->dead_tuples of the first dead
1920  * tuple for this page. We assume the rest follow sequentially.
1921  * The return value is the first tupindex after the tuples of this page.
1922  */
1923 static int
1925  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
1926 {
1927  LVDeadTuples *dead_tuples = vacrelstats->dead_tuples;
1928  Page page = BufferGetPage(buffer);
1929  OffsetNumber unused[MaxOffsetNumber];
1930  int uncnt = 0;
1931  TransactionId visibility_cutoff_xid;
1932  bool all_frozen;
1933  LVSavedErrInfo saved_err_info;
1934 
1936 
1937  /* Update error traceback information */
1938  update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
1939  blkno, InvalidOffsetNumber);
1940 
1942 
1943  for (; tupindex < dead_tuples->num_tuples; tupindex++)
1944  {
1945  BlockNumber tblk;
1946  OffsetNumber toff;
1947  ItemId itemid;
1948 
1949  tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
1950  if (tblk != blkno)
1951  break; /* past end of tuples for this block */
1952  toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
1953  itemid = PageGetItemId(page, toff);
1954  ItemIdSetUnused(itemid);
1955  unused[uncnt++] = toff;
1956  }
1957 
1959 
1960  /*
1961  * Mark buffer dirty before we write WAL.
1962  */
1963  MarkBufferDirty(buffer);
1964 
1965  /* XLOG stuff */
1966  if (RelationNeedsWAL(onerel))
1967  {
1968  XLogRecPtr recptr;
1969 
1970  recptr = log_heap_clean(onerel, buffer,
1971  NULL, 0, NULL, 0,
1972  unused, uncnt,
1973  vacrelstats->latestRemovedXid);
1974  PageSetLSN(page, recptr);
1975  }
1976 
1977  /*
1978  * End critical section, so we safely can do visibility tests (which
1979  * possibly need to perform IO and allocate memory!). If we crash now the
1980  * page (including the corresponding vm bit) might not be marked all
1981  * visible, but that's fine. A later vacuum will fix that.
1982  */
1983  END_CRIT_SECTION();
1984 
1985  /*
1986  * Now that we have removed the dead tuples from the page, once again
1987  * check if the page has become all-visible. The page is already marked
1988  * dirty, exclusively locked, and, if needed, a full page image has been
1989  * emitted in the log_heap_clean() above.
1990  */
1991  if (heap_page_is_all_visible(onerel, buffer, vacrelstats,
1992  &visibility_cutoff_xid,
1993  &all_frozen))
1994  PageSetAllVisible(page);
1995 
1996  /*
1997  * All the changes to the heap page have been done. If the all-visible
1998  * flag is now set, also set the VM all-visible bit (and, if possible, the
1999  * all-frozen bit) unless this has already been done previously.
2000  */
2001  if (PageIsAllVisible(page))
2002  {
2003  uint8 vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
2004  uint8 flags = 0;
2005 
2006  /* Set the VM all-frozen bit to flag, if needed */
2007  if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
2008  flags |= VISIBILITYMAP_ALL_VISIBLE;
2009  if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
2010  flags |= VISIBILITYMAP_ALL_FROZEN;
2011 
2012  Assert(BufferIsValid(*vmbuffer));
2013  if (flags != 0)
2014  visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
2015  *vmbuffer, visibility_cutoff_xid, flags);
2016  }
2017 
2018  /* Revert to the previous phase information for error traceback */
2019  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2020  return tupindex;
2021 }
2022 
2023 /*
2024  * lazy_check_needs_freeze() -- scan page to see if any tuples
2025  * need to be cleaned to avoid wraparound
2026  *
2027  * Returns true if the page needs to be vacuumed using cleanup lock.
2028  * Also returns a flag indicating whether page contains any tuples at all.
2029  */
2030 static bool
2031 lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
2032 {
2033  Page page = BufferGetPage(buf);
2034  OffsetNumber offnum,
2035  maxoff;
2036  HeapTupleHeader tupleheader;
2037 
2038  *hastup = false;
2039 
2040  /*
2041  * New and empty pages, obviously, don't contain tuples. We could make
2042  * sure that the page is registered in the FSM, but it doesn't seem worth
2043  * waiting for a cleanup lock just for that, especially because it's
2044  * likely that the pin holder will do so.
2045  */
2046  if (PageIsNew(page) || PageIsEmpty(page))
2047  return false;
2048 
2049  maxoff = PageGetMaxOffsetNumber(page);
2050  for (offnum = FirstOffsetNumber;
2051  offnum <= maxoff;
2052  offnum = OffsetNumberNext(offnum))
2053  {
2054  ItemId itemid;
2055 
2056  /*
2057  * Set the offset number so that we can display it along with any
2058  * error that occurred while processing this tuple.
2059  */
2060  vacrelstats->offnum = offnum;
2061  itemid = PageGetItemId(page, offnum);
2062 
2063  /* this should match hastup test in count_nondeletable_pages() */
2064  if (ItemIdIsUsed(itemid))
2065  *hastup = true;
2066 
2067  /* dead and redirect items never need freezing */
2068  if (!ItemIdIsNormal(itemid))
2069  continue;
2070 
2071  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2072 
2073  if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
2074  MultiXactCutoff, buf))
2075  break;
2076  } /* scan along page */
2077 
2078  /* Clear the offset information once we have processed the given page. */
2079  vacrelstats->offnum = InvalidOffsetNumber;
2080 
2081  return (offnum <= maxoff);
2082 }
2083 
2084 /*
2085  * Perform index vacuum or index cleanup with parallel workers. This function
2086  * must be used by the parallel vacuum leader process. The caller must set
2087  * lps->lvshared->for_cleanup to indicate whether to perform vacuum or
2088  * cleanup.
2089  */
2090 static void
2092  LVRelStats *vacrelstats, LVParallelState *lps,
2093  int nindexes)
2094 {
2095  int nworkers;
2096 
2099  Assert(nindexes > 0);
2100 
2101  /* Determine the number of parallel workers to launch */
2102  if (lps->lvshared->for_cleanup)
2103  {
2104  if (lps->lvshared->first_time)
2105  nworkers = lps->nindexes_parallel_cleanup +
2107  else
2108  nworkers = lps->nindexes_parallel_cleanup;
2109  }
2110  else
2111  nworkers = lps->nindexes_parallel_bulkdel;
2112 
2113  /* The leader process will participate */
2114  nworkers--;
2115 
2116  /*
2117  * It is possible that parallel context is initialized with fewer workers
2118  * than the number of indexes that need a separate worker in the current
2119  * phase, so we need to consider it. See compute_parallel_vacuum_workers.
2120  */
2121  nworkers = Min(nworkers, lps->pcxt->nworkers);
2122 
2123  /* Setup the shared cost-based vacuum delay and launch workers */
2124  if (nworkers > 0)
2125  {
2126  if (vacrelstats->num_index_scans > 0)
2127  {
2128  /* Reset the parallel index processing counter */
2129  pg_atomic_write_u32(&(lps->lvshared->idx), 0);
2130 
2131  /* Reinitialize the parallel context to relaunch parallel workers */
2133  }
2134 
2135  /*
2136  * Set up shared cost balance and the number of active workers for
2137  * vacuum delay. We need to do this before launching workers as
2138  * otherwise, they might not see the updated values for these
2139  * parameters.
2140  */
2143 
2144  /*
2145  * The number of workers can vary between bulkdelete and cleanup
2146  * phase.
2147  */
2148  ReinitializeParallelWorkers(lps->pcxt, nworkers);
2149 
2151 
2152  if (lps->pcxt->nworkers_launched > 0)
2153  {
2154  /*
2155  * Reset the local cost values for leader backend as we have
2156  * already accumulated the remaining balance of heap.
2157  */
2158  VacuumCostBalance = 0;
2160 
2161  /* Enable shared cost balance for leader backend */
2164  }
2165 
2166  if (lps->lvshared->for_cleanup)
2167  ereport(elevel,
2168  (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
2169  "launched %d parallel vacuum workers for index cleanup (planned: %d)",
2170  lps->pcxt->nworkers_launched),
2171  lps->pcxt->nworkers_launched, nworkers)));
2172  else
2173  ereport(elevel,
2174  (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
2175  "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
2176  lps->pcxt->nworkers_launched),
2177  lps->pcxt->nworkers_launched, nworkers)));
2178  }
2179 
2180  /* Process the indexes that can be processed by only leader process */
2181  vacuum_indexes_leader(Irel, stats, vacrelstats, lps, nindexes);
2182 
2183  /*
2184  * Join as a parallel worker. The leader process alone processes all the
2185  * indexes in the case where no workers are launched.
2186  */
2187  parallel_vacuum_index(Irel, stats, lps->lvshared,
2188  vacrelstats->dead_tuples, nindexes, vacrelstats);
2189 
2190  /*
2191  * Next, accumulate buffer and WAL usage. (This must wait for the workers
2192  * to finish, or we might get incomplete data.)
2193  */
2194  if (nworkers > 0)
2195  {
2196  int i;
2197 
2198  /* Wait for all vacuum workers to finish */
2200 
2201  for (i = 0; i < lps->pcxt->nworkers_launched; i++)
2202  InstrAccumParallelQuery(&lps->buffer_usage[i], &lps->wal_usage[i]);
2203  }
2204 
2205  /*
2206  * Carry the shared balance value to heap scan and disable shared costing
2207  */
2209  {
2211  VacuumSharedCostBalance = NULL;
2212  VacuumActiveNWorkers = NULL;
2213  }
2214 }
2215 
2216 /*
2217  * Index vacuum/cleanup routine used by the leader process and parallel
2218  * vacuum worker processes to process the indexes in parallel.
2219  */
2220 static void
2222  LVShared *lvshared, LVDeadTuples *dead_tuples,
2223  int nindexes, LVRelStats *vacrelstats)
2224 {
2225  /*
2226  * Increment the active worker count if we are able to launch any worker.
2227  */
2230 
2231  /* Loop until all indexes are vacuumed */
2232  for (;;)
2233  {
2234  int idx;
2235  LVSharedIndStats *shared_indstats;
2236 
2237  /* Get an index number to process */
2238  idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
2239 
2240  /* Done for all indexes? */
2241  if (idx >= nindexes)
2242  break;
2243 
2244  /* Get the index statistics of this index from DSM */
2245  shared_indstats = get_indstats(lvshared, idx);
2246 
2247  /*
2248  * Skip processing indexes that don't participate in parallel
2249  * operation
2250  */
2251  if (shared_indstats == NULL ||
2252  skip_parallel_vacuum_index(Irel[idx], lvshared))
2253  continue;
2254 
2255  /* Do vacuum or cleanup of the index */
2256  vacuum_one_index(Irel[idx], &(stats[idx]), lvshared, shared_indstats,
2257  dead_tuples, vacrelstats);
2258  }
2259 
2260  /*
2261  * We have completed the index vacuum so decrement the active worker
2262  * count.
2263  */
2266 }
2267 
2268 /*
2269  * Vacuum or cleanup indexes that can be processed by only the leader process
2270  * because these indexes don't support parallel operation at that phase.
2271  */
2272 static void
2274  LVRelStats *vacrelstats, LVParallelState *lps,
2275  int nindexes)
2276 {
2277  int i;
2278 
2280 
2281  /*
2282  * Increment the active worker count if we are able to launch any worker.
2283  */
2286 
2287  for (i = 0; i < nindexes; i++)
2288  {
2289  LVSharedIndStats *shared_indstats;
2290 
2291  shared_indstats = get_indstats(lps->lvshared, i);
2292 
2293  /* Process the indexes skipped by parallel workers */
2294  if (shared_indstats == NULL ||
2295  skip_parallel_vacuum_index(Irel[i], lps->lvshared))
2296  vacuum_one_index(Irel[i], &(stats[i]), lps->lvshared,
2297  shared_indstats, vacrelstats->dead_tuples,
2298  vacrelstats);
2299  }
2300 
2301  /*
2302  * We have completed the index vacuum so decrement the active worker
2303  * count.
2304  */
2307 }
2308 
2309 /*
2310  * Vacuum or cleanup index either by leader process or by one of the worker
2311  * process. After processing the index this function copies the index
2312  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
2313  * segment.
2314  */
2315 static void
2317  LVShared *lvshared, LVSharedIndStats *shared_indstats,
2318  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
2319 {
2320  IndexBulkDeleteResult *bulkdelete_res = NULL;
2321 
2322  if (shared_indstats)
2323  {
2324  /* Get the space for IndexBulkDeleteResult */
2325  bulkdelete_res = &(shared_indstats->stats);
2326 
2327  /*
2328  * Update the pointer to the corresponding bulk-deletion result if
2329  * someone has already updated it.
2330  */
2331  if (shared_indstats->updated && *stats == NULL)
2332  *stats = bulkdelete_res;
2333  }
2334 
2335  /* Do vacuum or cleanup of the index */
2336  if (lvshared->for_cleanup)
2337  lazy_cleanup_index(indrel, stats, lvshared->reltuples,
2338  lvshared->estimated_count, vacrelstats);
2339  else
2340  lazy_vacuum_index(indrel, stats, dead_tuples,
2341  lvshared->reltuples, vacrelstats);
2342 
2343  /*
2344  * Copy the index bulk-deletion result returned from ambulkdelete and
2345  * amvacuumcleanup to the DSM segment if it's the first cycle because they
2346  * allocate locally and it's possible that an index will be vacuumed by a
2347  * different vacuum process the next cycle. Copying the result normally
2348  * happens only the first time an index is vacuumed. For any additional
2349  * vacuum pass, we directly point to the result on the DSM segment and
2350  * pass it to vacuum index APIs so that workers can update it directly.
2351  *
2352  * Since all vacuum workers write the bulk-deletion result at different
2353  * slots we can write them without locking.
2354  */
2355  if (shared_indstats && !shared_indstats->updated && *stats != NULL)
2356  {
2357  memcpy(bulkdelete_res, *stats, sizeof(IndexBulkDeleteResult));
2358  shared_indstats->updated = true;
2359 
2360  /*
2361  * Now that stats[idx] points to the DSM segment, we don't need the
2362  * locally allocated results.
2363  */
2364  pfree(*stats);
2365  *stats = bulkdelete_res;
2366  }
2367 }
2368 
2369 /*
2370  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2371  *
2372  * Cleanup indexes. We process the indexes serially unless we are doing
2373  * parallel vacuum.
2374  */
2375 static void
2377  LVRelStats *vacrelstats, LVParallelState *lps,
2378  int nindexes)
2379 {
2380  int idx;
2381 
2383  Assert(nindexes > 0);
2384 
2385  /* Report that we are now cleaning up indexes */
2388 
2389  /*
2390  * If parallel vacuum is active we perform index cleanup with parallel
2391  * workers.
2392  */
2393  if (ParallelVacuumIsActive(lps))
2394  {
2395  /* Tell parallel workers to do index cleanup */
2396  lps->lvshared->for_cleanup = true;
2397  lps->lvshared->first_time =
2398  (vacrelstats->num_index_scans == 0);
2399 
2400  /*
2401  * Now we can provide a better estimate of total number of surviving
2402  * tuples (we assume indexes are more interested in that than in the
2403  * number of nominally live tuples).
2404  */
2405  lps->lvshared->reltuples = vacrelstats->new_rel_tuples;
2406  lps->lvshared->estimated_count =
2407  (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
2408 
2409  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
2410  }
2411  else
2412  {
2413  for (idx = 0; idx < nindexes; idx++)
2414  lazy_cleanup_index(Irel[idx], &stats[idx],
2415  vacrelstats->new_rel_tuples,
2416  vacrelstats->tupcount_pages < vacrelstats->rel_pages,
2417  vacrelstats);
2418  }
2419 }
2420 
2421 /*
2422  * lazy_vacuum_index() -- vacuum one index relation.
2423  *
2424  * Delete all the index entries pointing to tuples listed in
2425  * dead_tuples, and update running statistics.
2426  *
2427  * reltuples is the number of heap tuples to be passed to the
2428  * bulkdelete callback. It's always assumed to be estimated.
2429  */
2430 static void
2432  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
2433 {
2434  IndexVacuumInfo ivinfo;
2435  PGRUsage ru0;
2436  LVSavedErrInfo saved_err_info;
2437 
2438  pg_rusage_init(&ru0);
2439 
2440  ivinfo.index = indrel;
2441  ivinfo.analyze_only = false;
2442  ivinfo.report_progress = false;
2443  ivinfo.estimated_count = true;
2444  ivinfo.message_level = elevel;
2445  ivinfo.num_heap_tuples = reltuples;
2446  ivinfo.strategy = vac_strategy;
2447 
2448  /*
2449  * Update error traceback information.
2450  *
2451  * The index name is saved during this phase and restored immediately
2452  * after this phase. See vacuum_error_callback.
2453  */
2454  Assert(vacrelstats->indname == NULL);
2455  vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
2456  update_vacuum_error_info(vacrelstats, &saved_err_info,
2459 
2460  /* Do bulk deletion */
2461  *stats = index_bulk_delete(&ivinfo, *stats,
2462  lazy_tid_reaped, (void *) dead_tuples);
2463 
2464  ereport(elevel,
2465  (errmsg("scanned index \"%s\" to remove %d row versions",
2466  vacrelstats->indname,
2467  dead_tuples->num_tuples),
2468  errdetail_internal("%s", pg_rusage_show(&ru0))));
2469 
2470  /* Revert to the previous phase information for error traceback */
2471  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2472  pfree(vacrelstats->indname);
2473  vacrelstats->indname = NULL;
2474 }
2475 
2476 /*
2477  * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
2478  *
2479  * reltuples is the number of heap tuples and estimated_count is true
2480  * if reltuples is an estimated value.
2481  */
2482 static void
2484  IndexBulkDeleteResult **stats,
2485  double reltuples, bool estimated_count, LVRelStats *vacrelstats)
2486 {
2487  IndexVacuumInfo ivinfo;
2488  PGRUsage ru0;
2489  LVSavedErrInfo saved_err_info;
2490 
2491  pg_rusage_init(&ru0);
2492 
2493  ivinfo.index = indrel;
2494  ivinfo.analyze_only = false;
2495  ivinfo.report_progress = false;
2496  ivinfo.estimated_count = estimated_count;
2497  ivinfo.message_level = elevel;
2498 
2499  ivinfo.num_heap_tuples = reltuples;
2500  ivinfo.strategy = vac_strategy;
2501 
2502  /*
2503  * Update error traceback information.
2504  *
2505  * The index name is saved during this phase and restored immediately
2506  * after this phase. See vacuum_error_callback.
2507  */
2508  Assert(vacrelstats->indname == NULL);
2509  vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
2510  update_vacuum_error_info(vacrelstats, &saved_err_info,
2513 
2514  *stats = index_vacuum_cleanup(&ivinfo, *stats);
2515 
2516  if (*stats)
2517  {
2518  ereport(elevel,
2519  (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2520  RelationGetRelationName(indrel),
2521  (*stats)->num_index_tuples,
2522  (*stats)->num_pages),
2523  errdetail("%.0f index row versions were removed.\n"
2524  "%u index pages have been deleted, %u are currently reusable.\n"
2525  "%s.",
2526  (*stats)->tuples_removed,
2527  (*stats)->pages_deleted, (*stats)->pages_free,
2528  pg_rusage_show(&ru0))));
2529  }
2530 
2531  /* Revert to the previous phase information for error traceback */
2532  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2533  pfree(vacrelstats->indname);
2534  vacrelstats->indname = NULL;
2535 }
2536 
2537 /*
2538  * should_attempt_truncation - should we attempt to truncate the heap?
2539  *
2540  * Don't even think about it unless we have a shot at releasing a goodly
2541  * number of pages. Otherwise, the time taken isn't worth it.
2542  *
2543  * Also don't attempt it if we are doing early pruning/vacuuming, because a
2544  * scan which cannot find a truncated heap page cannot determine that the
2545  * snapshot is too old to read that page. We might be able to get away with
2546  * truncating all except one of the pages, setting its LSN to (at least) the
2547  * maximum of the truncated range if we also treated an index leaf tuple
2548  * pointing to a missing heap page as something to trigger the "snapshot too
2549  * old" error, but that seems fragile and seems like it deserves its own patch
2550  * if we consider it.
2551  *
2552  * This is split out so that we can test whether truncation is going to be
2553  * called for before we actually do it. If you change the logic here, be
2554  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
2555  */
2556 static bool
2558 {
2559  BlockNumber possibly_freeable;
2560 
2561  if (params->truncate == VACOPT_TERNARY_DISABLED)
2562  return false;
2563 
2564  possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
2565  if (possibly_freeable > 0 &&
2566  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2567  possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) &&
2569  return true;
2570  else
2571  return false;
2572 }
2573 
2574 /*
2575  * lazy_truncate_heap - try to truncate off any empty pages at the end
2576  */
2577 static void
2579 {
2580  BlockNumber old_rel_pages = vacrelstats->rel_pages;
2581  BlockNumber new_rel_pages;
2582  int lock_retry;
2583 
2584  /* Report that we are now truncating */
2587 
2588  /*
2589  * Loop until no more truncating can be done.
2590  */
2591  do
2592  {
2593  PGRUsage ru0;
2594 
2595  pg_rusage_init(&ru0);
2596 
2597  /*
2598  * We need full exclusive lock on the relation in order to do
2599  * truncation. If we can't get it, give up rather than waiting --- we
2600  * don't want to block other backends, and we don't want to deadlock
2601  * (which is quite possible considering we already hold a lower-grade
2602  * lock).
2603  */
2604  vacrelstats->lock_waiter_detected = false;
2605  lock_retry = 0;
2606  while (true)
2607  {
2609  break;
2610 
2611  /*
2612  * Check for interrupts while trying to (re-)acquire the exclusive
2613  * lock.
2614  */
2616 
2617  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2619  {
2620  /*
2621  * We failed to establish the lock in the specified number of
2622  * retries. This means we give up truncating.
2623  */
2624  vacrelstats->lock_waiter_detected = true;
2625  ereport(elevel,
2626  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2627  vacrelstats->relname)));
2628  return;
2629  }
2630 
2632  }
2633 
2634  /*
2635  * Now that we have exclusive lock, look to see if the rel has grown
2636  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2637  * the newly added pages presumably contain non-deletable tuples.
2638  */
2639  new_rel_pages = RelationGetNumberOfBlocks(onerel);
2640  if (new_rel_pages != old_rel_pages)
2641  {
2642  /*
2643  * Note: we intentionally don't update vacrelstats->rel_pages with
2644  * the new rel size here. If we did, it would amount to assuming
2645  * that the new pages are empty, which is unlikely. Leaving the
2646  * numbers alone amounts to assuming that the new pages have the
2647  * same tuple density as existing ones, which is less unlikely.
2648  */
2650  return;
2651  }
2652 
2653  /*
2654  * Scan backwards from the end to verify that the end pages actually
2655  * contain no tuples. This is *necessary*, not optional, because
2656  * other backends could have added tuples to these pages whilst we
2657  * were vacuuming.
2658  */
2659  new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
2660  vacrelstats->blkno = new_rel_pages;
2661 
2662  if (new_rel_pages >= old_rel_pages)
2663  {
2664  /* can't do anything after all */
2666  return;
2667  }
2668 
2669  /*
2670  * Okay to truncate.
2671  */
2672  RelationTruncate(onerel, new_rel_pages);
2673 
2674  /*
2675  * We can release the exclusive lock as soon as we have truncated.
2676  * Other backends can't safely access the relation until they have
2677  * processed the smgr invalidation that smgrtruncate sent out ... but
2678  * that should happen as part of standard invalidation processing once
2679  * they acquire lock on the relation.
2680  */
2682 
2683  /*
2684  * Update statistics. Here, it *is* correct to adjust rel_pages
2685  * without also touching reltuples, since the tuple count wasn't
2686  * changed by the truncation.
2687  */
2688  vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
2689  vacrelstats->rel_pages = new_rel_pages;
2690 
2691  ereport(elevel,
2692  (errmsg("\"%s\": truncated %u to %u pages",
2693  vacrelstats->relname,
2694  old_rel_pages, new_rel_pages),
2695  errdetail_internal("%s",
2696  pg_rusage_show(&ru0))));
2697  old_rel_pages = new_rel_pages;
2698  } while (new_rel_pages > vacrelstats->nonempty_pages &&
2699  vacrelstats->lock_waiter_detected);
2700 }
2701 
2702 /*
2703  * Rescan end pages to verify that they are (still) empty of tuples.
2704  *
2705  * Returns number of nondeletable pages (last nonempty page + 1).
2706  */
2707 static BlockNumber
2709 {
2710  BlockNumber blkno;
2711  BlockNumber prefetchedUntil;
2712  instr_time starttime;
2713 
2714  /* Initialize the starttime if we check for conflicting lock requests */
2715  INSTR_TIME_SET_CURRENT(starttime);
2716 
2717  /*
2718  * Start checking blocks at what we believe relation end to be and move
2719  * backwards. (Strange coding of loop control is needed because blkno is
2720  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2721  * in forward direction, so that OS-level readahead can kick in.
2722  */
2723  blkno = vacrelstats->rel_pages;
2725  "prefetch size must be power of 2");
2726  prefetchedUntil = InvalidBlockNumber;
2727  while (blkno > vacrelstats->nonempty_pages)
2728  {
2729  Buffer buf;
2730  Page page;
2731  OffsetNumber offnum,
2732  maxoff;
2733  bool hastup;
2734 
2735  /*
2736  * Check if another process requests a lock on our relation. We are
2737  * holding an AccessExclusiveLock here, so they will be waiting. We
2738  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2739  * only check if that interval has elapsed once every 32 blocks to
2740  * keep the number of system calls and actual shared lock table
2741  * lookups to a minimum.
2742  */
2743  if ((blkno % 32) == 0)
2744  {
2745  instr_time currenttime;
2746  instr_time elapsed;
2747 
2748  INSTR_TIME_SET_CURRENT(currenttime);
2749  elapsed = currenttime;
2750  INSTR_TIME_SUBTRACT(elapsed, starttime);
2751  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2753  {
2755  {
2756  ereport(elevel,
2757  (errmsg("\"%s\": suspending truncate due to conflicting lock request",
2758  vacrelstats->relname)));
2759 
2760  vacrelstats->lock_waiter_detected = true;
2761  return blkno;
2762  }
2763  starttime = currenttime;
2764  }
2765  }
2766 
2767  /*
2768  * We don't insert a vacuum delay point here, because we have an
2769  * exclusive lock on the table which we want to hold for as short a
2770  * time as possible. We still need to check for interrupts however.
2771  */
2773 
2774  blkno--;
2775 
2776  /* If we haven't prefetched this lot yet, do so now. */
2777  if (prefetchedUntil > blkno)
2778  {
2779  BlockNumber prefetchStart;
2780  BlockNumber pblkno;
2781 
2782  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2783  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2784  {
2785  PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);
2787  }
2788  prefetchedUntil = prefetchStart;
2789  }
2790 
2791  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
2792  RBM_NORMAL, vac_strategy);
2793 
2794  /* In this phase we only need shared access to the buffer */
2796 
2797  page = BufferGetPage(buf);
2798 
2799  if (PageIsNew(page) || PageIsEmpty(page))
2800  {
2801  UnlockReleaseBuffer(buf);
2802  continue;
2803  }
2804 
2805  hastup = false;
2806  maxoff = PageGetMaxOffsetNumber(page);
2807  for (offnum = FirstOffsetNumber;
2808  offnum <= maxoff;
2809  offnum = OffsetNumberNext(offnum))
2810  {
2811  ItemId itemid;
2812 
2813  itemid = PageGetItemId(page, offnum);
2814 
2815  /*
2816  * Note: any non-unused item should be taken as a reason to keep
2817  * this page. We formerly thought that DEAD tuples could be
2818  * thrown away, but that's not so, because we'd not have cleaned
2819  * out their index entries.
2820  */
2821  if (ItemIdIsUsed(itemid))
2822  {
2823  hastup = true;
2824  break; /* can stop scanning */
2825  }
2826  } /* scan along page */
2827 
2828  UnlockReleaseBuffer(buf);
2829 
2830  /* Done scanning if we found a tuple here */
2831  if (hastup)
2832  return blkno + 1;
2833  }
2834 
2835  /*
2836  * If we fall out of the loop, all the previously-thought-to-be-empty
2837  * pages still are; we need not bother to look at the last known-nonempty
2838  * page.
2839  */
2840  return vacrelstats->nonempty_pages;
2841 }
2842 
2843 /*
2844  * Return the maximum number of dead tuples we can record.
2845  */
2846 static long
2847 compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
2848 {
2849  long maxtuples;
2850  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
2851  autovacuum_work_mem != -1 ?
2853 
2854  if (useindex)
2855  {
2856  maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
2857  maxtuples = Min(maxtuples, INT_MAX);
2858  maxtuples = Min(maxtuples, MAXDEADTUPLES(MaxAllocSize));
2859 
2860  /* curious coding here to ensure the multiplication can't overflow */
2861  if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
2862  maxtuples = relblocks * LAZY_ALLOC_TUPLES;
2863 
2864  /* stay sane if small maintenance_work_mem */
2865  maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
2866  }
2867  else
2868  maxtuples = MaxHeapTuplesPerPage;
2869 
2870  return maxtuples;
2871 }
2872 
2873 /*
2874  * lazy_space_alloc - space allocation decisions for lazy vacuum
2875  *
2876  * See the comments at the head of this file for rationale.
2877  */
2878 static void
2879 lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
2880 {
2881  LVDeadTuples *dead_tuples = NULL;
2882  long maxtuples;
2883 
2884  maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
2885 
2886  dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
2887  dead_tuples->num_tuples = 0;
2888  dead_tuples->max_tuples = (int) maxtuples;
2889 
2890  vacrelstats->dead_tuples = dead_tuples;
2891 }
2892 
2893 /*
2894  * lazy_record_dead_tuple - remember one deletable tuple
2895  */
2896 static void
2898 {
2899  /*
2900  * The array shouldn't overflow under normal behavior, but perhaps it
2901  * could if we are given a really small maintenance_work_mem. In that
2902  * case, just forget the last few tuples (we'll get 'em next time).
2903  */
2904  if (dead_tuples->num_tuples < dead_tuples->max_tuples)
2905  {
2906  dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr;
2907  dead_tuples->num_tuples++;
2909  dead_tuples->num_tuples);
2910  }
2911 }
2912 
2913 /*
2914  * lazy_tid_reaped() -- is a particular tid deletable?
2915  *
2916  * This has the right signature to be an IndexBulkDeleteCallback.
2917  *
2918  * Assumes dead_tuples array is in sorted order.
2919  */
2920 static bool
2922 {
2923  LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
2924  ItemPointer res;
2925 
2926  res = (ItemPointer) bsearch((void *) itemptr,
2927  (void *) dead_tuples->itemptrs,
2928  dead_tuples->num_tuples,
2929  sizeof(ItemPointerData),
2930  vac_cmp_itemptr);
2931 
2932  return (res != NULL);
2933 }
2934 
2935 /*
2936  * Comparator routines for use with qsort() and bsearch().
2937  */
2938 static int
2939 vac_cmp_itemptr(const void *left, const void *right)
2940 {
2941  BlockNumber lblk,
2942  rblk;
2943  OffsetNumber loff,
2944  roff;
2945 
2946  lblk = ItemPointerGetBlockNumber((ItemPointer) left);
2947  rblk = ItemPointerGetBlockNumber((ItemPointer) right);
2948 
2949  if (lblk < rblk)
2950  return -1;
2951  if (lblk > rblk)
2952  return 1;
2953 
2954  loff = ItemPointerGetOffsetNumber((ItemPointer) left);
2955  roff = ItemPointerGetOffsetNumber((ItemPointer) right);
2956 
2957  if (loff < roff)
2958  return -1;
2959  if (loff > roff)
2960  return 1;
2961 
2962  return 0;
2963 }
2964 
2965 /*
2966  * Check if every tuple in the given page is visible to all current and future
2967  * transactions. Also return the visibility_cutoff_xid which is the highest
2968  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
2969  * on this page is frozen.
2970  */
2971 static bool
2973  LVRelStats *vacrelstats,
2974  TransactionId *visibility_cutoff_xid,
2975  bool *all_frozen)
2976 {
2977  Page page = BufferGetPage(buf);
2978  BlockNumber blockno = BufferGetBlockNumber(buf);
2979  OffsetNumber offnum,
2980  maxoff;
2981  bool all_visible = true;
2982 
2983  *visibility_cutoff_xid = InvalidTransactionId;
2984  *all_frozen = true;
2985 
2986  /*
2987  * This is a stripped down version of the line pointer scan in
2988  * lazy_scan_heap(). So if you change anything here, also check that code.
2989  */
2990  maxoff = PageGetMaxOffsetNumber(page);
2991  for (offnum = FirstOffsetNumber;
2992  offnum <= maxoff && all_visible;
2993  offnum = OffsetNumberNext(offnum))
2994  {
2995  ItemId itemid;
2996  HeapTupleData tuple;
2997 
2998  /*
2999  * Set the offset number so that we can display it along with any
3000  * error that occurred while processing this tuple.
3001  */
3002  vacrelstats->offnum = offnum;
3003  itemid = PageGetItemId(page, offnum);
3004 
3005  /* Unused or redirect line pointers are of no interest */
3006  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3007  continue;
3008 
3009  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3010 
3011  /*
3012  * Dead line pointers can have index pointers pointing to them. So
3013  * they can't be treated as visible
3014  */
3015  if (ItemIdIsDead(itemid))
3016  {
3017  all_visible = false;
3018  *all_frozen = false;
3019  break;
3020  }
3021 
3022  Assert(ItemIdIsNormal(itemid));
3023 
3024  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3025  tuple.t_len = ItemIdGetLength(itemid);
3026  tuple.t_tableOid = RelationGetRelid(rel);
3027 
3028  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3029  {
3030  case HEAPTUPLE_LIVE:
3031  {
3032  TransactionId xmin;
3033 
3034  /* Check comments in lazy_scan_heap. */
3036  {
3037  all_visible = false;
3038  *all_frozen = false;
3039  break;
3040  }
3041 
3042  /*
3043  * The inserter definitely committed. But is it old enough
3044  * that everyone sees it as committed?
3045  */
3046  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3047  if (!TransactionIdPrecedes(xmin, OldestXmin))
3048  {
3049  all_visible = false;
3050  *all_frozen = false;
3051  break;
3052  }
3053 
3054  /* Track newest xmin on page. */
3055  if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
3056  *visibility_cutoff_xid = xmin;
3057 
3058  /* Check whether this tuple is already frozen or not */
3059  if (all_visible && *all_frozen &&
3061  *all_frozen = false;
3062  }
3063  break;
3064 
3065  case HEAPTUPLE_DEAD:
3069  {
3070  all_visible = false;
3071  *all_frozen = false;
3072  break;
3073  }
3074  default:
3075  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3076  break;
3077  }
3078  } /* scan along page */
3079 
3080  /* Clear the offset information once we have processed the given page. */
3081  vacrelstats->offnum = InvalidOffsetNumber;
3082 
3083  return all_visible;
3084 }
3085 
3086 /*
3087  * Compute the number of parallel worker processes to request. Both index
3088  * vacuum and index cleanup can be executed with parallel workers. The index
3089  * is eligible for parallel vacuum iff its size is greater than
3090  * min_parallel_index_scan_size as invoking workers for very small indexes
3091  * can hurt performance.
3092  *
3093  * nrequested is the number of parallel workers that user requested. If
3094  * nrequested is 0, we compute the parallel degree based on nindexes, that is
3095  * the number of indexes that support parallel vacuum. This function also
3096  * sets can_parallel_vacuum to remember indexes that participate in parallel
3097  * vacuum.
3098  */
3099 static int
3100 compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
3101  bool *can_parallel_vacuum)
3102 {
3103  int nindexes_parallel = 0;
3104  int nindexes_parallel_bulkdel = 0;
3105  int nindexes_parallel_cleanup = 0;
3106  int parallel_workers;
3107  int i;
3108 
3109  /*
3110  * We don't allow performing parallel operation in standalone backend or
3111  * when parallelism is disabled.
3112  */
3114  return 0;
3115 
3116  /*
3117  * Compute the number of indexes that can participate in parallel vacuum.
3118  */
3119  for (i = 0; i < nindexes; i++)
3120  {
3121  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3122 
3123  if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
3125  continue;
3126 
3127  can_parallel_vacuum[i] = true;
3128 
3129  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3130  nindexes_parallel_bulkdel++;
3131  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
3132  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3133  nindexes_parallel_cleanup++;
3134  }
3135 
3136  nindexes_parallel = Max(nindexes_parallel_bulkdel,
3137  nindexes_parallel_cleanup);
3138 
3139  /* The leader process takes one index */
3140  nindexes_parallel--;
3141 
3142  /* No index supports parallel vacuum */
3143  if (nindexes_parallel <= 0)
3144  return 0;
3145 
3146  /* Compute the parallel degree */
3147  parallel_workers = (nrequested > 0) ?
3148  Min(nrequested, nindexes_parallel) : nindexes_parallel;
3149 
3150  /* Cap by max_parallel_maintenance_workers */
3151  parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
3152 
3153  return parallel_workers;
3154 }
3155 
3156 /*
3157  * Initialize variables for shared index statistics, set NULL bitmap and the
3158  * size of stats for each index.
3159  */
3160 static void
3161 prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
3162  int nindexes)
3163 {
3164  int i;
3165 
3166  /* Currently, we don't support parallel vacuum for autovacuum */
3168 
3169  /* Set NULL for all indexes */
3170  memset(lvshared->bitmap, 0x00, BITMAPLEN(nindexes));
3171 
3172  for (i = 0; i < nindexes; i++)
3173  {
3174  if (!can_parallel_vacuum[i])
3175  continue;
3176 
3177  /* Set NOT NULL as this index does support parallelism */
3178  lvshared->bitmap[i >> 3] |= 1 << (i & 0x07);
3179  }
3180 }
3181 
3182 /*
3183  * Update index statistics in pg_class if the statistics are accurate.
3184  */
3185 static void
3187  int nindexes)
3188 {
3189  int i;
3190 
3192 
3193  for (i = 0; i < nindexes; i++)
3194  {
3195  if (stats[i] == NULL || stats[i]->estimated_count)
3196  continue;
3197 
3198  /* Update index statistics */
3199  vac_update_relstats(Irel[i],
3200  stats[i]->num_pages,
3201  stats[i]->num_index_tuples,
3202  0,
3203  false,
3206  false);
3207  pfree(stats[i]);
3208  }
3209 }
3210 
3211 /*
3212  * This function prepares and returns parallel vacuum state if we can launch
3213  * even one worker. This function is responsible for entering parallel mode,
3214  * create a parallel context, and then initialize the DSM segment.
3215  */
3216 static LVParallelState *
3217 begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
3218  BlockNumber nblocks, int nindexes, int nrequested)
3219 {
3220  LVParallelState *lps = NULL;
3221  ParallelContext *pcxt;
3222  LVShared *shared;
3223  LVDeadTuples *dead_tuples;
3224  BufferUsage *buffer_usage;
3225  WalUsage *wal_usage;
3226  bool *can_parallel_vacuum;
3227  long maxtuples;
3228  Size est_shared;
3229  Size est_deadtuples;
3230  int nindexes_mwm = 0;
3231  int parallel_workers = 0;
3232  int querylen;
3233  int i;
3234 
3235  /*
3236  * A parallel vacuum must be requested and there must be indexes on the
3237  * relation
3238  */
3239  Assert(nrequested >= 0);
3240  Assert(nindexes > 0);
3241 
3242  /*
3243  * Compute the number of parallel vacuum workers to launch
3244  */
3245  can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
3246  parallel_workers = compute_parallel_vacuum_workers(Irel, nindexes,
3247  nrequested,
3248  can_parallel_vacuum);
3249 
3250  /* Can't perform vacuum in parallel */
3251  if (parallel_workers <= 0)
3252  {
3253  pfree(can_parallel_vacuum);
3254  return lps;
3255  }
3256 
3257  lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
3258 
3260  pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
3261  parallel_workers);
3262  Assert(pcxt->nworkers > 0);
3263  lps->pcxt = pcxt;
3264 
3265  /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
3266  est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3267  for (i = 0; i < nindexes; i++)
3268  {
3269  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3270 
3271  /*
3272  * Cleanup option should be either disabled, always performing in
3273  * parallel or conditionally performing in parallel.
3274  */
3275  Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
3276  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
3277  Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
3278 
3279  /* Skip indexes that don't participate in parallel vacuum */
3280  if (!can_parallel_vacuum[i])
3281  continue;
3282 
3283  if (Irel[i]->rd_indam->amusemaintenanceworkmem)
3284  nindexes_mwm++;
3285 
3286  est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
3287 
3288  /*
3289  * Remember the number of indexes that support parallel operation for
3290  * each phase.
3291  */
3292  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3294  if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
3296  if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
3298  }
3299  shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
3300  shm_toc_estimate_keys(&pcxt->estimator, 1);
3301 
3302  /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
3303  maxtuples = compute_max_dead_tuples(nblocks, true);
3304  est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
3305  shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
3306  shm_toc_estimate_keys(&pcxt->estimator, 1);
3307 
3308  /*
3309  * Estimate space for BufferUsage and WalUsage --
3310  * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
3311  *
3312  * If there are no extensions loaded that care, we could skip this. We
3313  * have no way of knowing whether anyone's looking at pgBufferUsage or
3314  * pgWalUsage, so do it unconditionally.
3315  */
3317  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3318  shm_toc_estimate_keys(&pcxt->estimator, 1);
3320  mul_size(sizeof(WalUsage), pcxt->nworkers));
3321  shm_toc_estimate_keys(&pcxt->estimator, 1);
3322 
3323  /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
3324  if (debug_query_string)
3325  {
3326  querylen = strlen(debug_query_string);
3327  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
3328  shm_toc_estimate_keys(&pcxt->estimator, 1);
3329  }
3330  else
3331  querylen = 0; /* keep compiler quiet */
3332 
3333  InitializeParallelDSM(pcxt);
3334 
3335  /* Prepare shared information */
3336  shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
3337  MemSet(shared, 0, est_shared);
3338  shared->relid = relid;
3339  shared->elevel = elevel;
3340  shared->maintenance_work_mem_worker =
3341  (nindexes_mwm > 0) ?
3342  maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
3344 
3345  pg_atomic_init_u32(&(shared->cost_balance), 0);
3346  pg_atomic_init_u32(&(shared->active_nworkers), 0);
3347  pg_atomic_init_u32(&(shared->idx), 0);
3348  shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3349  prepare_index_statistics(shared, can_parallel_vacuum, nindexes);
3350 
3352  lps->lvshared = shared;
3353 
3354  /* Prepare the dead tuple space */
3355  dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
3356  dead_tuples->max_tuples = maxtuples;
3357  dead_tuples->num_tuples = 0;
3358  MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
3359  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
3360  vacrelstats->dead_tuples = dead_tuples;
3361 
3362  /*
3363  * Allocate space for each worker's BufferUsage and WalUsage; no need to
3364  * initialize
3365  */
3366  buffer_usage = shm_toc_allocate(pcxt->toc,
3367  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3368  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
3369  lps->buffer_usage = buffer_usage;
3370  wal_usage = shm_toc_allocate(pcxt->toc,
3371  mul_size(sizeof(WalUsage), pcxt->nworkers));
3373  lps->wal_usage = wal_usage;
3374 
3375  /* Store query string for workers */
3376  if (debug_query_string)
3377  {
3378  char *sharedquery;
3379 
3380  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
3381  memcpy(sharedquery, debug_query_string, querylen + 1);
3382  sharedquery[querylen] = '\0';
3383  shm_toc_insert(pcxt->toc,
3384  PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
3385  }
3386 
3387  pfree(can_parallel_vacuum);
3388  return lps;
3389 }
3390 
3391 /*
3392  * Destroy the parallel context, and end parallel mode.
3393  *
3394  * Since writes are not allowed during parallel mode, copy the
3395  * updated index statistics from DSM into local memory and then later use that
3396  * to update the index statistics. One might think that we can exit from
3397  * parallel mode, update the index statistics and then destroy parallel
3398  * context, but that won't be safe (see ExitParallelMode).
3399  */
3400 static void
3402  int nindexes)
3403 {
3404  int i;
3405 
3407 
3408  /* Copy the updated statistics */
3409  for (i = 0; i < nindexes; i++)
3410  {
3411  LVSharedIndStats *indstats = get_indstats(lps->lvshared, i);
3412 
3413  /*
3414  * Skip unused slot. The statistics of this index are already stored
3415  * in local memory.
3416  */
3417  if (indstats == NULL)
3418  continue;
3419 
3420  if (indstats->updated)
3421  {
3422  stats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
3423  memcpy(stats[i], &(indstats->stats), sizeof(IndexBulkDeleteResult));
3424  }
3425  else
3426  stats[i] = NULL;
3427  }
3428 
3430  ExitParallelMode();
3431 
3432  /* Deactivate parallel vacuum */
3433  pfree(lps);
3434  lps = NULL;
3435 }
3436 
3437 /* Return the Nth index statistics or NULL */
3438 static LVSharedIndStats *
3439 get_indstats(LVShared *lvshared, int n)
3440 {
3441  int i;
3442  char *p;
3443 
3444  if (IndStatsIsNull(lvshared, n))
3445  return NULL;
3446 
3447  p = (char *) GetSharedIndStats(lvshared);
3448  for (i = 0; i < n; i++)
3449  {
3450  if (IndStatsIsNull(lvshared, i))
3451  continue;
3452 
3453  p += sizeof(LVSharedIndStats);
3454  }
3455 
3456  return (LVSharedIndStats *) p;
3457 }
3458 
3459 /*
3460  * Returns true, if the given index can't participate in parallel index vacuum
3461  * or parallel index cleanup, false, otherwise.
3462  */
3463 static bool
3465 {
3466  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3467 
3468  /* first_time must be true only if for_cleanup is true */
3469  Assert(lvshared->for_cleanup || !lvshared->first_time);
3470 
3471  if (lvshared->for_cleanup)
3472  {
3473  /* Skip, if the index does not support parallel cleanup */
3474  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
3475  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
3476  return true;
3477 
3478  /*
3479  * Skip, if the index supports parallel cleanup conditionally, but we
3480  * have already processed the index (for bulkdelete). See the
3481  * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
3482  * when indexes support parallel cleanup conditionally.
3483  */
3484  if (!lvshared->first_time &&
3485  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3486  return true;
3487  }
3488  else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
3489  {
3490  /* Skip if the index does not support parallel bulk deletion */
3491  return true;
3492  }
3493 
3494  return false;
3495 }
3496 
3497 /*
3498  * Perform work within a launched parallel process.
3499  *
3500  * Since parallel vacuum workers perform only index vacuum or index cleanup,
3501  * we don't need to report progress information.
3502  */
3503 void
3505 {
3506  Relation onerel;
3507  Relation *indrels;
3508  LVShared *lvshared;
3509  LVDeadTuples *dead_tuples;
3510  BufferUsage *buffer_usage;
3511  WalUsage *wal_usage;
3512  int nindexes;
3513  char *sharedquery;
3514  IndexBulkDeleteResult **stats;
3515  LVRelStats vacrelstats;
3516  ErrorContextCallback errcallback;
3517 
3519  false);
3520  elevel = lvshared->elevel;
3521 
3522  if (lvshared->for_cleanup)
3523  elog(DEBUG1, "starting parallel vacuum worker for cleanup");
3524  else
3525  elog(DEBUG1, "starting parallel vacuum worker for bulk delete");
3526 
3527  /* Set debug_query_string for individual workers */
3528  sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, true);
3529  debug_query_string = sharedquery;
3531 
3532  /*
3533  * Open table. The lock mode is the same as the leader process. It's
3534  * okay because the lock mode does not conflict among the parallel
3535  * workers.
3536  */
3537  onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
3538 
3539  /*
3540  * Open all indexes. indrels are sorted in order by OID, which should be
3541  * matched to the leader's one.
3542  */
3543  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels);
3544  Assert(nindexes > 0);
3545 
3546  /* Set dead tuple space */
3547  dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
3549  false);
3550 
3551  /* Set cost-based vacuum delay */
3553  VacuumCostBalance = 0;
3554  VacuumPageHit = 0;
3555  VacuumPageMiss = 0;
3556  VacuumPageDirty = 0;
3558  VacuumSharedCostBalance = &(lvshared->cost_balance);
3559  VacuumActiveNWorkers = &(lvshared->active_nworkers);
3560 
3561  stats = (IndexBulkDeleteResult **)
3562  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
3563 
3564  if (lvshared->maintenance_work_mem_worker > 0)
3566 
3567  /*
3568  * Initialize vacrelstats for use as error callback arg by parallel
3569  * worker.
3570  */
3571  vacrelstats.relnamespace = get_namespace_name(RelationGetNamespace(onerel));
3572  vacrelstats.relname = pstrdup(RelationGetRelationName(onerel));
3573  vacrelstats.indname = NULL;
3574  vacrelstats.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
3575 
3576  /* Setup error traceback support for ereport() */
3577  errcallback.callback = vacuum_error_callback;
3578  errcallback.arg = &vacrelstats;
3579  errcallback.previous = error_context_stack;
3580  error_context_stack = &errcallback;
3581 
3582  /* Prepare to track buffer usage during parallel execution */
3584 
3585  /* Process indexes to perform vacuum/cleanup */
3586  parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes,
3587  &vacrelstats);
3588 
3589  /* Report buffer/WAL usage during parallel execution */
3590  buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
3591  wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
3593  &wal_usage[ParallelWorkerNumber]);
3594 
3595  /* Pop the error context stack */
3596  error_context_stack = errcallback.previous;
3597 
3598  vac_close_indexes(nindexes, indrels, RowExclusiveLock);
3600  pfree(stats);
3601 }
3602 
3603 /*
3604  * Error context callback for errors occurring during vacuum.
3605  */
3606 static void
3608 {
3609  LVRelStats *errinfo = arg;
3610 
3611  switch (errinfo->phase)
3612  {
3614  if (BlockNumberIsValid(errinfo->blkno))
3615  {
3616  if (OffsetNumberIsValid(errinfo->offnum))
3617  errcontext("while scanning block %u and offset %u of relation \"%s.%s\"",
3618  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3619  else
3620  errcontext("while scanning block %u of relation \"%s.%s\"",
3621  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3622  }
3623  else
3624  errcontext("while scanning relation \"%s.%s\"",
3625  errinfo->relnamespace, errinfo->relname);
3626  break;
3627 
3629  if (BlockNumberIsValid(errinfo->blkno))
3630  {
3631  if (OffsetNumberIsValid(errinfo->offnum))
3632  errcontext("while vacuuming block %u and offset %u of relation \"%s.%s\"",
3633  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3634  else
3635  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3636  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3637  }
3638  else
3639  errcontext("while vacuuming relation \"%s.%s\"",
3640  errinfo->relnamespace, errinfo->relname);
3641  break;
3642 
3644  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3645  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3646  break;
3647 
3649  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3650  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3651  break;
3652 
3654  if (BlockNumberIsValid(errinfo->blkno))
3655  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3656  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3657  break;
3658 
3660  default:
3661  return; /* do nothing; the errinfo may not be
3662  * initialized */
3663  }
3664 }
3665 
3666 /*
3667  * Updates the information required for vacuum error callback. This also saves
3668  * the current information which can be later restored via restore_vacuum_error_info.
3669  */
3670 static void
3671 update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase,
3672  BlockNumber blkno, OffsetNumber offnum)
3673 {
3674  if (saved_err_info)
3675  {
3676  saved_err_info->offnum = errinfo->offnum;
3677  saved_err_info->blkno = errinfo->blkno;
3678  saved_err_info->phase = errinfo->phase;
3679  }
3680 
3681  errinfo->blkno = blkno;
3682  errinfo->offnum = offnum;
3683  errinfo->phase = phase;
3684 }
3685 
3686 /*
3687  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3688  */
3689 static void
3690 restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info)
3691 {
3692  errinfo->blkno = saved_err_info->blkno;
3693  errinfo->offnum = saved_err_info->offnum;
3694  errinfo->phase = saved_err_info->phase;
3695 }
int autovacuum_work_mem
Definition: autovacuum.c:116
double new_rel_tuples
Definition: vacuumlazy.c:306
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:6903
#define GetSharedIndStats(s)
Definition: vacuumlazy.c:254
uint8 amparallelvacuumoptions
Definition: amapi.h:247
XLogRecPtr log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
Definition: heapam.c:7143
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3607
struct IndexAmRoutine * rd_indam
Definition: rel.h:189
int multixact_freeze_table_age
Definition: vacuum.h:215
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2010
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
Definition: vacuumlazy.c:1924
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3809
XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples)
Definition: heapam.c:7223
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
WalUsage * wal_usage
Definition: vacuumlazy.c:281
#define PageIsEmpty(page)
Definition: bufpage.h:222
int64 VacuumPageMiss
Definition: globals.c:145
#define DEBUG1
Definition: elog.h:25
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:167
static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2708
int heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, bool report_stats, TransactionId *latestRemovedXid, OffsetNumber *off_loc)
Definition: pruneheap.c:223
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1090
BlockNumber rel_pages
Definition: vacuumlazy.c:300
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:77
static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested, bool *can_parallel_vacuum)
Definition: vacuumlazy.c:3100
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1141
OffsetNumber offset
Definition: heapam_xlog.h:320
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:1843
int VacuumCostBalance
Definition: globals.c:148
ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:172
bool estimated_count
Definition: vacuumlazy.c:215
static bool lazy_tid_reaped(ItemPointer itemptr, void *state)
Definition: vacuumlazy.c:2921
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:164
LVDeadTuples * dead_tuples
Definition: vacuumlazy.c:312
#define PageIsAllVisible(page)
Definition: bufpage.h:385
uint32 TransactionId
Definition: c.h:575
#define IndStatsIsNull(s, i)
Definition: vacuumlazy.c:256
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:282
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
#define PARALLEL_VACUUM_KEY_DEAD_TUPLES
Definition: vacuumlazy.c:139
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
Definition: pgstat.c:3374
Oid relid
Definition: vacuumlazy.c:193
#define PROGRESS_VACUUM_MAX_DEAD_TUPLES
Definition: progress.h:26
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:31
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1578
double tuples_deleted
Definition: vacuumlazy.c:310
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: pgstat.c:3296
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1471
int64 TimestampTz
Definition: timestamp.h:39
WalUsage pgWalUsage
Definition: instrument.c:22
#define SizeOfDeadTuples(cnt)
Definition: vacuumlazy.c:177
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
#define MaxOffsetNumber
Definition: off.h:28
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3395
BufferUsage * buffer_usage
Definition: vacuumlazy.c:278
#define VISIBILITYMAP_ALL_FROZEN
Definition: visibilitymap.h:27
char * pstrdup(const char *in)
Definition: mcxt.c:1187
static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr)
Definition: vacuumlazy.c:2897
shm_toc_estimator estimator
Definition: parallel.h:42
bool analyze_only
Definition: genam.h:47
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:654
bool useindex
Definition: vacuumlazy.c:297
#define XLogIsNeeded()
Definition: xlog.h:191
struct timeval instr_time
Definition: instr_time.h:150
int64 VacuumPageHit
Definition: globals.c:144
#define Min(x, y)
Definition: c.h:982
bool report_progress
Definition: genam.h:48
BlockNumber tupcount_pages
Definition: vacuumlazy.c:304
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BufferAccessStrategy strategy
Definition: genam.h:52
struct LVSharedIndStats LVSharedIndStats
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:255
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:34
unsigned char uint8
Definition: c.h:427
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define InvalidBuffer
Definition: buf.h:25
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:338
char * relnamespace
Definition: vacuumlazy.c:294
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define VACUUM_OPTION_MAX_VALID_VALUE
Definition: vacuum.h:63
Relation index
Definition: genam.h:46
BlockNumber scanned_pages
Definition: vacuumlazy.c:301
VacErrPhase phase
Definition: vacuumlazy.c:321
#define MemSet(start, val, len)
Definition: c.h:1004
#define INFO
Definition: elog.h:33
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:103
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:931
int64 VacuumPageDirty
Definition: globals.c:146
static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
Definition: vacuumlazy.c:2847
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3513
#define BITMAPLEN(NATTS)
Definition: htup_details.h:547
static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared)
Definition: vacuumlazy.c:3464
int nindexes_parallel_bulkdel
Definition: vacuumlazy.c:287
OffsetNumber offnum
Definition: vacuumlazy.c:328
BlockNumber pinskipped_pages
Definition: vacuumlazy.c:302
int maintenance_work_mem_worker
Definition: vacuumlazy.c:225
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:110
unsigned int Oid
Definition: postgres_ext.h:31
static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info)
Definition: vacuumlazy.c:3690
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
int nindexes_parallel_condcleanup
Definition: vacuumlazy.c:289
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:189
void(* callback)(void *arg)
Definition: elog.h:243
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1709
struct ErrorContextCallback * previous
Definition: elog.h:242
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
int freeze_table_age
Definition: vacuum.h:212
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:513
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1062
#define PARALLEL_VACUUM_KEY_QUERY_TEXT
Definition: vacuumlazy.c:140
static LVSharedIndStats * get_indstats(LVShared *lvshared, int n)
Definition: vacuumlazy.c:3439
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define VISIBILITYMAP_VALID_BITS
Definition: visibilitymap.h:28
HeapTupleHeader t_data
Definition: htup.h:68
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen_p)
Definition: heapam.c:6126
#define FORCE_CHECK_PAGE()
ErrorContextCallback * error_context_stack
Definition: elog.c:92
ParallelContext * pcxt
Definition: vacuumlazy.c:272
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:34
#define SizeOfLVShared
Definition: vacuumlazy.c:253
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:676
pg_atomic_uint32 cost_balance
Definition: vacuumlazy.c:232
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:914
BlockNumber blkno
Definition: vacuumlazy.c:327
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:750
BlockNumber old_rel_pages
Definition: vacuumlazy.c:299
void pg_usleep(long microsec)
Definition: signal.c:53
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:904
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3922
#define PREFETCH_SIZE
Definition: vacuumlazy.c:131
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:6771
VacErrPhase phase
Definition: vacuumlazy.c:329
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
void pfree(void *pointer)
Definition: mcxt.c:1057
bool IsInParallelMode(void)
Definition: xact.c:1012
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:111
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3536
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3946
#define ERROR
Definition: elog.h:43
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:91
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:874
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:170
int freeze_min_age
Definition: vacuum.h:211
ItemPointerData t_self
Definition: htup.h:65
void ExitParallelMode(void)
Definition: xact.c:992
bool is_wraparound
Definition: vacuum.h:217
long wal_records
Definition: instrument.h:37
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define DEBUG2
Definition: elog.h:24
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:324
static TransactionId FreezeLimit
Definition: vacuumlazy.c:336
IndexBulkDeleteResult stats
Definition: vacuumlazy.c:266
static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:1784
uint32 t_len
Definition: htup.h:64
void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
Definition: heapam.c:6355
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3196
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:68
#define PageSetAllVisible(page)
Definition: bufpage.h:387
bool IsUnderPostmaster
Definition: globals.c:109
#define FirstOffsetNumber
Definition: off.h:27
#define RowExclusiveLock
Definition: lockdefs.h:38
struct LVDeadTuples LVDeadTuples
int errdetail(const char *fmt,...)
Definition: elog.c:1035
int elevel
Definition: vacuumlazy.c:194
int ParallelWorkerNumber
Definition: parallel.c:112
static MultiXactId MultiXactCutoff
Definition: vacuumlazy.c:337
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:247
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:491
static TransactionId OldestXmin
Definition: vacuumlazy.c:335
pg_atomic_uint32 idx
Definition: vacuumlazy.c:246
unsigned int uint32
Definition: c.h:429
Oid t_tableOid
Definition: htup.h:66
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
int min_parallel_index_scan_size
Definition: allpaths.c:65
int nworkers_launched
Definition: parallel.h:38
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2431
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:527
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3325
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:199
static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2273
bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf)
Definition: heapam.c:6824
#define PROGRESS_VACUUM_NUM_DEAD_TUPLES
Definition: progress.h:27
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2031
#define IsParallelWorker()
Definition: parallel.h:61
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
double new_live_tuples
Definition: vacuumlazy.c:307
bool first_time
Definition: vacuumlazy.c:202
VacOptTernaryValue index_cleanup
Definition: vacuum.h:221
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:125
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:102
#define MaxAllocSize
Definition: memutils.h:40
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define WARNING
Definition: elog.h:40
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:1967
const char * debug_query_string
Definition: postgres.c:88
static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum, int nindexes)
Definition: vacuumlazy.c:3161
double reltuples
Definition: vacuumlazy.c:214
#define VACUUM_OPTION_NO_PARALLEL
Definition: vacuum.h:39
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:202
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int VacuumCostBalanceLocal
Definition: vacuum.c:78
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:76
void InstrStartParallelQuery(void)
Definition: instrument.c:181
static int elevel
Definition: vacuumlazy.c:333
uint8 bits8
Definition: c.h:436
#define ngettext(s, p, n)
Definition: c.h:1178
static void lazy_cleanup_index(Relation indrel, IndexBulkDeleteResult **stats, double reltuples, bool estimated_count, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2483
int nindexes_parallel_cleanup
Definition: vacuumlazy.c:288
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2091
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:230
#define ParallelVacuumIsActive(lps)
Definition: vacuumlazy.c:148
void * palloc0(Size size)
Definition: mcxt.c:981
char * indname
Definition: vacuumlazy.c:318
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:101
void pgstat_progress_end_command(void)
Definition: pgstat.c:3446
char * relname
Definition: vacuumlazy.c:295
IndexBulkDeleteResult * index_bulk_delete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: indexam.c:689
void heap_vacuum_rel(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:419
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
Oid MyDatabaseId
Definition: globals.c:85
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3752
#define PARALLEL_VACUUM_KEY_SHARED
Definition: vacuumlazy.c:138
int max_parallel_maintenance_workers
Definition: globals.c:124
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:463
#define InvalidMultiXactId
Definition: multixact.h:24
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats, int nindexes)
Definition: vacuumlazy.c:3186
static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2376
static bool should_attempt_truncation(VacuumParams *params, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2557
pg_atomic_uint32 active_nworkers
Definition: vacuumlazy.c:239
#define InvalidOffsetNumber
Definition: off.h:26
VacOptTernaryValue truncate
Definition: vacuum.h:223
static LVParallelState * begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats, BlockNumber nblocks, int nindexes, int nrequested)
Definition: vacuumlazy.c:3217
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:35
#define ereport(elevel,...)
Definition: elog.h:155
int num_index_scans
Definition: vacuumlazy.c:313
int maintenance_work_mem
Definition: globals.c:123
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:345
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
double old_live_tuples
Definition: vacuumlazy.c:305
static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase, BlockNumber blkno, OffsetNumber offnum)
Definition: vacuumlazy.c:3671
static void vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:708
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:328
OffsetNumber offnum
Definition: vacuumlazy.c:320
int message_level
Definition: genam.h:50
TransactionId MultiXactId
Definition: c.h:585
RelFileNode rd_node
Definition: rel.h:55
int errmsg_internal(const char *fmt,...)
Definition: elog.c:989
double num_heap_tuples
Definition: genam.h:51
#define Max(x, y)
Definition: c.h:976
static BufferAccessStrategy vac_strategy
Definition: vacuumlazy.c:339
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define PARALLEL_VACUUM_KEY_BUFFER_USAGE
Definition: vacuumlazy.c:141
#define PageClearAllVisible(page)
Definition: bufpage.h:389
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct LVShared LVShared
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:685
#define Assert(condition)
Definition: c.h:800
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:52
double new_dead_tuples
Definition: vacuumlazy.c:308
Definition: regguts.h:298
bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:248
#define PARALLEL_VACUUM_KEY_WAL_USAGE
Definition: vacuumlazy.c:142
TransactionId latestRemovedXid
Definition: vacuumlazy.c:314
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:205
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:33
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:32
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
Definition: pgstat.c:3417
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:528
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:30
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Definition: vacuumlazy.c:3504
int nworkers
Definition: vacuum.h:231
#define InvalidBlockNumber
Definition: block.h:33
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
long wal_fpi
Definition: instrument.h:38
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7172
#define MAXALIGN(LEN)
Definition: c.h:753
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
int log_min_duration
Definition: vacuum.h:218
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
void EnterParallelMode(void)
Definition: xact.c:979
LVShared * lvshared
Definition: vacuumlazy.c:275
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:45
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
#define RelationNeedsWAL(relation)
Definition: rel.h:563
IndexBulkDeleteResult * index_vacuum_cleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: indexam.c:710
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
struct LVRelStats LVRelStats
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:156
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
Definition: pgstat.c:1545
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:573
#define PageGetLSN(page)
Definition: bufpage.h:366
static void end_parallel_vacuum(IndexBulkDeleteResult **stats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:3401
bool for_cleanup
Definition: vacuumlazy.c:201
#define AccessExclusiveLock
Definition: lockdefs.h:45
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2663
BlockNumber pages_removed
Definition: vacuumlazy.c:309
BlockNumber nonempty_pages
Definition: vacuumlazy.c:311
void PageRepairFragmentation(Page page)
Definition: bufpage.c:682
#define PageIsNew(page)
Definition: bufpage.h:229
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:902
static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive)
Definition: vacuumlazy.c:753
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
BlockNumber frozenskipped_pages
Definition: vacuumlazy.c:303
double VacuumCostDelay
Definition: globals.c:142
#define elog(elevel,...)
Definition: elog.h:228
int old_snapshot_threshold
Definition: snapmgr.c:78
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3160
int i
int options
Definition: vacuum.h:210
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:575
#define errcontext
Definition: elog.h:199
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
void * arg
struct LVParallelState LVParallelState
static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVSharedIndStats *shared_indstats, LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2316
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:223
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
uint32 offset
Definition: vacuumlazy.c:247
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
static int vac_cmp_itemptr(const void *left, const void *right)
Definition: vacuumlazy.c:2939
uint64 wal_bytes
Definition: instrument.h:39
void vacuum_delay_point(void)
Definition: vacuum.c:2031
#define MAXDEADTUPLES(max_size)
Definition: vacuumlazy.c:180
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1654
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
BlockNumber blkno
Definition: vacuumlazy.c:319
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:60
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact)
Definition: vacuum.c:1224
static bool heap_page_is_all_visible(Relation rel, Buffer buf, LVRelStats *vacrelstats, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:2972
VacErrPhase
Definition: vacuumlazy.c:151
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define LAZY_ALLOC_TUPLES
Definition: vacuumlazy.c:119
int Buffer
Definition: buf.h:23
#define _(x)
Definition: elog.c:88
#define RelationGetRelid(relation)
Definition: rel.h:457
int multixact_freeze_min_age
Definition: vacuum.h:213
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2578
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:32
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:92
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
shm_toc * toc
Definition: parallel.h:45
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
Definition: vacuumlazy.c:2879
bool VacuumCostActive
Definition: globals.c:149
bool estimated_count
Definition: genam.h:49
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
struct LVSavedErrInfo LVSavedErrInfo
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:277
#define RelationGetNamespace(relation)
Definition: rel.h:498
static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVDeadTuples *dead_tuples, int nindexes, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2221
bool lock_waiter_detected
Definition: vacuumlazy.c:315