PostgreSQL Source Code  git master
vacuumlazy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  *
7  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
8  * TIDs. We want to ensure we can vacuum even the very largest relations with
9  * finite memory space usage. To do that, we set upper bounds on the number of
10  * tuples we will keep track of at once.
11  *
12  * We are willing to use at most maintenance_work_mem (or perhaps
13  * autovacuum_work_mem) memory space to keep track of dead tuples. We
14  * initially allocate an array of TIDs of that size, with an upper limit that
15  * depends on table size (this limit ensures we don't allocate a huge area
16  * uselessly for vacuuming small tables). If the array threatens to overflow,
17  * we suspend the heap scan phase and perform a pass of index cleanup and page
18  * compaction, then resume the heap scan with an empty TID array.
19  *
20  * If we're processing a table with no indexes, we can just vacuum each page
21  * as we go; there's no need to save up multiple tuples to minimize the number
22  * of index scans performed. So we don't use maintenance_work_mem memory for
23  * the TID array, just enough to hold as many heap tuples as fit on one page.
24  *
25  * Lazy vacuum supports parallel execution with parallel worker processes. In
26  * a parallel vacuum, we perform both index vacuum and index cleanup with
27  * parallel worker processes. Individual indexes are processed by one vacuum
28  * process. At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
29  * the parallel context and initialize the DSM segment that contains shared
30  * information as well as the memory space for storing dead tuples. When
31  * starting either index vacuum or index cleanup, we launch parallel worker
32  * processes. Once all indexes are processed the parallel worker processes
33  * exit. After that, the leader process re-initializes the parallel context
34  * so that it can use the same DSM for multiple passes of index vacuum and
35  * for performing index cleanup. For updating the index statistics, we need
36  * to update the system table and since updates are not allowed during
37  * parallel mode we update the index statistics after exiting from the
38  * parallel mode.
39  *
40  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
41  * Portions Copyright (c) 1994, Regents of the University of California
42  *
43  *
44  * IDENTIFICATION
45  * src/backend/access/heap/vacuumlazy.c
46  *
47  *-------------------------------------------------------------------------
48  */
49 #include "postgres.h"
50 
51 #include <math.h>
52 
53 #include "access/amapi.h"
54 #include "access/genam.h"
55 #include "access/heapam.h"
56 #include "access/heapam_xlog.h"
57 #include "access/htup_details.h"
58 #include "access/multixact.h"
59 #include "access/parallel.h"
60 #include "access/transam.h"
61 #include "access/visibilitymap.h"
62 #include "access/xact.h"
63 #include "access/xlog.h"
64 #include "catalog/storage.h"
65 #include "commands/dbcommands.h"
66 #include "commands/progress.h"
67 #include "commands/vacuum.h"
68 #include "executor/instrument.h"
69 #include "miscadmin.h"
70 #include "optimizer/paths.h"
71 #include "pgstat.h"
72 #include "portability/instr_time.h"
73 #include "postmaster/autovacuum.h"
74 #include "storage/bufmgr.h"
75 #include "storage/freespace.h"
76 #include "storage/lmgr.h"
77 #include "tcop/tcopprot.h"
78 #include "utils/lsyscache.h"
79 #include "utils/memutils.h"
80 #include "utils/pg_rusage.h"
81 #include "utils/timestamp.h"
82 
83 
84 /*
85  * Space/time tradeoff parameters: do these need to be user-tunable?
86  *
87  * To consider truncating the relation, we want there to be at least
88  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
89  * is less) potentially-freeable pages.
90  */
91 #define REL_TRUNCATE_MINIMUM 1000
92 #define REL_TRUNCATE_FRACTION 16
93 
94 /*
95  * Timing parameters for truncate locking heuristics.
96  *
97  * These were not exposed as user tunable GUC values because it didn't seem
98  * that the potential for improvement was great enough to merit the cost of
99  * supporting them.
100  */
101 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
102 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
103 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
104 
105 /*
106  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
107  * (it won't be exact because we only vacuum FSM after processing a heap page
108  * that has some removable tuples). When there are indexes, this is ignored,
109  * and we vacuum FSM after each index/heap cleaning pass.
110  */
111 #define VACUUM_FSM_EVERY_PAGES \
112  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
113 
114 /*
115  * Guesstimation of number of dead tuples per page. This is used to
116  * provide an upper limit to memory allocated when vacuuming small
117  * tables.
118  */
119 #define LAZY_ALLOC_TUPLES MaxHeapTuplesPerPage
120 
121 /*
122  * Before we consider skipping a page that's marked as clean in
123  * visibility map, we must've seen at least this many clean pages.
124  */
125 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
126 
127 /*
128  * Size of the prefetch window for lazy vacuum backwards truncation scan.
129  * Needs to be a power of 2.
130  */
131 #define PREFETCH_SIZE ((BlockNumber) 32)
132 
133 /*
134  * DSM keys for parallel vacuum. Unlike other parallel execution code, since
135  * we don't need to worry about DSM keys conflicting with plan_node_id we can
136  * use small integers.
137  */
138 #define PARALLEL_VACUUM_KEY_SHARED 1
139 #define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
140 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
141 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE 4
142 #define PARALLEL_VACUUM_KEY_WAL_USAGE 5
143 
144 /*
145  * Macro to check if we are in a parallel vacuum. If true, we are in the
146  * parallel mode and the DSM segment is initialized.
147  */
148 #define ParallelVacuumIsActive(lps) PointerIsValid(lps)
149 
150 /* Phases of vacuum during which we report error context. */
151 typedef enum
152 {
159 } VacErrPhase;
160 
161 /*
162  * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
163  * This is allocated in the DSM segment in parallel mode and in local memory
164  * in non-parallel mode.
165  */
166 typedef struct LVDeadTuples
167 {
168  int max_tuples; /* # slots allocated in array */
169  int num_tuples; /* current # of entries */
170  /* List of TIDs of tuples we intend to delete */
171  /* NB: this list is ordered by TID address */
173  * ItemPointerData */
174 } LVDeadTuples;
175 
176 /* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
177 #define SizeOfDeadTuples(cnt) \
178  add_size(offsetof(LVDeadTuples, itemptrs), \
179  mul_size(sizeof(ItemPointerData), cnt))
180 #define MAXDEADTUPLES(max_size) \
181  (((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
182 
183 /*
184  * Shared information among parallel workers. So this is allocated in the DSM
185  * segment.
186  */
187 typedef struct LVShared
188 {
189  /*
190  * Target table relid and log level. These fields are not modified during
191  * the lazy vacuum.
192  */
194  int elevel;
195 
196  /*
197  * An indication for vacuum workers to perform either index vacuum or
198  * index cleanup. first_time is true only if for_cleanup is true and
199  * bulk-deletion is not performed yet.
200  */
203 
204  /*
205  * Fields for both index vacuum and cleanup.
206  *
207  * reltuples is the total number of input heap tuples. We set either old
208  * live tuples in the index vacuum case or the new live tuples in the
209  * index cleanup case.
210  *
211  * estimated_count is true if reltuples is an estimated value.
212  */
213  double reltuples;
215 
216  /*
217  * In single process lazy vacuum we could consume more memory during index
218  * vacuuming or cleanup apart from the memory for heap scanning. In
219  * parallel vacuum, since individual vacuum workers can consume memory
220  * equal to maintenance_work_mem, the new maintenance_work_mem for each
221  * worker is set such that the parallel operation doesn't consume more
222  * memory than single process lazy vacuum.
223  */
225 
226  /*
227  * Shared vacuum cost balance. During parallel vacuum,
228  * VacuumSharedCostBalance points to this value and it accumulates the
229  * balance of each parallel vacuum worker.
230  */
232 
233  /*
234  * Number of active parallel workers. This is used for computing the
235  * minimum threshold of the vacuum cost balance before a worker sleeps for
236  * cost-based delay.
237  */
239 
240  /*
241  * Variables to control parallel vacuum. We have a bitmap to indicate
242  * which index has stats in shared memory. The set bit in the map
243  * indicates that the particular index supports a parallel vacuum.
244  */
245  pg_atomic_uint32 idx; /* counter for vacuuming and clean up */
246  uint32 offset; /* sizeof header incl. bitmap */
247  bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]; /* bit map of NULLs */
248 
249  /* Shared index statistics data follows at end of struct */
250 } LVShared;
251 
252 #define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
253 #define GetSharedIndStats(s) \
254  ((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
255 #define IndStatsIsNull(s, i) \
256  (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
257 
258 /*
259  * Struct for an index bulk-deletion statistic used for parallel vacuum. This
260  * is allocated in the DSM segment.
261  */
262 typedef struct LVSharedIndStats
263 {
264  bool updated; /* are the stats updated? */
267 
268 /* Struct for maintaining a parallel vacuum state. */
269 typedef struct LVParallelState
270 {
272 
273  /* Shared information among parallel vacuum workers */
275 
276  /* Points to buffer usage area in DSM */
278 
279  /* Points to WAL usage area in DSM */
281 
282  /*
283  * The number of indexes that support parallel index bulk-deletion and
284  * parallel index cleanup respectively.
285  */
290 
291 typedef struct LVRelStats
292 {
294  char *relname;
295  /* useindex = true means two-pass strategy; false means one-pass */
296  bool useindex;
297  /* Overall statistics about rel */
298  BlockNumber old_rel_pages; /* previous value of pg_class.relpages */
299  BlockNumber rel_pages; /* total number of pages */
300  BlockNumber scanned_pages; /* number of pages we examined */
301  BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */
302  BlockNumber frozenskipped_pages; /* # of frozen pages we skipped */
303  BlockNumber tupcount_pages; /* pages whose tuples we counted */
304  double old_live_tuples; /* previous value of pg_class.reltuples */
305  double new_rel_tuples; /* new estimated total # of tuples */
306  double new_live_tuples; /* new estimated total # of live tuples */
307  double new_dead_tuples; /* new estimated total # of dead tuples */
310  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
315 
316  /* Used for error callback */
317  char *indname;
318  BlockNumber blkno; /* used only for heap operations */
320 } LVRelStats;
321 
322 /* Struct for saving and restoring vacuum error information. */
323 typedef struct LVSavedErrInfo
324 {
328 
329 /* A few variables that don't seem worth passing around as parameters */
330 static int elevel = -1;
331 
335 
337 
338 
339 /* non-export function prototypes */
340 static void lazy_scan_heap(Relation onerel, VacuumParams *params,
341  LVRelStats *vacrelstats, Relation *Irel, int nindexes,
342  bool aggressive);
343 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
344 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
345 static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
346  IndexBulkDeleteResult **stats,
347  LVRelStats *vacrelstats, LVParallelState *lps,
348  int nindexes);
349 static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
350  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats);
351 static void lazy_cleanup_index(Relation indrel,
352  IndexBulkDeleteResult **stats,
353  double reltuples, bool estimated_count, LVRelStats *vacrelstats);
354 static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
355  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
356 static bool should_attempt_truncation(VacuumParams *params,
357  LVRelStats *vacrelstats);
358 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
360  LVRelStats *vacrelstats);
361 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
362 static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples,
363  ItemPointer itemptr);
364 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
365 static int vac_cmp_itemptr(const void *left, const void *right);
367  TransactionId *visibility_cutoff_xid, bool *all_frozen);
369  LVRelStats *vacrelstats, LVParallelState *lps,
370  int nindexes);
371 static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
372  LVShared *lvshared, LVDeadTuples *dead_tuples,
373  int nindexes, LVRelStats *vacrelstats);
374 static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
375  LVRelStats *vacrelstats, LVParallelState *lps,
376  int nindexes);
377 static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
378  LVShared *lvshared, LVSharedIndStats *shared_indstats,
379  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats);
380 static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
381  LVRelStats *vacrelstats, LVParallelState *lps,
382  int nindexes);
383 static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
384 static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
385  bool *can_parallel_vacuum);
386 static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
387  int nindexes);
388 static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
389  int nindexes);
391  LVRelStats *vacrelstats, BlockNumber nblocks,
392  int nindexes, int nrequested);
393 static void end_parallel_vacuum(IndexBulkDeleteResult **stats,
394  LVParallelState *lps, int nindexes);
395 static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
396 static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
397 static void vacuum_error_callback(void *arg);
398 static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info,
399  int phase, BlockNumber blkno);
400 static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info);
401 
402 
403 /*
404  * heap_vacuum_rel() -- perform VACUUM for one heap relation
405  *
406  * This routine vacuums a single heap, cleans out its indexes, and
407  * updates its relpages and reltuples statistics.
408  *
409  * At entry, we have already established a transaction and opened
410  * and locked the relation.
411  */
412 void
414  BufferAccessStrategy bstrategy)
415 {
416  LVRelStats *vacrelstats;
417  Relation *Irel;
418  int nindexes;
419  PGRUsage ru0;
420  TimestampTz starttime = 0;
421  WalUsage walusage_start = pgWalUsage;
422  WalUsage walusage = {0, 0, 0};
423  long secs;
424  int usecs;
425  double read_rate,
426  write_rate;
427  bool aggressive; /* should we scan all unfrozen pages? */
428  bool scanned_all_unfrozen; /* actually scanned all such pages? */
429  TransactionId xidFullScanLimit;
430  MultiXactId mxactFullScanLimit;
431  BlockNumber new_rel_pages;
432  BlockNumber new_rel_allvisible;
433  double new_live_tuples;
434  TransactionId new_frozen_xid;
435  MultiXactId new_min_multi;
436  ErrorContextCallback errcallback;
437 
438  Assert(params != NULL);
441 
442  /* not every AM requires these to be valid, but heap does */
443  Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid));
444  Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid));
445 
446  /* measure elapsed time iff autovacuum logging requires it */
447  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
448  {
449  pg_rusage_init(&ru0);
450  starttime = GetCurrentTimestamp();
451  }
452 
453  if (params->options & VACOPT_VERBOSE)
454  elevel = INFO;
455  else
456  elevel = DEBUG2;
457 
459  RelationGetRelid(onerel));
460 
461  vac_strategy = bstrategy;
462 
463  vacuum_set_xid_limits(onerel,
464  params->freeze_min_age,
465  params->freeze_table_age,
466  params->multixact_freeze_min_age,
468  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
469  &MultiXactCutoff, &mxactFullScanLimit);
470 
471  /*
472  * We request an aggressive scan if the table's frozen Xid is now older
473  * than or equal to the requested Xid full-table scan limit; or if the
474  * table's minimum MultiXactId is older than or equal to the requested
475  * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
476  */
477  aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
478  xidFullScanLimit);
479  aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
480  mxactFullScanLimit);
482  aggressive = true;
483 
484  vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
485 
486  vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel));
487  vacrelstats->relname = pstrdup(RelationGetRelationName(onerel));
488  vacrelstats->indname = NULL;
489  vacrelstats->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
490  vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
491  vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
492  vacrelstats->num_index_scans = 0;
493  vacrelstats->pages_removed = 0;
494  vacrelstats->lock_waiter_detected = false;
495 
496  /* Open all indexes of the relation */
497  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
498  vacrelstats->useindex = (nindexes > 0 &&
500 
501  /*
502  * Setup error traceback support for ereport(). The idea is to set up an
503  * error context callback to display additional information on any error
504  * during a vacuum. During different phases of vacuum (heap scan, heap
505  * vacuum, index vacuum, index clean up, heap truncate), we update the
506  * error context callback to display appropriate information.
507  *
508  * Note that the index vacuum and heap vacuum phases may be called
509  * multiple times in the middle of the heap scan phase. So the old phase
510  * information is restored at the end of those phases.
511  */
512  errcallback.callback = vacuum_error_callback;
513  errcallback.arg = vacrelstats;
514  errcallback.previous = error_context_stack;
515  error_context_stack = &errcallback;
516 
517  /* Do the vacuuming */
518  lazy_scan_heap(onerel, params, vacrelstats, Irel, nindexes, aggressive);
519 
520  /* Done with indexes */
521  vac_close_indexes(nindexes, Irel, NoLock);
522 
523  /*
524  * Compute whether we actually scanned the all unfrozen pages. If we did,
525  * we can adjust relfrozenxid and relminmxid.
526  *
527  * NB: We need to check this before truncating the relation, because that
528  * will change ->rel_pages.
529  */
530  if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
531  < vacrelstats->rel_pages)
532  {
533  Assert(!aggressive);
534  scanned_all_unfrozen = false;
535  }
536  else
537  scanned_all_unfrozen = true;
538 
539  /*
540  * Optionally truncate the relation.
541  */
542  if (should_attempt_truncation(params, vacrelstats))
543  {
544  /*
545  * Update error traceback information. This is the last phase during
546  * which we add context information to errors, so we don't need to
547  * revert to the previous phase.
548  */
550  vacrelstats->nonempty_pages);
551  lazy_truncate_heap(onerel, vacrelstats);
552  }
553 
554  /* Pop the error context stack */
555  error_context_stack = errcallback.previous;
556 
557  /* Report that we are now doing final cleanup */
560 
561  /*
562  * Update statistics in pg_class.
563  *
564  * A corner case here is that if we scanned no pages at all because every
565  * page is all-visible, we should not update relpages/reltuples, because
566  * we have no new information to contribute. In particular this keeps us
567  * from replacing relpages=reltuples=0 (which means "unknown tuple
568  * density") with nonzero relpages and reltuples=0 (which means "zero
569  * tuple density") unless there's some actual evidence for the latter.
570  *
571  * It's important that we use tupcount_pages and not scanned_pages for the
572  * check described above; scanned_pages counts pages where we could not
573  * get cleanup lock, and which were processed only for frozenxid purposes.
574  *
575  * We do update relallvisible even in the corner case, since if the table
576  * is all-visible we'd definitely like to know that. But clamp the value
577  * to be not more than what we're setting relpages to.
578  *
579  * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
580  * since then we don't know for certain that all tuples have a newer xmin.
581  */
582  new_rel_pages = vacrelstats->rel_pages;
583  new_live_tuples = vacrelstats->new_live_tuples;
584  if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
585  {
586  new_rel_pages = vacrelstats->old_rel_pages;
587  new_live_tuples = vacrelstats->old_live_tuples;
588  }
589 
590  visibilitymap_count(onerel, &new_rel_allvisible, NULL);
591  if (new_rel_allvisible > new_rel_pages)
592  new_rel_allvisible = new_rel_pages;
593 
594  new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
595  new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
596 
597  vac_update_relstats(onerel,
598  new_rel_pages,
599  new_live_tuples,
600  new_rel_allvisible,
601  nindexes > 0,
602  new_frozen_xid,
603  new_min_multi,
604  false);
605 
606  /* report results to the stats collector, too */
608  onerel->rd_rel->relisshared,
609  new_live_tuples,
610  vacrelstats->new_dead_tuples);
612 
613  /* and log the action if appropriate */
614  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
615  {
616  TimestampTz endtime = GetCurrentTimestamp();
617 
618  if (params->log_min_duration == 0 ||
619  TimestampDifferenceExceeds(starttime, endtime,
620  params->log_min_duration))
621  {
623  char *msgfmt;
624 
625  TimestampDifference(starttime, endtime, &secs, &usecs);
626 
627  memset(&walusage, 0, sizeof(WalUsage));
628  WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
629 
630  read_rate = 0;
631  write_rate = 0;
632  if ((secs > 0) || (usecs > 0))
633  {
634  read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
635  (secs + usecs / 1000000.0);
636  write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
637  (secs + usecs / 1000000.0);
638  }
639 
640  /*
641  * This is pretty messy, but we split it up so that we can skip
642  * emitting individual parts of the message when not applicable.
643  */
644  initStringInfo(&buf);
645  if (params->is_wraparound)
646  {
647  if (aggressive)
648  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
649  else
650  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
651  }
652  else
653  {
654  if (aggressive)
655  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
656  else
657  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
658  }
659  appendStringInfo(&buf, msgfmt,
661  vacrelstats->relnamespace,
662  vacrelstats->relname,
663  vacrelstats->num_index_scans);
664  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
665  vacrelstats->pages_removed,
666  vacrelstats->rel_pages,
667  vacrelstats->pinskipped_pages,
668  vacrelstats->frozenskipped_pages);
669  appendStringInfo(&buf,
670  _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
671  vacrelstats->tuples_deleted,
672  vacrelstats->new_rel_tuples,
673  vacrelstats->new_dead_tuples,
674  OldestXmin);
675  appendStringInfo(&buf,
676  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
677  (long long) VacuumPageHit,
678  (long long) VacuumPageMiss,
679  (long long) VacuumPageDirty);
680  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
681  read_rate, write_rate);
682  appendStringInfo(&buf, _("system usage: %s\n"), pg_rusage_show(&ru0));
683  appendStringInfo(&buf,
684  _("WAL usage: %ld records, %ld full page images, "
685  UINT64_FORMAT " bytes"),
686  walusage.wal_records,
687  walusage.wal_fpi,
688  walusage.wal_bytes);
689 
690  ereport(LOG,
691  (errmsg_internal("%s", buf.data)));
692  pfree(buf.data);
693  }
694  }
695 }
696 
697 /*
698  * For Hot Standby we need to know the highest transaction id that will
699  * be removed by any change. VACUUM proceeds in a number of passes so
700  * we need to consider how each pass operates. The first phase runs
701  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
702  * progresses - these will have a latestRemovedXid on each record.
703  * In some cases this removes all of the tuples to be removed, though
704  * often we have dead tuples with index pointers so we must remember them
705  * for removal in phase 3. Index records for those rows are removed
706  * in phase 2 and index blocks do not have MVCC information attached.
707  * So before we can allow removal of any index tuples we need to issue
708  * a WAL record containing the latestRemovedXid of rows that will be
709  * removed in phase three. This allows recovery queries to block at the
710  * correct place, i.e. before phase two, rather than during phase three
711  * which would be after the rows have become inaccessible.
712  */
713 static void
715 {
716  /*
717  * Skip this for relations for which no WAL is to be written, or if we're
718  * not trying to support archive recovery.
719  */
720  if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
721  return;
722 
723  /*
724  * No need to write the record at all unless it contains a valid value
725  */
726  if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
727  (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
728 }
729 
730 /*
731  * lazy_scan_heap() -- scan an open heap relation
732  *
733  * This routine prunes each page in the heap, which will among other
734  * things truncate dead tuples to dead line pointers, defragment the
735  * page, and set commit status bits (see heap_page_prune). It also builds
736  * lists of dead tuples and pages with free space, calculates statistics
737  * on the number of live tuples in the heap, and marks pages as
738  * all-visible if appropriate. When done, or when we run low on space for
739  * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
740  * to reclaim dead line pointers.
741  *
742  * If the table has at least two indexes, we execute both index vacuum
743  * and index cleanup with parallel workers unless parallel vacuum is
744  * disabled. In a parallel vacuum, we enter parallel mode and then
745  * create both the parallel context and the DSM segment before starting
746  * heap scan so that we can record dead tuples to the DSM segment. All
747  * parallel workers are launched at beginning of index vacuuming and
748  * index cleanup and they exit once done with all indexes. At the end of
749  * this function we exit from parallel mode. Index bulk-deletion results
750  * are stored in the DSM segment and we update index statistics for all
751  * the indexes after exiting from parallel mode since writes are not
752  * allowed during parallel mode.
753  *
754  * If there are no indexes then we can reclaim line pointers on the fly;
755  * dead line pointers need only be retained until all index pointers that
756  * reference them have been killed.
757  */
758 static void
759 lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
760  Relation *Irel, int nindexes, bool aggressive)
761 {
762  LVParallelState *lps = NULL;
763  LVDeadTuples *dead_tuples;
764  BlockNumber nblocks,
765  blkno;
766  HeapTupleData tuple;
767  TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
768  TransactionId relminmxid = onerel->rd_rel->relminmxid;
769  BlockNumber empty_pages,
770  vacuumed_pages,
771  next_fsm_block_to_vacuum;
772  double num_tuples, /* total number of nonremovable tuples */
773  live_tuples, /* live tuples (reltuples estimate) */
774  tups_vacuumed, /* tuples cleaned up by vacuum */
775  nkeep, /* dead-but-not-removable tuples */
776  nunused; /* unused line pointers */
777  IndexBulkDeleteResult **indstats;
778  int i;
779  PGRUsage ru0;
780  Buffer vmbuffer = InvalidBuffer;
781  BlockNumber next_unskippable_block;
782  bool skipping_blocks;
783  xl_heap_freeze_tuple *frozen;
785  const int initprog_index[] = {
789  };
790  int64 initprog_val[3];
791 
792  pg_rusage_init(&ru0);
793 
794  if (aggressive)
795  ereport(elevel,
796  (errmsg("aggressively vacuuming \"%s.%s\"",
797  vacrelstats->relnamespace,
798  vacrelstats->relname)));
799  else
800  ereport(elevel,
801  (errmsg("vacuuming \"%s.%s\"",
802  vacrelstats->relnamespace,
803  vacrelstats->relname)));
804 
805  empty_pages = vacuumed_pages = 0;
806  next_fsm_block_to_vacuum = (BlockNumber) 0;
807  num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
808 
809  indstats = (IndexBulkDeleteResult **)
810  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
811 
812  nblocks = RelationGetNumberOfBlocks(onerel);
813  vacrelstats->rel_pages = nblocks;
814  vacrelstats->scanned_pages = 0;
815  vacrelstats->tupcount_pages = 0;
816  vacrelstats->nonempty_pages = 0;
817  vacrelstats->latestRemovedXid = InvalidTransactionId;
818 
819  /*
820  * Initialize state for a parallel vacuum. As of now, only one worker can
821  * be used for an index, so we invoke parallelism only if there are at
822  * least two indexes on a table.
823  */
824  if (params->nworkers >= 0 && vacrelstats->useindex && nindexes > 1)
825  {
826  /*
827  * Since parallel workers cannot access data in temporary tables, we
828  * can't perform parallel vacuum on them.
829  */
830  if (RelationUsesLocalBuffers(onerel))
831  {
832  /*
833  * Give warning only if the user explicitly tries to perform a
834  * parallel vacuum on the temporary table.
835  */
836  if (params->nworkers > 0)
838  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
839  vacrelstats->relname)));
840  }
841  else
842  lps = begin_parallel_vacuum(RelationGetRelid(onerel), Irel,
843  vacrelstats, nblocks, nindexes,
844  params->nworkers);
845  }
846 
847  /*
848  * Allocate the space for dead tuples in case parallel vacuum is not
849  * initialized.
850  */
851  if (!ParallelVacuumIsActive(lps))
852  lazy_space_alloc(vacrelstats, nblocks);
853 
854  dead_tuples = vacrelstats->dead_tuples;
856 
857  /* Report that we're scanning the heap, advertising total # of blocks */
858  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
859  initprog_val[1] = nblocks;
860  initprog_val[2] = dead_tuples->max_tuples;
861  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
862 
863  /*
864  * Except when aggressive is set, we want to skip pages that are
865  * all-visible according to the visibility map, but only when we can skip
866  * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading
867  * sequentially, the OS should be doing readahead for us, so there's no
868  * gain in skipping a page now and then; that's likely to disable
869  * readahead and so be counterproductive. Also, skipping even a single
870  * page means that we can't update relfrozenxid, so we only want to do it
871  * if we can skip a goodly number of pages.
872  *
873  * When aggressive is set, we can't skip pages just because they are
874  * all-visible, but we can still skip pages that are all-frozen, since
875  * such pages do not need freezing and do not affect the value that we can
876  * safely set for relfrozenxid or relminmxid.
877  *
878  * Before entering the main loop, establish the invariant that
879  * next_unskippable_block is the next block number >= blkno that we can't
880  * skip based on the visibility map, either all-visible for a regular scan
881  * or all-frozen for an aggressive scan. We set it to nblocks if there's
882  * no such block. We also set up the skipping_blocks flag correctly at
883  * this stage.
884  *
885  * Note: The value returned by visibilitymap_get_status could be slightly
886  * out-of-date, since we make this test before reading the corresponding
887  * heap page or locking the buffer. This is OK. If we mistakenly think
888  * that the page is all-visible or all-frozen when in fact the flag's just
889  * been cleared, we might fail to vacuum the page. It's easy to see that
890  * skipping a page when aggressive is not set is not a very big deal; we
891  * might leave some dead tuples lying around, but the next vacuum will
892  * find them. But even when aggressive *is* set, it's still OK if we miss
893  * a page whose all-frozen marking has just been cleared. Any new XIDs
894  * just added to that page are necessarily newer than the GlobalXmin we
895  * computed, so they'll have no effect on the value to which we can safely
896  * set relfrozenxid. A similar argument applies for MXIDs and relminmxid.
897  *
898  * We will scan the table's last page, at least to the extent of
899  * determining whether it has tuples or not, even if it should be skipped
900  * according to the above rules; except when we've already determined that
901  * it's not worth trying to truncate the table. This avoids having
902  * lazy_truncate_heap() take access-exclusive lock on the table to attempt
903  * a truncation that just fails immediately because there are tuples in
904  * the last page. This is worth avoiding mainly because such a lock must
905  * be replayed on any hot standby, where it can be disruptive.
906  */
907  next_unskippable_block = 0;
908  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
909  {
910  while (next_unskippable_block < nblocks)
911  {
912  uint8 vmstatus;
913 
914  vmstatus = visibilitymap_get_status(onerel, next_unskippable_block,
915  &vmbuffer);
916  if (aggressive)
917  {
918  if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
919  break;
920  }
921  else
922  {
923  if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
924  break;
925  }
927  next_unskippable_block++;
928  }
929  }
930 
931  if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
932  skipping_blocks = true;
933  else
934  skipping_blocks = false;
935 
936  for (blkno = 0; blkno < nblocks; blkno++)
937  {
938  Buffer buf;
939  Page page;
940  OffsetNumber offnum,
941  maxoff;
942  bool tupgone,
943  hastup;
944  int prev_dead_count;
945  int nfrozen;
946  Size freespace;
947  bool all_visible_according_to_vm = false;
948  bool all_visible;
949  bool all_frozen = true; /* provided all_visible is also true */
950  bool has_dead_tuples;
951  TransactionId visibility_cutoff_xid = InvalidTransactionId;
952 
953  /* see note above about forcing scanning of last page */
954 #define FORCE_CHECK_PAGE() \
955  (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats))
956 
958 
960  blkno);
961 
962  if (blkno == next_unskippable_block)
963  {
964  /* Time to advance next_unskippable_block */
965  next_unskippable_block++;
966  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
967  {
968  while (next_unskippable_block < nblocks)
969  {
970  uint8 vmskipflags;
971 
972  vmskipflags = visibilitymap_get_status(onerel,
973  next_unskippable_block,
974  &vmbuffer);
975  if (aggressive)
976  {
977  if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
978  break;
979  }
980  else
981  {
982  if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
983  break;
984  }
986  next_unskippable_block++;
987  }
988  }
989 
990  /*
991  * We know we can't skip the current block. But set up
992  * skipping_blocks to do the right thing at the following blocks.
993  */
994  if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
995  skipping_blocks = true;
996  else
997  skipping_blocks = false;
998 
999  /*
1000  * Normally, the fact that we can't skip this block must mean that
1001  * it's not all-visible. But in an aggressive vacuum we know only
1002  * that it's not all-frozen, so it might still be all-visible.
1003  */
1004  if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
1005  all_visible_according_to_vm = true;
1006  }
1007  else
1008  {
1009  /*
1010  * The current block is potentially skippable; if we've seen a
1011  * long enough run of skippable blocks to justify skipping it, and
1012  * we're not forced to check it, then go ahead and skip.
1013  * Otherwise, the page must be at least all-visible if not
1014  * all-frozen, so we can set all_visible_according_to_vm = true.
1015  */
1016  if (skipping_blocks && !FORCE_CHECK_PAGE())
1017  {
1018  /*
1019  * Tricky, tricky. If this is in aggressive vacuum, the page
1020  * must have been all-frozen at the time we checked whether it
1021  * was skippable, but it might not be any more. We must be
1022  * careful to count it as a skipped all-frozen page in that
1023  * case, or else we'll think we can't update relfrozenxid and
1024  * relminmxid. If it's not an aggressive vacuum, we don't
1025  * know whether it was all-frozen, so we have to recheck; but
1026  * in this case an approximate answer is OK.
1027  */
1028  if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1029  vacrelstats->frozenskipped_pages++;
1030  continue;
1031  }
1032  all_visible_according_to_vm = true;
1033  }
1034 
1036 
1037  /*
1038  * If we are close to overrunning the available space for dead-tuple
1039  * TIDs, pause and do a cycle of vacuuming before we tackle this page.
1040  */
1041  if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
1042  dead_tuples->num_tuples > 0)
1043  {
1044  /*
1045  * Before beginning index vacuuming, we release any pin we may
1046  * hold on the visibility map page. This isn't necessary for
1047  * correctness, but we do it anyway to avoid holding the pin
1048  * across a lengthy, unrelated operation.
1049  */
1050  if (BufferIsValid(vmbuffer))
1051  {
1052  ReleaseBuffer(vmbuffer);
1053  vmbuffer = InvalidBuffer;
1054  }
1055 
1056  /* Work on all the indexes, then the heap */
1057  lazy_vacuum_all_indexes(onerel, Irel, indstats,
1058  vacrelstats, lps, nindexes);
1059 
1060  /* Remove tuples from heap */
1061  lazy_vacuum_heap(onerel, vacrelstats);
1062 
1063  /*
1064  * Forget the now-vacuumed tuples, and press on, but be careful
1065  * not to reset latestRemovedXid since we want that value to be
1066  * valid.
1067  */
1068  dead_tuples->num_tuples = 0;
1069 
1070  /*
1071  * Vacuum the Free Space Map to make newly-freed space visible on
1072  * upper-level FSM pages. Note we have not yet processed blkno.
1073  */
1074  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1075  next_fsm_block_to_vacuum = blkno;
1076 
1077  /* Report that we are once again scanning the heap */
1080  }
1081 
1082  /*
1083  * Pin the visibility map page in case we need to mark the page
1084  * all-visible. In most cases this will be very cheap, because we'll
1085  * already have the correct page pinned anyway. However, it's
1086  * possible that (a) next_unskippable_block is covered by a different
1087  * VM page than the current block or (b) we released our pin and did a
1088  * cycle of index vacuuming.
1089  *
1090  */
1091  visibilitymap_pin(onerel, blkno, &vmbuffer);
1092 
1093  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
1094  RBM_NORMAL, vac_strategy);
1095 
1096  /* We need buffer cleanup lock so that we can prune HOT chains. */
1098  {
1099  /*
1100  * If we're not performing an aggressive scan to guard against XID
1101  * wraparound, and we don't want to forcibly check the page, then
1102  * it's OK to skip vacuuming pages we get a lock conflict on. They
1103  * will be dealt with in some future vacuum.
1104  */
1105  if (!aggressive && !FORCE_CHECK_PAGE())
1106  {
1107  ReleaseBuffer(buf);
1108  vacrelstats->pinskipped_pages++;
1109  continue;
1110  }
1111 
1112  /*
1113  * Read the page with share lock to see if any xids on it need to
1114  * be frozen. If not we just skip the page, after updating our
1115  * scan statistics. If there are some, we wait for cleanup lock.
1116  *
1117  * We could defer the lock request further by remembering the page
1118  * and coming back to it later, or we could even register
1119  * ourselves for multiple buffers and then service whichever one
1120  * is received first. For now, this seems good enough.
1121  *
1122  * If we get here with aggressive false, then we're just forcibly
1123  * checking the page, and so we don't want to insist on getting
1124  * the lock; we only need to know if the page contains tuples, so
1125  * that we can update nonempty_pages correctly. It's convenient
1126  * to use lazy_check_needs_freeze() for both situations, though.
1127  */
1129  if (!lazy_check_needs_freeze(buf, &hastup))
1130  {
1131  UnlockReleaseBuffer(buf);
1132  vacrelstats->scanned_pages++;
1133  vacrelstats->pinskipped_pages++;
1134  if (hastup)
1135  vacrelstats->nonempty_pages = blkno + 1;
1136  continue;
1137  }
1138  if (!aggressive)
1139  {
1140  /*
1141  * Here, we must not advance scanned_pages; that would amount
1142  * to claiming that the page contains no freezable tuples.
1143  */
1144  UnlockReleaseBuffer(buf);
1145  vacrelstats->pinskipped_pages++;
1146  if (hastup)
1147  vacrelstats->nonempty_pages = blkno + 1;
1148  continue;
1149  }
1151  LockBufferForCleanup(buf);
1152  /* drop through to normal processing */
1153  }
1154 
1155  vacrelstats->scanned_pages++;
1156  vacrelstats->tupcount_pages++;
1157 
1158  page = BufferGetPage(buf);
1159 
1160  if (PageIsNew(page))
1161  {
1162  /*
1163  * All-zeroes pages can be left over if either a backend extends
1164  * the relation by a single page, but crashes before the newly
1165  * initialized page has been written out, or when bulk-extending
1166  * the relation (which creates a number of empty pages at the tail
1167  * end of the relation, but enters them into the FSM).
1168  *
1169  * Note we do not enter the page into the visibilitymap. That has
1170  * the downside that we repeatedly visit this page in subsequent
1171  * vacuums, but otherwise we'll never not discover the space on a
1172  * promoted standby. The harm of repeated checking ought to
1173  * normally not be too bad - the space usually should be used at
1174  * some point, otherwise there wouldn't be any regular vacuums.
1175  *
1176  * Make sure these pages are in the FSM, to ensure they can be
1177  * reused. Do that by testing if there's any space recorded for
1178  * the page. If not, enter it. We do so after releasing the lock
1179  * on the heap page, the FSM is approximate, after all.
1180  */
1181  UnlockReleaseBuffer(buf);
1182 
1183  empty_pages++;
1184 
1185  if (GetRecordedFreeSpace(onerel, blkno) == 0)
1186  {
1187  Size freespace;
1188 
1189  freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
1190  RecordPageWithFreeSpace(onerel, blkno, freespace);
1191  }
1192  continue;
1193  }
1194 
1195  if (PageIsEmpty(page))
1196  {
1197  empty_pages++;
1198  freespace = PageGetHeapFreeSpace(page);
1199 
1200  /*
1201  * Empty pages are always all-visible and all-frozen (note that
1202  * the same is currently not true for new pages, see above).
1203  */
1204  if (!PageIsAllVisible(page))
1205  {
1207 
1208  /* mark buffer dirty before writing a WAL record */
1209  MarkBufferDirty(buf);
1210 
1211  /*
1212  * It's possible that another backend has extended the heap,
1213  * initialized the page, and then failed to WAL-log the page
1214  * due to an ERROR. Since heap extension is not WAL-logged,
1215  * recovery might try to replay our record setting the page
1216  * all-visible and find that the page isn't initialized, which
1217  * will cause a PANIC. To prevent that, check whether the
1218  * page has been previously WAL-logged, and if not, do that
1219  * now.
1220  */
1221  if (RelationNeedsWAL(onerel) &&
1222  PageGetLSN(page) == InvalidXLogRecPtr)
1223  log_newpage_buffer(buf, true);
1224 
1225  PageSetAllVisible(page);
1226  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1227  vmbuffer, InvalidTransactionId,
1229  END_CRIT_SECTION();
1230  }
1231 
1232  UnlockReleaseBuffer(buf);
1233  RecordPageWithFreeSpace(onerel, blkno, freespace);
1234  continue;
1235  }
1236 
1237  /*
1238  * Prune all HOT-update chains in this page.
1239  *
1240  * We count tuples removed by the pruning step as removed by VACUUM.
1241  */
1242  tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
1243  &vacrelstats->latestRemovedXid);
1244 
1245  /*
1246  * Now scan the page to collect vacuumable items and check for tuples
1247  * requiring freezing.
1248  */
1249  all_visible = true;
1250  has_dead_tuples = false;
1251  nfrozen = 0;
1252  hastup = false;
1253  prev_dead_count = dead_tuples->num_tuples;
1254  maxoff = PageGetMaxOffsetNumber(page);
1255 
1256  /*
1257  * Note: If you change anything in the loop below, also look at
1258  * heap_page_is_all_visible to see if that needs to be changed.
1259  */
1260  for (offnum = FirstOffsetNumber;
1261  offnum <= maxoff;
1262  offnum = OffsetNumberNext(offnum))
1263  {
1264  ItemId itemid;
1265 
1266  itemid = PageGetItemId(page, offnum);
1267 
1268  /* Unused items require no processing, but we count 'em */
1269  if (!ItemIdIsUsed(itemid))
1270  {
1271  nunused += 1;
1272  continue;
1273  }
1274 
1275  /* Redirect items mustn't be touched */
1276  if (ItemIdIsRedirected(itemid))
1277  {
1278  hastup = true; /* this page won't be truncatable */
1279  continue;
1280  }
1281 
1282  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1283 
1284  /*
1285  * DEAD line pointers are to be vacuumed normally; but we don't
1286  * count them in tups_vacuumed, else we'd be double-counting (at
1287  * least in the common case where heap_page_prune() just freed up
1288  * a non-HOT tuple).
1289  */
1290  if (ItemIdIsDead(itemid))
1291  {
1292  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1293  all_visible = false;
1294  continue;
1295  }
1296 
1297  Assert(ItemIdIsNormal(itemid));
1298 
1299  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1300  tuple.t_len = ItemIdGetLength(itemid);
1301  tuple.t_tableOid = RelationGetRelid(onerel);
1302 
1303  tupgone = false;
1304 
1305  /*
1306  * The criteria for counting a tuple as live in this block need to
1307  * match what analyze.c's acquire_sample_rows() does, otherwise
1308  * VACUUM and ANALYZE may produce wildly different reltuples
1309  * values, e.g. when there are many recently-dead tuples.
1310  *
1311  * The logic here is a bit simpler than acquire_sample_rows(), as
1312  * VACUUM can't run inside a transaction block, which makes some
1313  * cases impossible (e.g. in-progress insert from the same
1314  * transaction).
1315  */
1316  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
1317  {
1318  case HEAPTUPLE_DEAD:
1319 
1320  /*
1321  * Ordinarily, DEAD tuples would have been removed by
1322  * heap_page_prune(), but it's possible that the tuple
1323  * state changed since heap_page_prune() looked. In
1324  * particular an INSERT_IN_PROGRESS tuple could have
1325  * changed to DEAD if the inserter aborted. So this
1326  * cannot be considered an error condition.
1327  *
1328  * If the tuple is HOT-updated then it must only be
1329  * removed by a prune operation; so we keep it just as if
1330  * it were RECENTLY_DEAD. Also, if it's a heap-only
1331  * tuple, we choose to keep it, because it'll be a lot
1332  * cheaper to get rid of it in the next pruning pass than
1333  * to treat it like an indexed tuple. Finally, if index
1334  * cleanup is disabled, the second heap pass will not
1335  * execute, and the tuple will not get removed, so we must
1336  * treat it like any other dead tuple that we choose to
1337  * keep.
1338  *
1339  * If this were to happen for a tuple that actually needed
1340  * to be deleted, we'd be in trouble, because it'd
1341  * possibly leave a tuple below the relation's xmin
1342  * horizon alive. heap_prepare_freeze_tuple() is prepared
1343  * to detect that case and abort the transaction,
1344  * preventing corruption.
1345  */
1346  if (HeapTupleIsHotUpdated(&tuple) ||
1347  HeapTupleIsHeapOnly(&tuple) ||
1349  nkeep += 1;
1350  else
1351  tupgone = true; /* we can delete the tuple */
1352  all_visible = false;
1353  break;
1354  case HEAPTUPLE_LIVE:
1355 
1356  /*
1357  * Count it as live. Not only is this natural, but it's
1358  * also what acquire_sample_rows() does.
1359  */
1360  live_tuples += 1;
1361 
1362  /*
1363  * Is the tuple definitely visible to all transactions?
1364  *
1365  * NB: Like with per-tuple hint bits, we can't set the
1366  * PD_ALL_VISIBLE flag if the inserter committed
1367  * asynchronously. See SetHintBits for more info. Check
1368  * that the tuple is hinted xmin-committed because of
1369  * that.
1370  */
1371  if (all_visible)
1372  {
1373  TransactionId xmin;
1374 
1376  {
1377  all_visible = false;
1378  break;
1379  }
1380 
1381  /*
1382  * The inserter definitely committed. But is it old
1383  * enough that everyone sees it as committed?
1384  */
1385  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1386  if (!TransactionIdPrecedes(xmin, OldestXmin))
1387  {
1388  all_visible = false;
1389  break;
1390  }
1391 
1392  /* Track newest xmin on page. */
1393  if (TransactionIdFollows(xmin, visibility_cutoff_xid))
1394  visibility_cutoff_xid = xmin;
1395  }
1396  break;
1398 
1399  /*
1400  * If tuple is recently deleted then we must not remove it
1401  * from relation.
1402  */
1403  nkeep += 1;
1404  all_visible = false;
1405  break;
1407 
1408  /*
1409  * This is an expected case during concurrent vacuum.
1410  *
1411  * We do not count these rows as live, because we expect
1412  * the inserting transaction to update the counters at
1413  * commit, and we assume that will happen only after we
1414  * report our results. This assumption is a bit shaky,
1415  * but it is what acquire_sample_rows() does, so be
1416  * consistent.
1417  */
1418  all_visible = false;
1419  break;
1421  /* This is an expected case during concurrent vacuum */
1422  all_visible = false;
1423 
1424  /*
1425  * Count such rows as live. As above, we assume the
1426  * deleting transaction will commit and update the
1427  * counters after we report.
1428  */
1429  live_tuples += 1;
1430  break;
1431  default:
1432  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1433  break;
1434  }
1435 
1436  if (tupgone)
1437  {
1438  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1440  &vacrelstats->latestRemovedXid);
1441  tups_vacuumed += 1;
1442  has_dead_tuples = true;
1443  }
1444  else
1445  {
1446  bool tuple_totally_frozen;
1447 
1448  num_tuples += 1;
1449  hastup = true;
1450 
1451  /*
1452  * Each non-removable tuple must be checked to see if it needs
1453  * freezing. Note we already have exclusive buffer lock.
1454  */
1456  relfrozenxid, relminmxid,
1458  &frozen[nfrozen],
1459  &tuple_totally_frozen))
1460  frozen[nfrozen++].offset = offnum;
1461 
1462  if (!tuple_totally_frozen)
1463  all_frozen = false;
1464  }
1465  } /* scan along page */
1466 
1467  /*
1468  * If we froze any tuples, mark the buffer dirty, and write a WAL
1469  * record recording the changes. We must log the changes to be
1470  * crash-safe against future truncation of CLOG.
1471  */
1472  if (nfrozen > 0)
1473  {
1475 
1476  MarkBufferDirty(buf);
1477 
1478  /* execute collected freezes */
1479  for (i = 0; i < nfrozen; i++)
1480  {
1481  ItemId itemid;
1482  HeapTupleHeader htup;
1483 
1484  itemid = PageGetItemId(page, frozen[i].offset);
1485  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1486 
1487  heap_execute_freeze_tuple(htup, &frozen[i]);
1488  }
1489 
1490  /* Now WAL-log freezing if necessary */
1491  if (RelationNeedsWAL(onerel))
1492  {
1493  XLogRecPtr recptr;
1494 
1495  recptr = log_heap_freeze(onerel, buf, FreezeLimit,
1496  frozen, nfrozen);
1497  PageSetLSN(page, recptr);
1498  }
1499 
1500  END_CRIT_SECTION();
1501  }
1502 
1503  /*
1504  * If there are no indexes we can vacuum the page right now instead of
1505  * doing a second scan. Also we don't do that but forget dead tuples
1506  * when index cleanup is disabled.
1507  */
1508  if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
1509  {
1510  if (nindexes == 0)
1511  {
1512  /* Remove tuples from heap if the table has no index */
1513  lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
1514  vacuumed_pages++;
1515  has_dead_tuples = false;
1516  }
1517  else
1518  {
1519  /*
1520  * Here, we have indexes but index cleanup is disabled.
1521  * Instead of vacuuming the dead tuples on the heap, we just
1522  * forget them.
1523  *
1524  * Note that vacrelstats->dead_tuples could have tuples which
1525  * became dead after HOT-pruning but are not marked dead yet.
1526  * We do not process them because it's a very rare condition,
1527  * and the next vacuum will process them anyway.
1528  */
1530  }
1531 
1532  /*
1533  * Forget the now-vacuumed tuples, and press on, but be careful
1534  * not to reset latestRemovedXid since we want that value to be
1535  * valid.
1536  */
1537  dead_tuples->num_tuples = 0;
1538 
1539  /*
1540  * Periodically do incremental FSM vacuuming to make newly-freed
1541  * space visible on upper FSM pages. Note: although we've cleaned
1542  * the current block, we haven't yet updated its FSM entry (that
1543  * happens further down), so passing end == blkno is correct.
1544  */
1545  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1546  {
1547  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
1548  blkno);
1549  next_fsm_block_to_vacuum = blkno;
1550  }
1551  }
1552 
1553  freespace = PageGetHeapFreeSpace(page);
1554 
1555  /* mark page all-visible, if appropriate */
1556  if (all_visible && !all_visible_according_to_vm)
1557  {
1559 
1560  if (all_frozen)
1561  flags |= VISIBILITYMAP_ALL_FROZEN;
1562 
1563  /*
1564  * It should never be the case that the visibility map page is set
1565  * while the page-level bit is clear, but the reverse is allowed
1566  * (if checksums are not enabled). Regardless, set both bits so
1567  * that we get back in sync.
1568  *
1569  * NB: If the heap page is all-visible but the VM bit is not set,
1570  * we don't need to dirty the heap page. However, if checksums
1571  * are enabled, we do need to make sure that the heap page is
1572  * dirtied before passing it to visibilitymap_set(), because it
1573  * may be logged. Given that this situation should only happen in
1574  * rare cases after a crash, it is not worth optimizing.
1575  */
1576  PageSetAllVisible(page);
1577  MarkBufferDirty(buf);
1578  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1579  vmbuffer, visibility_cutoff_xid, flags);
1580  }
1581 
1582  /*
1583  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1584  * the page-level bit is clear. However, it's possible that the bit
1585  * got cleared after we checked it and before we took the buffer
1586  * content lock, so we must recheck before jumping to the conclusion
1587  * that something bad has happened.
1588  */
1589  else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1590  && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
1591  {
1592  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1593  vacrelstats->relname, blkno);
1594  visibilitymap_clear(onerel, blkno, vmbuffer,
1596  }
1597 
1598  /*
1599  * It's possible for the value returned by GetOldestXmin() to move
1600  * backwards, so it's not wrong for us to see tuples that appear to
1601  * not be visible to everyone yet, while PD_ALL_VISIBLE is already
1602  * set. The real safe xmin value never moves backwards, but
1603  * GetOldestXmin() is conservative and sometimes returns a value
1604  * that's unnecessarily small, so if we see that contradiction it just
1605  * means that the tuples that we think are not visible to everyone yet
1606  * actually are, and the PD_ALL_VISIBLE flag is correct.
1607  *
1608  * There should never be dead tuples on a page with PD_ALL_VISIBLE
1609  * set, however.
1610  */
1611  else if (PageIsAllVisible(page) && has_dead_tuples)
1612  {
1613  elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1614  vacrelstats->relname, blkno);
1615  PageClearAllVisible(page);
1616  MarkBufferDirty(buf);
1617  visibilitymap_clear(onerel, blkno, vmbuffer,
1619  }
1620 
1621  /*
1622  * If the all-visible page is all-frozen but not marked as such yet,
1623  * mark it as all-frozen. Note that all_frozen is only valid if
1624  * all_visible is true, so we must check both.
1625  */
1626  else if (all_visible_according_to_vm && all_visible && all_frozen &&
1627  !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1628  {
1629  /*
1630  * We can pass InvalidTransactionId as the cutoff XID here,
1631  * because setting the all-frozen bit doesn't cause recovery
1632  * conflicts.
1633  */
1634  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1635  vmbuffer, InvalidTransactionId,
1637  }
1638 
1639  UnlockReleaseBuffer(buf);
1640 
1641  /* Remember the location of the last page with nonremovable tuples */
1642  if (hastup)
1643  vacrelstats->nonempty_pages = blkno + 1;
1644 
1645  /*
1646  * If we remembered any tuples for deletion, then the page will be
1647  * visited again by lazy_vacuum_heap, which will compute and record
1648  * its post-compaction free space. If not, then we're done with this
1649  * page, so remember its free space as-is. (This path will always be
1650  * taken if there are no indexes.)
1651  */
1652  if (dead_tuples->num_tuples == prev_dead_count)
1653  RecordPageWithFreeSpace(onerel, blkno, freespace);
1654  }
1655 
1656  /* report that everything is scanned and vacuumed */
1658 
1659  pfree(frozen);
1660 
1661  /* save stats for use later */
1662  vacrelstats->tuples_deleted = tups_vacuumed;
1663  vacrelstats->new_dead_tuples = nkeep;
1664 
1665  /* now we can compute the new value for pg_class.reltuples */
1666  vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
1667  nblocks,
1668  vacrelstats->tupcount_pages,
1669  live_tuples);
1670 
1671  /* also compute total number of surviving heap entries */
1672  vacrelstats->new_rel_tuples =
1673  vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples;
1674 
1675  /*
1676  * Release any remaining pin on visibility map page.
1677  */
1678  if (BufferIsValid(vmbuffer))
1679  {
1680  ReleaseBuffer(vmbuffer);
1681  vmbuffer = InvalidBuffer;
1682  }
1683 
1684  /* If any tuples need to be deleted, perform final vacuum cycle */
1685  /* XXX put a threshold on min number of tuples here? */
1686  if (dead_tuples->num_tuples > 0)
1687  {
1688  /* Work on all the indexes, and then the heap */
1689  lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats,
1690  lps, nindexes);
1691 
1692  /* Remove tuples from heap */
1693  lazy_vacuum_heap(onerel, vacrelstats);
1694  }
1695 
1696  /*
1697  * Vacuum the remainder of the Free Space Map. We must do this whether or
1698  * not there were indexes.
1699  */
1700  if (blkno > next_fsm_block_to_vacuum)
1701  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1702 
1703  /* report all blocks vacuumed */
1705 
1706  /* Do post-vacuum cleanup */
1707  if (vacrelstats->useindex)
1708  lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes);
1709 
1710  /*
1711  * End parallel mode before updating index statistics as we cannot write
1712  * during parallel mode.
1713  */
1714  if (ParallelVacuumIsActive(lps))
1715  end_parallel_vacuum(indstats, lps, nindexes);
1716 
1717  /* Update index statistics */
1718  update_index_statistics(Irel, indstats, nindexes);
1719 
1720  /* If no indexes, make log report that lazy_vacuum_heap would've made */
1721  if (vacuumed_pages)
1722  ereport(elevel,
1723  (errmsg("\"%s\": removed %.0f row versions in %u pages",
1724  vacrelstats->relname,
1725  tups_vacuumed, vacuumed_pages)));
1726 
1727  /*
1728  * This is pretty messy, but we split it up so that we can skip emitting
1729  * individual parts of the message when not applicable.
1730  */
1731  initStringInfo(&buf);
1732  appendStringInfo(&buf,
1733  _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"),
1734  nkeep, OldestXmin);
1735  appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"),
1736  nunused);
1737  appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
1738  "Skipped %u pages due to buffer pins, ",
1739  vacrelstats->pinskipped_pages),
1740  vacrelstats->pinskipped_pages);
1741  appendStringInfo(&buf, ngettext("%u frozen page.\n",
1742  "%u frozen pages.\n",
1743  vacrelstats->frozenskipped_pages),
1744  vacrelstats->frozenskipped_pages);
1745  appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
1746  "%u pages are entirely empty.\n",
1747  empty_pages),
1748  empty_pages);
1749  appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
1750 
1751  ereport(elevel,
1752  (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
1753  vacrelstats->relname,
1754  tups_vacuumed, num_tuples,
1755  vacrelstats->scanned_pages, nblocks),
1756  errdetail_internal("%s", buf.data)));
1757  pfree(buf.data);
1758 }
1759 
1760 /*
1761  * lazy_vacuum_all_indexes() -- vacuum all indexes of relation.
1762  *
1763  * We process the indexes serially unless we are doing parallel vacuum.
1764  */
1765 static void
1767  IndexBulkDeleteResult **stats,
1768  LVRelStats *vacrelstats, LVParallelState *lps,
1769  int nindexes)
1770 {
1772  Assert(nindexes > 0);
1773 
1774  /* Log cleanup info before we touch indexes */
1775  vacuum_log_cleanup_info(onerel, vacrelstats);
1776 
1777  /* Report that we are now vacuuming indexes */
1780 
1781  /* Perform index vacuuming with parallel workers for parallel vacuum. */
1782  if (ParallelVacuumIsActive(lps))
1783  {
1784  /* Tell parallel workers to do index vacuuming */
1785  lps->lvshared->for_cleanup = false;
1786  lps->lvshared->first_time = false;
1787 
1788  /*
1789  * We can only provide an approximate value of num_heap_tuples in
1790  * vacuum cases.
1791  */
1792  lps->lvshared->reltuples = vacrelstats->old_live_tuples;
1793  lps->lvshared->estimated_count = true;
1794 
1795  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
1796  }
1797  else
1798  {
1799  int idx;
1800 
1801  for (idx = 0; idx < nindexes; idx++)
1802  lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples,
1803  vacrelstats->old_live_tuples, vacrelstats);
1804  }
1805 
1806  /* Increase and report the number of index scans */
1807  vacrelstats->num_index_scans++;
1809  vacrelstats->num_index_scans);
1810 }
1811 
1812 
1813 /*
1814  * lazy_vacuum_heap() -- second pass over the heap
1815  *
1816  * This routine marks dead tuples as unused and compacts out free
1817  * space on their pages. Pages not having dead tuples recorded from
1818  * lazy_scan_heap are not visited at all.
1819  *
1820  * Note: the reason for doing this as a second pass is we cannot remove
1821  * the tuples until we've removed their index entries, and we want to
1822  * process index entry removal in batches as large as possible.
1823  */
1824 static void
1825 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
1826 {
1827  int tupindex;
1828  int npages;
1829  PGRUsage ru0;
1830  Buffer vmbuffer = InvalidBuffer;
1831  LVSavedErrInfo saved_err_info;
1832 
1833  /* Report that we are now vacuuming the heap */
1836 
1837  /* Update error traceback information */
1838  update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
1840 
1841  pg_rusage_init(&ru0);
1842  npages = 0;
1843 
1844  tupindex = 0;
1845  while (tupindex < vacrelstats->dead_tuples->num_tuples)
1846  {
1847  BlockNumber tblk;
1848  Buffer buf;
1849  Page page;
1850  Size freespace;
1851 
1853 
1854  tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
1855  vacrelstats->blkno = tblk;
1856  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
1857  vac_strategy);
1859  {
1860  ReleaseBuffer(buf);
1861  ++tupindex;
1862  continue;
1863  }
1864  tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
1865  &vmbuffer);
1866 
1867  /* Now that we've compacted the page, record its available space */
1868  page = BufferGetPage(buf);
1869  freespace = PageGetHeapFreeSpace(page);
1870 
1871  UnlockReleaseBuffer(buf);
1872  RecordPageWithFreeSpace(onerel, tblk, freespace);
1873  npages++;
1874  }
1875 
1876  if (BufferIsValid(vmbuffer))
1877  {
1878  ReleaseBuffer(vmbuffer);
1879  vmbuffer = InvalidBuffer;
1880  }
1881 
1882  ereport(elevel,
1883  (errmsg("\"%s\": removed %d row versions in %d pages",
1884  vacrelstats->relname,
1885  tupindex, npages),
1886  errdetail_internal("%s", pg_rusage_show(&ru0))));
1887 
1888  /* Revert to the previous phase information for error traceback */
1889  restore_vacuum_error_info(vacrelstats, &saved_err_info);
1890 }
1891 
1892 /*
1893  * lazy_vacuum_page() -- free dead tuples on a page
1894  * and repair its fragmentation.
1895  *
1896  * Caller must hold pin and buffer cleanup lock on the buffer.
1897  *
1898  * tupindex is the index in vacrelstats->dead_tuples of the first dead
1899  * tuple for this page. We assume the rest follow sequentially.
1900  * The return value is the first tupindex after the tuples of this page.
1901  */
1902 static int
1904  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
1905 {
1906  LVDeadTuples *dead_tuples = vacrelstats->dead_tuples;
1907  Page page = BufferGetPage(buffer);
1908  OffsetNumber unused[MaxOffsetNumber];
1909  int uncnt = 0;
1910  TransactionId visibility_cutoff_xid;
1911  bool all_frozen;
1912  LVSavedErrInfo saved_err_info;
1913 
1915 
1916  /* Update error traceback information */
1917  update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
1918  blkno);
1919 
1921 
1922  for (; tupindex < dead_tuples->num_tuples; tupindex++)
1923  {
1924  BlockNumber tblk;
1925  OffsetNumber toff;
1926  ItemId itemid;
1927 
1928  tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
1929  if (tblk != blkno)
1930  break; /* past end of tuples for this block */
1931  toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
1932  itemid = PageGetItemId(page, toff);
1933  ItemIdSetUnused(itemid);
1934  unused[uncnt++] = toff;
1935  }
1936 
1938 
1939  /*
1940  * Mark buffer dirty before we write WAL.
1941  */
1942  MarkBufferDirty(buffer);
1943 
1944  /* XLOG stuff */
1945  if (RelationNeedsWAL(onerel))
1946  {
1947  XLogRecPtr recptr;
1948 
1949  recptr = log_heap_clean(onerel, buffer,
1950  NULL, 0, NULL, 0,
1951  unused, uncnt,
1952  vacrelstats->latestRemovedXid);
1953  PageSetLSN(page, recptr);
1954  }
1955 
1956  /*
1957  * End critical section, so we safely can do visibility tests (which
1958  * possibly need to perform IO and allocate memory!). If we crash now the
1959  * page (including the corresponding vm bit) might not be marked all
1960  * visible, but that's fine. A later vacuum will fix that.
1961  */
1962  END_CRIT_SECTION();
1963 
1964  /*
1965  * Now that we have removed the dead tuples from the page, once again
1966  * check if the page has become all-visible. The page is already marked
1967  * dirty, exclusively locked, and, if needed, a full page image has been
1968  * emitted in the log_heap_clean() above.
1969  */
1970  if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
1971  &all_frozen))
1972  PageSetAllVisible(page);
1973 
1974  /*
1975  * All the changes to the heap page have been done. If the all-visible
1976  * flag is now set, also set the VM all-visible bit (and, if possible, the
1977  * all-frozen bit) unless this has already been done previously.
1978  */
1979  if (PageIsAllVisible(page))
1980  {
1981  uint8 vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
1982  uint8 flags = 0;
1983 
1984  /* Set the VM all-frozen bit to flag, if needed */
1985  if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
1986  flags |= VISIBILITYMAP_ALL_VISIBLE;
1987  if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
1988  flags |= VISIBILITYMAP_ALL_FROZEN;
1989 
1990  Assert(BufferIsValid(*vmbuffer));
1991  if (flags != 0)
1992  visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
1993  *vmbuffer, visibility_cutoff_xid, flags);
1994  }
1995 
1996  /* Revert to the previous phase information for error traceback */
1997  restore_vacuum_error_info(vacrelstats, &saved_err_info);
1998  return tupindex;
1999 }
2000 
2001 /*
2002  * lazy_check_needs_freeze() -- scan page to see if any tuples
2003  * need to be cleaned to avoid wraparound
2004  *
2005  * Returns true if the page needs to be vacuumed using cleanup lock.
2006  * Also returns a flag indicating whether page contains any tuples at all.
2007  */
2008 static bool
2010 {
2011  Page page = BufferGetPage(buf);
2012  OffsetNumber offnum,
2013  maxoff;
2014  HeapTupleHeader tupleheader;
2015 
2016  *hastup = false;
2017 
2018  /*
2019  * New and empty pages, obviously, don't contain tuples. We could make
2020  * sure that the page is registered in the FSM, but it doesn't seem worth
2021  * waiting for a cleanup lock just for that, especially because it's
2022  * likely that the pin holder will do so.
2023  */
2024  if (PageIsNew(page) || PageIsEmpty(page))
2025  return false;
2026 
2027  maxoff = PageGetMaxOffsetNumber(page);
2028  for (offnum = FirstOffsetNumber;
2029  offnum <= maxoff;
2030  offnum = OffsetNumberNext(offnum))
2031  {
2032  ItemId itemid;
2033 
2034  itemid = PageGetItemId(page, offnum);
2035 
2036  /* this should match hastup test in count_nondeletable_pages() */
2037  if (ItemIdIsUsed(itemid))
2038  *hastup = true;
2039 
2040  /* dead and redirect items never need freezing */
2041  if (!ItemIdIsNormal(itemid))
2042  continue;
2043 
2044  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2045 
2046  if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
2047  MultiXactCutoff, buf))
2048  return true;
2049  } /* scan along page */
2050 
2051  return false;
2052 }
2053 
2054 /*
2055  * Perform index vacuum or index cleanup with parallel workers. This function
2056  * must be used by the parallel vacuum leader process. The caller must set
2057  * lps->lvshared->for_cleanup to indicate whether to perform vacuum or
2058  * cleanup.
2059  */
2060 static void
2062  LVRelStats *vacrelstats, LVParallelState *lps,
2063  int nindexes)
2064 {
2065  int nworkers;
2066 
2069  Assert(nindexes > 0);
2070 
2071  /* Determine the number of parallel workers to launch */
2072  if (lps->lvshared->for_cleanup)
2073  {
2074  if (lps->lvshared->first_time)
2075  nworkers = lps->nindexes_parallel_cleanup +
2077  else
2078  nworkers = lps->nindexes_parallel_cleanup;
2079  }
2080  else
2081  nworkers = lps->nindexes_parallel_bulkdel;
2082 
2083  /* The leader process will participate */
2084  nworkers--;
2085 
2086  /*
2087  * It is possible that parallel context is initialized with fewer workers
2088  * than the number of indexes that need a separate worker in the current
2089  * phase, so we need to consider it. See compute_parallel_vacuum_workers.
2090  */
2091  nworkers = Min(nworkers, lps->pcxt->nworkers);
2092 
2093  /* Setup the shared cost-based vacuum delay and launch workers */
2094  if (nworkers > 0)
2095  {
2096  if (vacrelstats->num_index_scans > 0)
2097  {
2098  /* Reset the parallel index processing counter */
2099  pg_atomic_write_u32(&(lps->lvshared->idx), 0);
2100 
2101  /* Reinitialize the parallel context to relaunch parallel workers */
2103  }
2104 
2105  /*
2106  * Set up shared cost balance and the number of active workers for
2107  * vacuum delay. We need to do this before launching workers as
2108  * otherwise, they might not see the updated values for these
2109  * parameters.
2110  */
2113 
2114  /*
2115  * The number of workers can vary between bulkdelete and cleanup
2116  * phase.
2117  */
2118  ReinitializeParallelWorkers(lps->pcxt, nworkers);
2119 
2121 
2122  if (lps->pcxt->nworkers_launched > 0)
2123  {
2124  /*
2125  * Reset the local cost values for leader backend as we have
2126  * already accumulated the remaining balance of heap.
2127  */
2128  VacuumCostBalance = 0;
2130 
2131  /* Enable shared cost balance for leader backend */
2134  }
2135 
2136  if (lps->lvshared->for_cleanup)
2137  ereport(elevel,
2138  (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
2139  "launched %d parallel vacuum workers for index cleanup (planned: %d)",
2140  lps->pcxt->nworkers_launched),
2141  lps->pcxt->nworkers_launched, nworkers)));
2142  else
2143  ereport(elevel,
2144  (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
2145  "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
2146  lps->pcxt->nworkers_launched),
2147  lps->pcxt->nworkers_launched, nworkers)));
2148  }
2149 
2150  /* Process the indexes that can be processed by only leader process */
2151  vacuum_indexes_leader(Irel, stats, vacrelstats, lps, nindexes);
2152 
2153  /*
2154  * Join as a parallel worker. The leader process alone processes all the
2155  * indexes in the case where no workers are launched.
2156  */
2157  parallel_vacuum_index(Irel, stats, lps->lvshared,
2158  vacrelstats->dead_tuples, nindexes, vacrelstats);
2159 
2160  /*
2161  * Next, accumulate buffer and WAL usage. (This must wait for the workers
2162  * to finish, or we might get incomplete data.)
2163  */
2164  if (nworkers > 0)
2165  {
2166  int i;
2167 
2168  /* Wait for all vacuum workers to finish */
2170 
2171  for (i = 0; i < lps->pcxt->nworkers_launched; i++)
2172  InstrAccumParallelQuery(&lps->buffer_usage[i], &lps->wal_usage[i]);
2173  }
2174 
2175  /*
2176  * Carry the shared balance value to heap scan and disable shared costing
2177  */
2179  {
2181  VacuumSharedCostBalance = NULL;
2182  VacuumActiveNWorkers = NULL;
2183  }
2184 }
2185 
2186 /*
2187  * Index vacuum/cleanup routine used by the leader process and parallel
2188  * vacuum worker processes to process the indexes in parallel.
2189  */
2190 static void
2192  LVShared *lvshared, LVDeadTuples *dead_tuples,
2193  int nindexes, LVRelStats *vacrelstats)
2194 {
2195  /*
2196  * Increment the active worker count if we are able to launch any worker.
2197  */
2200 
2201  /* Loop until all indexes are vacuumed */
2202  for (;;)
2203  {
2204  int idx;
2205  LVSharedIndStats *shared_indstats;
2206 
2207  /* Get an index number to process */
2208  idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
2209 
2210  /* Done for all indexes? */
2211  if (idx >= nindexes)
2212  break;
2213 
2214  /* Get the index statistics of this index from DSM */
2215  shared_indstats = get_indstats(lvshared, idx);
2216 
2217  /*
2218  * Skip processing indexes that don't participate in parallel
2219  * operation
2220  */
2221  if (shared_indstats == NULL ||
2222  skip_parallel_vacuum_index(Irel[idx], lvshared))
2223  continue;
2224 
2225  /* Do vacuum or cleanup of the index */
2226  vacuum_one_index(Irel[idx], &(stats[idx]), lvshared, shared_indstats,
2227  dead_tuples, vacrelstats);
2228  }
2229 
2230  /*
2231  * We have completed the index vacuum so decrement the active worker
2232  * count.
2233  */
2236 }
2237 
2238 /*
2239  * Vacuum or cleanup indexes that can be processed by only the leader process
2240  * because these indexes don't support parallel operation at that phase.
2241  */
2242 static void
2244  LVRelStats *vacrelstats, LVParallelState *lps,
2245  int nindexes)
2246 {
2247  int i;
2248 
2250 
2251  /*
2252  * Increment the active worker count if we are able to launch any worker.
2253  */
2256 
2257  for (i = 0; i < nindexes; i++)
2258  {
2259  LVSharedIndStats *shared_indstats;
2260 
2261  shared_indstats = get_indstats(lps->lvshared, i);
2262 
2263  /* Process the indexes skipped by parallel workers */
2264  if (shared_indstats == NULL ||
2265  skip_parallel_vacuum_index(Irel[i], lps->lvshared))
2266  vacuum_one_index(Irel[i], &(stats[i]), lps->lvshared,
2267  shared_indstats, vacrelstats->dead_tuples,
2268  vacrelstats);
2269  }
2270 
2271  /*
2272  * We have completed the index vacuum so decrement the active worker
2273  * count.
2274  */
2277 }
2278 
2279 /*
2280  * Vacuum or cleanup index either by leader process or by one of the worker
2281  * process. After processing the index this function copies the index
2282  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
2283  * segment.
2284  */
2285 static void
2287  LVShared *lvshared, LVSharedIndStats *shared_indstats,
2288  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
2289 {
2290  IndexBulkDeleteResult *bulkdelete_res = NULL;
2291 
2292  if (shared_indstats)
2293  {
2294  /* Get the space for IndexBulkDeleteResult */
2295  bulkdelete_res = &(shared_indstats->stats);
2296 
2297  /*
2298  * Update the pointer to the corresponding bulk-deletion result if
2299  * someone has already updated it.
2300  */
2301  if (shared_indstats->updated && *stats == NULL)
2302  *stats = bulkdelete_res;
2303  }
2304 
2305  /* Do vacuum or cleanup of the index */
2306  if (lvshared->for_cleanup)
2307  lazy_cleanup_index(indrel, stats, lvshared->reltuples,
2308  lvshared->estimated_count, vacrelstats);
2309  else
2310  lazy_vacuum_index(indrel, stats, dead_tuples,
2311  lvshared->reltuples, vacrelstats);
2312 
2313  /*
2314  * Copy the index bulk-deletion result returned from ambulkdelete and
2315  * amvacuumcleanup to the DSM segment if it's the first cycle because they
2316  * allocate locally and it's possible that an index will be vacuumed by a
2317  * different vacuum process the next cycle. Copying the result normally
2318  * happens only the first time an index is vacuumed. For any additional
2319  * vacuum pass, we directly point to the result on the DSM segment and
2320  * pass it to vacuum index APIs so that workers can update it directly.
2321  *
2322  * Since all vacuum workers write the bulk-deletion result at different
2323  * slots we can write them without locking.
2324  */
2325  if (shared_indstats && !shared_indstats->updated && *stats != NULL)
2326  {
2327  memcpy(bulkdelete_res, *stats, sizeof(IndexBulkDeleteResult));
2328  shared_indstats->updated = true;
2329 
2330  /*
2331  * Now that stats[idx] points to the DSM segment, we don't need the
2332  * locally allocated results.
2333  */
2334  pfree(*stats);
2335  *stats = bulkdelete_res;
2336  }
2337 }
2338 
2339 /*
2340  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2341  *
2342  * Cleanup indexes. We process the indexes serially unless we are doing
2343  * parallel vacuum.
2344  */
2345 static void
2347  LVRelStats *vacrelstats, LVParallelState *lps,
2348  int nindexes)
2349 {
2350  int idx;
2351 
2353  Assert(nindexes > 0);
2354 
2355  /* Report that we are now cleaning up indexes */
2358 
2359  /*
2360  * If parallel vacuum is active we perform index cleanup with parallel
2361  * workers.
2362  */
2363  if (ParallelVacuumIsActive(lps))
2364  {
2365  /* Tell parallel workers to do index cleanup */
2366  lps->lvshared->for_cleanup = true;
2367  lps->lvshared->first_time =
2368  (vacrelstats->num_index_scans == 0);
2369 
2370  /*
2371  * Now we can provide a better estimate of total number of surviving
2372  * tuples (we assume indexes are more interested in that than in the
2373  * number of nominally live tuples).
2374  */
2375  lps->lvshared->reltuples = vacrelstats->new_rel_tuples;
2376  lps->lvshared->estimated_count =
2377  (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
2378 
2379  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
2380  }
2381  else
2382  {
2383  for (idx = 0; idx < nindexes; idx++)
2384  lazy_cleanup_index(Irel[idx], &stats[idx],
2385  vacrelstats->new_rel_tuples,
2386  vacrelstats->tupcount_pages < vacrelstats->rel_pages,
2387  vacrelstats);
2388  }
2389 }
2390 
2391 /*
2392  * lazy_vacuum_index() -- vacuum one index relation.
2393  *
2394  * Delete all the index entries pointing to tuples listed in
2395  * dead_tuples, and update running statistics.
2396  *
2397  * reltuples is the number of heap tuples to be passed to the
2398  * bulkdelete callback.
2399  */
2400 static void
2402  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
2403 {
2404  IndexVacuumInfo ivinfo;
2405  const char *msg;
2406  PGRUsage ru0;
2407  LVSavedErrInfo saved_err_info;
2408 
2409  pg_rusage_init(&ru0);
2410 
2411  ivinfo.index = indrel;
2412  ivinfo.analyze_only = false;
2413  ivinfo.report_progress = false;
2414  ivinfo.estimated_count = true;
2415  ivinfo.message_level = elevel;
2416  ivinfo.num_heap_tuples = reltuples;
2417  ivinfo.strategy = vac_strategy;
2418 
2419  /*
2420  * Update error traceback information.
2421  *
2422  * The index name is saved during this phase and restored immediately
2423  * after this phase. See vacuum_error_callback.
2424  */
2425  Assert(vacrelstats->indname == NULL);
2426  vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
2427  update_vacuum_error_info(vacrelstats, &saved_err_info,
2430 
2431  /* Do bulk deletion */
2432  *stats = index_bulk_delete(&ivinfo, *stats,
2433  lazy_tid_reaped, (void *) dead_tuples);
2434 
2435  if (IsParallelWorker())
2436  msg = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker");
2437  else
2438  msg = gettext_noop("scanned index \"%s\" to remove %d row versions");
2439 
2440  ereport(elevel,
2441  (errmsg(msg,
2442  vacrelstats->indname,
2443  dead_tuples->num_tuples),
2444  errdetail_internal("%s", pg_rusage_show(&ru0))));
2445 
2446  /* Revert to the previous phase information for error traceback */
2447  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2448  pfree(vacrelstats->indname);
2449  vacrelstats->indname = NULL;
2450 }
2451 
2452 /*
2453  * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
2454  *
2455  * reltuples is the number of heap tuples and estimated_count is true
2456  * if reltuples is an estimated value.
2457  */
2458 static void
2460  IndexBulkDeleteResult **stats,
2461  double reltuples, bool estimated_count, LVRelStats *vacrelstats)
2462 {
2463  IndexVacuumInfo ivinfo;
2464  const char *msg;
2465  PGRUsage ru0;
2466  LVSavedErrInfo saved_err_info;
2467 
2468  pg_rusage_init(&ru0);
2469 
2470  ivinfo.index = indrel;
2471  ivinfo.analyze_only = false;
2472  ivinfo.report_progress = false;
2473  ivinfo.estimated_count = estimated_count;
2474  ivinfo.message_level = elevel;
2475 
2476  ivinfo.num_heap_tuples = reltuples;
2477  ivinfo.strategy = vac_strategy;
2478 
2479  /*
2480  * Update error traceback information.
2481  *
2482  * The index name is saved during this phase and restored immediately
2483  * after this phase. See vacuum_error_callback.
2484  */
2485  Assert(vacrelstats->indname == NULL);
2486  vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
2487  update_vacuum_error_info(vacrelstats, &saved_err_info,
2490 
2491  *stats = index_vacuum_cleanup(&ivinfo, *stats);
2492 
2493  /* Revert back to the old phase information for error traceback */
2494  restore_vacuum_error_info(vacrelstats, &saved_err_info);
2495  pfree(vacrelstats->indname);
2496  vacrelstats->indname = NULL;
2497 
2498  if (!(*stats))
2499  return;
2500 
2501  if (IsParallelWorker())
2502  msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages as reported by parallel vacuum worker");
2503  else
2504  msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages");
2505 
2506  ereport(elevel,
2507  (errmsg(msg,
2508  RelationGetRelationName(indrel),
2509  (*stats)->num_index_tuples,
2510  (*stats)->num_pages),
2511  errdetail("%.0f index row versions were removed.\n"
2512  "%u index pages have been deleted, %u are currently reusable.\n"
2513  "%s.",
2514  (*stats)->tuples_removed,
2515  (*stats)->pages_deleted, (*stats)->pages_free,
2516  pg_rusage_show(&ru0))));
2517 }
2518 
2519 /*
2520  * should_attempt_truncation - should we attempt to truncate the heap?
2521  *
2522  * Don't even think about it unless we have a shot at releasing a goodly
2523  * number of pages. Otherwise, the time taken isn't worth it.
2524  *
2525  * Also don't attempt it if we are doing early pruning/vacuuming, because a
2526  * scan which cannot find a truncated heap page cannot determine that the
2527  * snapshot is too old to read that page. We might be able to get away with
2528  * truncating all except one of the pages, setting its LSN to (at least) the
2529  * maximum of the truncated range if we also treated an index leaf tuple
2530  * pointing to a missing heap page as something to trigger the "snapshot too
2531  * old" error, but that seems fragile and seems like it deserves its own patch
2532  * if we consider it.
2533  *
2534  * This is split out so that we can test whether truncation is going to be
2535  * called for before we actually do it. If you change the logic here, be
2536  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
2537  */
2538 static bool
2540 {
2541  BlockNumber possibly_freeable;
2542 
2543  if (params->truncate == VACOPT_TERNARY_DISABLED)
2544  return false;
2545 
2546  possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
2547  if (possibly_freeable > 0 &&
2548  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2549  possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) &&
2551  return true;
2552  else
2553  return false;
2554 }
2555 
2556 /*
2557  * lazy_truncate_heap - try to truncate off any empty pages at the end
2558  */
2559 static void
2561 {
2562  BlockNumber old_rel_pages = vacrelstats->rel_pages;
2563  BlockNumber new_rel_pages;
2564  int lock_retry;
2565 
2566  /* Report that we are now truncating */
2569 
2570  /*
2571  * Loop until no more truncating can be done.
2572  */
2573  do
2574  {
2575  PGRUsage ru0;
2576 
2577  pg_rusage_init(&ru0);
2578 
2579  /*
2580  * We need full exclusive lock on the relation in order to do
2581  * truncation. If we can't get it, give up rather than waiting --- we
2582  * don't want to block other backends, and we don't want to deadlock
2583  * (which is quite possible considering we already hold a lower-grade
2584  * lock).
2585  */
2586  vacrelstats->lock_waiter_detected = false;
2587  lock_retry = 0;
2588  while (true)
2589  {
2591  break;
2592 
2593  /*
2594  * Check for interrupts while trying to (re-)acquire the exclusive
2595  * lock.
2596  */
2598 
2599  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2601  {
2602  /*
2603  * We failed to establish the lock in the specified number of
2604  * retries. This means we give up truncating.
2605  */
2606  vacrelstats->lock_waiter_detected = true;
2607  ereport(elevel,
2608  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2609  vacrelstats->relname)));
2610  return;
2611  }
2612 
2614  }
2615 
2616  /*
2617  * Now that we have exclusive lock, look to see if the rel has grown
2618  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2619  * the newly added pages presumably contain non-deletable tuples.
2620  */
2621  new_rel_pages = RelationGetNumberOfBlocks(onerel);
2622  if (new_rel_pages != old_rel_pages)
2623  {
2624  /*
2625  * Note: we intentionally don't update vacrelstats->rel_pages with
2626  * the new rel size here. If we did, it would amount to assuming
2627  * that the new pages are empty, which is unlikely. Leaving the
2628  * numbers alone amounts to assuming that the new pages have the
2629  * same tuple density as existing ones, which is less unlikely.
2630  */
2632  return;
2633  }
2634 
2635  /*
2636  * Scan backwards from the end to verify that the end pages actually
2637  * contain no tuples. This is *necessary*, not optional, because
2638  * other backends could have added tuples to these pages whilst we
2639  * were vacuuming.
2640  */
2641  new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
2642  vacrelstats->blkno = new_rel_pages;
2643 
2644  if (new_rel_pages >= old_rel_pages)
2645  {
2646  /* can't do anything after all */
2648  return;
2649  }
2650 
2651  /*
2652  * Okay to truncate.
2653  */
2654  RelationTruncate(onerel, new_rel_pages);
2655 
2656  /*
2657  * We can release the exclusive lock as soon as we have truncated.
2658  * Other backends can't safely access the relation until they have
2659  * processed the smgr invalidation that smgrtruncate sent out ... but
2660  * that should happen as part of standard invalidation processing once
2661  * they acquire lock on the relation.
2662  */
2664 
2665  /*
2666  * Update statistics. Here, it *is* correct to adjust rel_pages
2667  * without also touching reltuples, since the tuple count wasn't
2668  * changed by the truncation.
2669  */
2670  vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
2671  vacrelstats->rel_pages = new_rel_pages;
2672 
2673  ereport(elevel,
2674  (errmsg("\"%s\": truncated %u to %u pages",
2675  vacrelstats->relname,
2676  old_rel_pages, new_rel_pages),
2677  errdetail_internal("%s",
2678  pg_rusage_show(&ru0))));
2679  old_rel_pages = new_rel_pages;
2680  } while (new_rel_pages > vacrelstats->nonempty_pages &&
2681  vacrelstats->lock_waiter_detected);
2682 }
2683 
2684 /*
2685  * Rescan end pages to verify that they are (still) empty of tuples.
2686  *
2687  * Returns number of nondeletable pages (last nonempty page + 1).
2688  */
2689 static BlockNumber
2691 {
2692  BlockNumber blkno;
2693  BlockNumber prefetchedUntil;
2694  instr_time starttime;
2695 
2696  /* Initialize the starttime if we check for conflicting lock requests */
2697  INSTR_TIME_SET_CURRENT(starttime);
2698 
2699  /*
2700  * Start checking blocks at what we believe relation end to be and move
2701  * backwards. (Strange coding of loop control is needed because blkno is
2702  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2703  * in forward direction, so that OS-level readahead can kick in.
2704  */
2705  blkno = vacrelstats->rel_pages;
2707  "prefetch size must be power of 2");
2708  prefetchedUntil = InvalidBlockNumber;
2709  while (blkno > vacrelstats->nonempty_pages)
2710  {
2711  Buffer buf;
2712  Page page;
2713  OffsetNumber offnum,
2714  maxoff;
2715  bool hastup;
2716 
2717  /*
2718  * Check if another process requests a lock on our relation. We are
2719  * holding an AccessExclusiveLock here, so they will be waiting. We
2720  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2721  * only check if that interval has elapsed once every 32 blocks to
2722  * keep the number of system calls and actual shared lock table
2723  * lookups to a minimum.
2724  */
2725  if ((blkno % 32) == 0)
2726  {
2727  instr_time currenttime;
2728  instr_time elapsed;
2729 
2730  INSTR_TIME_SET_CURRENT(currenttime);
2731  elapsed = currenttime;
2732  INSTR_TIME_SUBTRACT(elapsed, starttime);
2733  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2735  {
2737  {
2738  ereport(elevel,
2739  (errmsg("\"%s\": suspending truncate due to conflicting lock request",
2740  vacrelstats->relname)));
2741 
2742  vacrelstats->lock_waiter_detected = true;
2743  return blkno;
2744  }
2745  starttime = currenttime;
2746  }
2747  }
2748 
2749  /*
2750  * We don't insert a vacuum delay point here, because we have an
2751  * exclusive lock on the table which we want to hold for as short a
2752  * time as possible. We still need to check for interrupts however.
2753  */
2755 
2756  blkno--;
2757 
2758  /* If we haven't prefetched this lot yet, do so now. */
2759  if (prefetchedUntil > blkno)
2760  {
2761  BlockNumber prefetchStart;
2762  BlockNumber pblkno;
2763 
2764  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2765  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2766  {
2767  PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);
2769  }
2770  prefetchedUntil = prefetchStart;
2771  }
2772 
2773  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
2774  RBM_NORMAL, vac_strategy);
2775 
2776  /* In this phase we only need shared access to the buffer */
2778 
2779  page = BufferGetPage(buf);
2780 
2781  if (PageIsNew(page) || PageIsEmpty(page))
2782  {
2783  UnlockReleaseBuffer(buf);
2784  continue;
2785  }
2786 
2787  hastup = false;
2788  maxoff = PageGetMaxOffsetNumber(page);
2789  for (offnum = FirstOffsetNumber;
2790  offnum <= maxoff;
2791  offnum = OffsetNumberNext(offnum))
2792  {
2793  ItemId itemid;
2794 
2795  itemid = PageGetItemId(page, offnum);
2796 
2797  /*
2798  * Note: any non-unused item should be taken as a reason to keep
2799  * this page. We formerly thought that DEAD tuples could be
2800  * thrown away, but that's not so, because we'd not have cleaned
2801  * out their index entries.
2802  */
2803  if (ItemIdIsUsed(itemid))
2804  {
2805  hastup = true;
2806  break; /* can stop scanning */
2807  }
2808  } /* scan along page */
2809 
2810  UnlockReleaseBuffer(buf);
2811 
2812  /* Done scanning if we found a tuple here */
2813  if (hastup)
2814  return blkno + 1;
2815  }
2816 
2817  /*
2818  * If we fall out of the loop, all the previously-thought-to-be-empty
2819  * pages still are; we need not bother to look at the last known-nonempty
2820  * page.
2821  */
2822  return vacrelstats->nonempty_pages;
2823 }
2824 
2825 /*
2826  * Return the maximum number of dead tuples we can record.
2827  */
2828 static long
2829 compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
2830 {
2831  long maxtuples;
2832  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
2833  autovacuum_work_mem != -1 ?
2835 
2836  if (useindex)
2837  {
2838  maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
2839  maxtuples = Min(maxtuples, INT_MAX);
2840  maxtuples = Min(maxtuples, MAXDEADTUPLES(MaxAllocSize));
2841 
2842  /* curious coding here to ensure the multiplication can't overflow */
2843  if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
2844  maxtuples = relblocks * LAZY_ALLOC_TUPLES;
2845 
2846  /* stay sane if small maintenance_work_mem */
2847  maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
2848  }
2849  else
2850  maxtuples = MaxHeapTuplesPerPage;
2851 
2852  return maxtuples;
2853 }
2854 
2855 /*
2856  * lazy_space_alloc - space allocation decisions for lazy vacuum
2857  *
2858  * See the comments at the head of this file for rationale.
2859  */
2860 static void
2861 lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
2862 {
2863  LVDeadTuples *dead_tuples = NULL;
2864  long maxtuples;
2865 
2866  maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
2867 
2868  dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
2869  dead_tuples->num_tuples = 0;
2870  dead_tuples->max_tuples = (int) maxtuples;
2871 
2872  vacrelstats->dead_tuples = dead_tuples;
2873 }
2874 
2875 /*
2876  * lazy_record_dead_tuple - remember one deletable tuple
2877  */
2878 static void
2880 {
2881  /*
2882  * The array shouldn't overflow under normal behavior, but perhaps it
2883  * could if we are given a really small maintenance_work_mem. In that
2884  * case, just forget the last few tuples (we'll get 'em next time).
2885  */
2886  if (dead_tuples->num_tuples < dead_tuples->max_tuples)
2887  {
2888  dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr;
2889  dead_tuples->num_tuples++;
2891  dead_tuples->num_tuples);
2892  }
2893 }
2894 
2895 /*
2896  * lazy_tid_reaped() -- is a particular tid deletable?
2897  *
2898  * This has the right signature to be an IndexBulkDeleteCallback.
2899  *
2900  * Assumes dead_tuples array is in sorted order.
2901  */
2902 static bool
2904 {
2905  LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
2906  ItemPointer res;
2907 
2908  res = (ItemPointer) bsearch((void *) itemptr,
2909  (void *) dead_tuples->itemptrs,
2910  dead_tuples->num_tuples,
2911  sizeof(ItemPointerData),
2912  vac_cmp_itemptr);
2913 
2914  return (res != NULL);
2915 }
2916 
2917 /*
2918  * Comparator routines for use with qsort() and bsearch().
2919  */
2920 static int
2921 vac_cmp_itemptr(const void *left, const void *right)
2922 {
2923  BlockNumber lblk,
2924  rblk;
2925  OffsetNumber loff,
2926  roff;
2927 
2928  lblk = ItemPointerGetBlockNumber((ItemPointer) left);
2929  rblk = ItemPointerGetBlockNumber((ItemPointer) right);
2930 
2931  if (lblk < rblk)
2932  return -1;
2933  if (lblk > rblk)
2934  return 1;
2935 
2936  loff = ItemPointerGetOffsetNumber((ItemPointer) left);
2937  roff = ItemPointerGetOffsetNumber((ItemPointer) right);
2938 
2939  if (loff < roff)
2940  return -1;
2941  if (loff > roff)
2942  return 1;
2943 
2944  return 0;
2945 }
2946 
2947 /*
2948  * Check if every tuple in the given page is visible to all current and future
2949  * transactions. Also return the visibility_cutoff_xid which is the highest
2950  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
2951  * on this page is frozen.
2952  */
2953 static bool
2955  TransactionId *visibility_cutoff_xid,
2956  bool *all_frozen)
2957 {
2958  Page page = BufferGetPage(buf);
2959  BlockNumber blockno = BufferGetBlockNumber(buf);
2960  OffsetNumber offnum,
2961  maxoff;
2962  bool all_visible = true;
2963 
2964  *visibility_cutoff_xid = InvalidTransactionId;
2965  *all_frozen = true;
2966 
2967  /*
2968  * This is a stripped down version of the line pointer scan in
2969  * lazy_scan_heap(). So if you change anything here, also check that code.
2970  */
2971  maxoff = PageGetMaxOffsetNumber(page);
2972  for (offnum = FirstOffsetNumber;
2973  offnum <= maxoff && all_visible;
2974  offnum = OffsetNumberNext(offnum))
2975  {
2976  ItemId itemid;
2977  HeapTupleData tuple;
2978 
2979  itemid = PageGetItemId(page, offnum);
2980 
2981  /* Unused or redirect line pointers are of no interest */
2982  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
2983  continue;
2984 
2985  ItemPointerSet(&(tuple.t_self), blockno, offnum);
2986 
2987  /*
2988  * Dead line pointers can have index pointers pointing to them. So
2989  * they can't be treated as visible
2990  */
2991  if (ItemIdIsDead(itemid))
2992  {
2993  all_visible = false;
2994  *all_frozen = false;
2995  break;
2996  }
2997 
2998  Assert(ItemIdIsNormal(itemid));
2999 
3000  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3001  tuple.t_len = ItemIdGetLength(itemid);
3002  tuple.t_tableOid = RelationGetRelid(rel);
3003 
3004  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3005  {
3006  case HEAPTUPLE_LIVE:
3007  {
3008  TransactionId xmin;
3009 
3010  /* Check comments in lazy_scan_heap. */
3012  {
3013  all_visible = false;
3014  *all_frozen = false;
3015  break;
3016  }
3017 
3018  /*
3019  * The inserter definitely committed. But is it old enough
3020  * that everyone sees it as committed?
3021  */
3022  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3023  if (!TransactionIdPrecedes(xmin, OldestXmin))
3024  {
3025  all_visible = false;
3026  *all_frozen = false;
3027  break;
3028  }
3029 
3030  /* Track newest xmin on page. */
3031  if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
3032  *visibility_cutoff_xid = xmin;
3033 
3034  /* Check whether this tuple is already frozen or not */
3035  if (all_visible && *all_frozen &&
3037  *all_frozen = false;
3038  }
3039  break;
3040 
3041  case HEAPTUPLE_DEAD:
3045  {
3046  all_visible = false;
3047  *all_frozen = false;
3048  break;
3049  }
3050  default:
3051  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3052  break;
3053  }
3054  } /* scan along page */
3055 
3056  return all_visible;
3057 }
3058 
3059 /*
3060  * Compute the number of parallel worker processes to request. Both index
3061  * vacuum and index cleanup can be executed with parallel workers. The index
3062  * is eligible for parallel vacuum iff its size is greater than
3063  * min_parallel_index_scan_size as invoking workers for very small indexes
3064  * can hurt performance.
3065  *
3066  * nrequested is the number of parallel workers that user requested. If
3067  * nrequested is 0, we compute the parallel degree based on nindexes, that is
3068  * the number of indexes that support parallel vacuum. This function also
3069  * sets can_parallel_vacuum to remember indexes that participate in parallel
3070  * vacuum.
3071  */
3072 static int
3073 compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
3074  bool *can_parallel_vacuum)
3075 {
3076  int nindexes_parallel = 0;
3077  int nindexes_parallel_bulkdel = 0;
3078  int nindexes_parallel_cleanup = 0;
3079  int parallel_workers;
3080  int i;
3081 
3082  /*
3083  * We don't allow performing parallel operation in standalone backend or
3084  * when parallelism is disabled.
3085  */
3087  return 0;
3088 
3089  /*
3090  * Compute the number of indexes that can participate in parallel vacuum.
3091  */
3092  for (i = 0; i < nindexes; i++)
3093  {
3094  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3095 
3096  if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
3098  continue;
3099 
3100  can_parallel_vacuum[i] = true;
3101 
3102  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3103  nindexes_parallel_bulkdel++;
3104  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
3105  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3106  nindexes_parallel_cleanup++;
3107  }
3108 
3109  nindexes_parallel = Max(nindexes_parallel_bulkdel,
3110  nindexes_parallel_cleanup);
3111 
3112  /* The leader process takes one index */
3113  nindexes_parallel--;
3114 
3115  /* No index supports parallel vacuum */
3116  if (nindexes_parallel <= 0)
3117  return 0;
3118 
3119  /* Compute the parallel degree */
3120  parallel_workers = (nrequested > 0) ?
3121  Min(nrequested, nindexes_parallel) : nindexes_parallel;
3122 
3123  /* Cap by max_parallel_maintenance_workers */
3124  parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
3125 
3126  return parallel_workers;
3127 }
3128 
3129 /*
3130  * Initialize variables for shared index statistics, set NULL bitmap and the
3131  * size of stats for each index.
3132  */
3133 static void
3134 prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
3135  int nindexes)
3136 {
3137  int i;
3138 
3139  /* Currently, we don't support parallel vacuum for autovacuum */
3141 
3142  /* Set NULL for all indexes */
3143  memset(lvshared->bitmap, 0x00, BITMAPLEN(nindexes));
3144 
3145  for (i = 0; i < nindexes; i++)
3146  {
3147  if (!can_parallel_vacuum[i])
3148  continue;
3149 
3150  /* Set NOT NULL as this index does support parallelism */
3151  lvshared->bitmap[i >> 3] |= 1 << (i & 0x07);
3152  }
3153 }
3154 
3155 /*
3156  * Update index statistics in pg_class if the statistics are accurate.
3157  */
3158 static void
3160  int nindexes)
3161 {
3162  int i;
3163 
3165 
3166  for (i = 0; i < nindexes; i++)
3167  {
3168  if (stats[i] == NULL || stats[i]->estimated_count)
3169  continue;
3170 
3171  /* Update index statistics */
3172  vac_update_relstats(Irel[i],
3173  stats[i]->num_pages,
3174  stats[i]->num_index_tuples,
3175  0,
3176  false,
3179  false);
3180  pfree(stats[i]);
3181  }
3182 }
3183 
3184 /*
3185  * This function prepares and returns parallel vacuum state if we can launch
3186  * even one worker. This function is responsible for entering parallel mode,
3187  * create a parallel context, and then initialize the DSM segment.
3188  */
3189 static LVParallelState *
3190 begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
3191  BlockNumber nblocks, int nindexes, int nrequested)
3192 {
3193  LVParallelState *lps = NULL;
3194  ParallelContext *pcxt;
3195  LVShared *shared;
3196  LVDeadTuples *dead_tuples;
3197  BufferUsage *buffer_usage;
3198  WalUsage *wal_usage;
3199  bool *can_parallel_vacuum;
3200  long maxtuples;
3201  char *sharedquery;
3202  Size est_shared;
3203  Size est_deadtuples;
3204  int nindexes_mwm = 0;
3205  int parallel_workers = 0;
3206  int querylen;
3207  int i;
3208 
3209  /*
3210  * A parallel vacuum must be requested and there must be indexes on the
3211  * relation
3212  */
3213  Assert(nrequested >= 0);
3214  Assert(nindexes > 0);
3215 
3216  /*
3217  * Compute the number of parallel vacuum workers to launch
3218  */
3219  can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
3220  parallel_workers = compute_parallel_vacuum_workers(Irel, nindexes,
3221  nrequested,
3222  can_parallel_vacuum);
3223 
3224  /* Can't perform vacuum in parallel */
3225  if (parallel_workers <= 0)
3226  {
3227  pfree(can_parallel_vacuum);
3228  return lps;
3229  }
3230 
3231  lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
3232 
3234  pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
3235  parallel_workers);
3236  Assert(pcxt->nworkers > 0);
3237  lps->pcxt = pcxt;
3238 
3239  /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
3240  est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3241  for (i = 0; i < nindexes; i++)
3242  {
3243  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3244 
3245  /*
3246  * Cleanup option should be either disabled, always performing in
3247  * parallel or conditionally performing in parallel.
3248  */
3249  Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
3250  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
3251  Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
3252 
3253  /* Skip indexes that don't participate in parallel vacuum */
3254  if (!can_parallel_vacuum[i])
3255  continue;
3256 
3257  if (Irel[i]->rd_indam->amusemaintenanceworkmem)
3258  nindexes_mwm++;
3259 
3260  est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
3261 
3262  /*
3263  * Remember the number of indexes that support parallel operation for
3264  * each phase.
3265  */
3266  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3268  if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
3270  if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
3272  }
3273  shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
3274  shm_toc_estimate_keys(&pcxt->estimator, 1);
3275 
3276  /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
3277  maxtuples = compute_max_dead_tuples(nblocks, true);
3278  est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
3279  shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
3280  shm_toc_estimate_keys(&pcxt->estimator, 1);
3281 
3282  /*
3283  * Estimate space for BufferUsage and WalUsage --
3284  * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
3285  *
3286  * If there are no extensions loaded that care, we could skip this. We
3287  * have no way of knowing whether anyone's looking at pgBufferUsage or
3288  * pgWalUsage, so do it unconditionally.
3289  */
3291  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3292  shm_toc_estimate_keys(&pcxt->estimator, 1);
3294  mul_size(sizeof(WalUsage), pcxt->nworkers));
3295  shm_toc_estimate_keys(&pcxt->estimator, 1);
3296 
3297  /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
3298  querylen = strlen(debug_query_string);
3299  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
3300  shm_toc_estimate_keys(&pcxt->estimator, 1);
3301 
3302  InitializeParallelDSM(pcxt);
3303 
3304  /* Prepare shared information */
3305  shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
3306  MemSet(shared, 0, est_shared);
3307  shared->relid = relid;
3308  shared->elevel = elevel;
3309  shared->maintenance_work_mem_worker =
3310  (nindexes_mwm > 0) ?
3311  maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
3313 
3314  pg_atomic_init_u32(&(shared->cost_balance), 0);
3315  pg_atomic_init_u32(&(shared->active_nworkers), 0);
3316  pg_atomic_init_u32(&(shared->idx), 0);
3317  shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3318  prepare_index_statistics(shared, can_parallel_vacuum, nindexes);
3319 
3321  lps->lvshared = shared;
3322 
3323  /* Prepare the dead tuple space */
3324  dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
3325  dead_tuples->max_tuples = maxtuples;
3326  dead_tuples->num_tuples = 0;
3327  MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
3328  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
3329  vacrelstats->dead_tuples = dead_tuples;
3330 
3331  /*
3332  * Allocate space for each worker's BufferUsage and WalUsage; no need to
3333  * initialize
3334  */
3335  buffer_usage = shm_toc_allocate(pcxt->toc,
3336  mul_size(sizeof(BufferUsage), pcxt->nworkers));
3337  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
3338  lps->buffer_usage = buffer_usage;
3339  wal_usage = shm_toc_allocate(pcxt->toc,
3340  mul_size(sizeof(WalUsage), pcxt->nworkers));
3342  lps->wal_usage = wal_usage;
3343 
3344  /* Store query string for workers */
3345  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
3346  memcpy(sharedquery, debug_query_string, querylen + 1);
3347  sharedquery[querylen] = '\0';
3348  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
3349 
3350  pfree(can_parallel_vacuum);
3351  return lps;
3352 }
3353 
3354 /*
3355  * Destroy the parallel context, and end parallel mode.
3356  *
3357  * Since writes are not allowed during parallel mode, copy the
3358  * updated index statistics from DSM into local memory and then later use that
3359  * to update the index statistics. One might think that we can exit from
3360  * parallel mode, update the index statistics and then destroy parallel
3361  * context, but that won't be safe (see ExitParallelMode).
3362  */
3363 static void
3365  int nindexes)
3366 {
3367  int i;
3368 
3370 
3371  /* Copy the updated statistics */
3372  for (i = 0; i < nindexes; i++)
3373  {
3374  LVSharedIndStats *indstats = get_indstats(lps->lvshared, i);
3375 
3376  /*
3377  * Skip unused slot. The statistics of this index are already stored
3378  * in local memory.
3379  */
3380  if (indstats == NULL)
3381  continue;
3382 
3383  if (indstats->updated)
3384  {
3385  stats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
3386  memcpy(stats[i], &(indstats->stats), sizeof(IndexBulkDeleteResult));
3387  }
3388  else
3389  stats[i] = NULL;
3390  }
3391 
3393  ExitParallelMode();
3394 
3395  /* Deactivate parallel vacuum */
3396  pfree(lps);
3397  lps = NULL;
3398 }
3399 
3400 /* Return the Nth index statistics or NULL */
3401 static LVSharedIndStats *
3402 get_indstats(LVShared *lvshared, int n)
3403 {
3404  int i;
3405  char *p;
3406 
3407  if (IndStatsIsNull(lvshared, n))
3408  return NULL;
3409 
3410  p = (char *) GetSharedIndStats(lvshared);
3411  for (i = 0; i < n; i++)
3412  {
3413  if (IndStatsIsNull(lvshared, i))
3414  continue;
3415 
3416  p += sizeof(LVSharedIndStats);
3417  }
3418 
3419  return (LVSharedIndStats *) p;
3420 }
3421 
3422 /*
3423  * Returns true, if the given index can't participate in parallel index vacuum
3424  * or parallel index cleanup, false, otherwise.
3425  */
3426 static bool
3428 {
3429  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3430 
3431  /* first_time must be true only if for_cleanup is true */
3432  Assert(lvshared->for_cleanup || !lvshared->first_time);
3433 
3434  if (lvshared->for_cleanup)
3435  {
3436  /* Skip, if the index does not support parallel cleanup */
3437  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
3438  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
3439  return true;
3440 
3441  /*
3442  * Skip, if the index supports parallel cleanup conditionally, but we
3443  * have already processed the index (for bulkdelete). See the
3444  * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
3445  * when indexes support parallel cleanup conditionally.
3446  */
3447  if (!lvshared->first_time &&
3448  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3449  return true;
3450  }
3451  else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
3452  {
3453  /* Skip if the index does not support parallel bulk deletion */
3454  return true;
3455  }
3456 
3457  return false;
3458 }
3459 
3460 /*
3461  * Perform work within a launched parallel process.
3462  *
3463  * Since parallel vacuum workers perform only index vacuum or index cleanup,
3464  * we don't need to report progress information.
3465  */
3466 void
3468 {
3469  Relation onerel;
3470  Relation *indrels;
3471  LVShared *lvshared;
3472  LVDeadTuples *dead_tuples;
3473  BufferUsage *buffer_usage;
3474  WalUsage *wal_usage;
3475  int nindexes;
3476  char *sharedquery;
3477  IndexBulkDeleteResult **stats;
3478  LVRelStats vacrelstats;
3479  ErrorContextCallback errcallback;
3480 
3482  false);
3483  elevel = lvshared->elevel;
3484 
3485  ereport(DEBUG1,
3486  (errmsg("starting parallel vacuum worker for %s",
3487  lvshared->for_cleanup ? "cleanup" : "bulk delete")));
3488 
3489  /* Set debug_query_string for individual workers */
3490  sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, false);
3491  debug_query_string = sharedquery;
3493 
3494  /*
3495  * Open table. The lock mode is the same as the leader process. It's
3496  * okay because the lock mode does not conflict among the parallel
3497  * workers.
3498  */
3499  onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
3500 
3501  /*
3502  * Open all indexes. indrels are sorted in order by OID, which should be
3503  * matched to the leader's one.
3504  */
3505  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels);
3506  Assert(nindexes > 0);
3507 
3508  /* Set dead tuple space */
3509  dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
3511  false);
3512 
3513  /* Set cost-based vacuum delay */
3515  VacuumCostBalance = 0;
3516  VacuumPageHit = 0;
3517  VacuumPageMiss = 0;
3518  VacuumPageDirty = 0;
3520  VacuumSharedCostBalance = &(lvshared->cost_balance);
3521  VacuumActiveNWorkers = &(lvshared->active_nworkers);
3522 
3523  stats = (IndexBulkDeleteResult **)
3524  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
3525 
3526  if (lvshared->maintenance_work_mem_worker > 0)
3528 
3529  /*
3530  * Initialize vacrelstats for use as error callback arg by parallel
3531  * worker.
3532  */
3533  vacrelstats.relnamespace = get_namespace_name(RelationGetNamespace(onerel));
3534  vacrelstats.relname = pstrdup(RelationGetRelationName(onerel));
3535  vacrelstats.indname = NULL;
3536  vacrelstats.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
3537 
3538  /* Setup error traceback support for ereport() */
3539  errcallback.callback = vacuum_error_callback;
3540  errcallback.arg = &vacrelstats;
3541  errcallback.previous = error_context_stack;
3542  error_context_stack = &errcallback;
3543 
3544  /* Prepare to track buffer usage during parallel execution */
3546 
3547  /* Process indexes to perform vacuum/cleanup */
3548  parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes,
3549  &vacrelstats);
3550 
3551  /* Report buffer/WAL usage during parallel execution */
3552  buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
3553  wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
3555  &wal_usage[ParallelWorkerNumber]);
3556 
3557  /* Pop the error context stack */
3558  error_context_stack = errcallback.previous;
3559 
3560  vac_close_indexes(nindexes, indrels, RowExclusiveLock);
3562  pfree(stats);
3563 }
3564 
3565 /*
3566  * Error context callback for errors occurring during vacuum.
3567  */
3568 static void
3570 {
3571  LVRelStats *errinfo = arg;
3572 
3573  switch (errinfo->phase)
3574  {
3576  if (BlockNumberIsValid(errinfo->blkno))
3577  errcontext("while scanning block %u of relation \"%s.%s\"",
3578  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3579  break;
3580 
3582  if (BlockNumberIsValid(errinfo->blkno))
3583  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3584  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3585  break;
3586 
3588  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3589  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3590  break;
3591 
3593  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3594  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3595  break;
3596 
3598  if (BlockNumberIsValid(errinfo->blkno))
3599  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3600  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3601  break;
3602 
3604  default:
3605  return; /* do nothing; the errinfo may not be
3606  * initialized */
3607  }
3608 }
3609 
3610 /*
3611  * Updates the information required for vacuum error callback. This also saves
3612  * the current information which can be later restored via restore_vacuum_error_info.
3613  */
3614 static void
3615 update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase,
3616  BlockNumber blkno)
3617 {
3618  if (saved_err_info)
3619  {
3620  saved_err_info->blkno = errinfo->blkno;
3621  saved_err_info->phase = errinfo->phase;
3622  }
3623 
3624  errinfo->blkno = blkno;
3625  errinfo->phase = phase;
3626 }
3627 
3628 /*
3629  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3630  */
3631 static void
3632 restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info)
3633 {
3634  errinfo->blkno = saved_err_info->blkno;
3635  errinfo->phase = saved_err_info->phase;
3636 }
int autovacuum_work_mem
Definition: autovacuum.c:116
double new_rel_tuples
Definition: vacuumlazy.c:305
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:6874
#define GetSharedIndStats(s)
Definition: vacuumlazy.c:253
uint8 amparallelvacuumoptions
Definition: amapi.h:205
XLogRecPtr log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
Definition: heapam.c:7117
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3569
struct IndexAmRoutine * rd_indam
Definition: rel.h:188
int multixact_freeze_table_age
Definition: vacuum.h:215
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:1974
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
Definition: vacuumlazy.c:1903
int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, bool report_stats, TransactionId *latestRemovedXid)
Definition: pruneheap.c:180
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3779
XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples)
Definition: heapam.c:7197
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
WalUsage * wal_usage
Definition: vacuumlazy.c:280
#define PageIsEmpty(page)
Definition: bufpage.h:222
int64 VacuumPageMiss
Definition: globals.c:144
#define DEBUG1
Definition: elog.h:25
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2690
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1014
BlockNumber rel_pages
Definition: vacuumlazy.c:299
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:77
static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested, bool *can_parallel_vacuum)
Definition: vacuumlazy.c:3073
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1121
OffsetNumber offset
Definition: heapam_xlog.h:321
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:1825
int VacuumCostBalance
Definition: globals.c:147
ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:172
bool estimated_count
Definition: vacuumlazy.c:214
static bool lazy_tid_reaped(ItemPointer itemptr, void *state)
Definition: vacuumlazy.c:2903
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:164
LVDeadTuples * dead_tuples
Definition: vacuumlazy.c:311
#define PageIsAllVisible(page)
Definition: bufpage.h:385
uint32 TransactionId
Definition: c.h:520
#define IndStatsIsNull(s, i)
Definition: vacuumlazy.c:255
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:282
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
#define PARALLEL_VACUUM_KEY_DEAD_TUPLES
Definition: vacuumlazy.c:139
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
Definition: pgstat.c:3210
Oid relid
Definition: vacuumlazy.c:193
#define PROGRESS_VACUUM_MAX_DEAD_TUPLES
Definition: progress.h:26
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:31
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1574
double tuples_deleted
Definition: vacuumlazy.c:309
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: pgstat.c:3132
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1468
int64 TimestampTz
Definition: timestamp.h:39
WalUsage pgWalUsage
Definition: instrument.c:22
#define SizeOfDeadTuples(cnt)
Definition: vacuumlazy.c:177
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
#define MaxOffsetNumber
Definition: off.h:28
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3231
BufferUsage * buffer_usage
Definition: vacuumlazy.c:277
#define VISIBILITYMAP_ALL_FROZEN
Definition: visibilitymap.h:27
char * pstrdup(const char *in)
Definition: mcxt.c:1186
static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr)
Definition: vacuumlazy.c:2879
shm_toc_estimator estimator
Definition: parallel.h:42
bool analyze_only
Definition: genam.h:47
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:652
bool useindex
Definition: vacuumlazy.c:296
#define XLogIsNeeded()
Definition: xlog.h:191
struct timeval instr_time
Definition: instr_time.h:150
int64 VacuumPageHit
Definition: globals.c:143
#define Min(x, y)
Definition: c.h:927
bool report_progress
Definition: genam.h:48
BlockNumber tupcount_pages
Definition: vacuumlazy.c:303
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BufferAccessStrategy strategy
Definition: genam.h:52
struct LVSharedIndStats LVSharedIndStats
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:255
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:34
unsigned char uint8
Definition: c.h:372
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define InvalidBuffer
Definition: buf.h:25
#define gettext_noop(x)
Definition: c.h:1166
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:283
char * relnamespace
Definition: vacuumlazy.c:293
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define VACUUM_OPTION_MAX_VALID_VALUE
Definition: vacuum.h:63
Relation index
Definition: genam.h:46
BlockNumber scanned_pages
Definition: vacuumlazy.c:300
VacErrPhase phase
Definition: vacuumlazy.c:319
#define MemSet(start, val, len)
Definition: c.h:978
#define INFO
Definition: elog.h:33
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:103
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:929
int64 VacuumPageDirty
Definition: globals.c:145
static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
Definition: vacuumlazy.c:2829
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3483
#define BITMAPLEN(NATTS)
Definition: htup_details.h:547
static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared)
Definition: vacuumlazy.c:3427
int nindexes_parallel_bulkdel
Definition: vacuumlazy.c:286
BlockNumber pinskipped_pages
Definition: vacuumlazy.c:301
int maintenance_work_mem_worker
Definition: vacuumlazy.c:224
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:109
unsigned int Oid
Definition: postgres_ext.h:31
static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info)
Definition: vacuumlazy.c:3632
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
int nindexes_parallel_condcleanup
Definition: vacuumlazy.c:288
void InstrEndParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:189
void(* callback)(void *arg)
Definition: elog.h:229
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1673
struct ErrorContextCallback * previous
Definition: elog.h:228
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
int freeze_table_age
Definition: vacuum.h:212
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:513
static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase, BlockNumber blkno)
Definition: vacuumlazy.c:3615
int errdetail_internal(const char *fmt,...)
Definition: elog.c:984
#define PARALLEL_VACUUM_KEY_QUERY_TEXT
Definition: vacuumlazy.c:140
static LVSharedIndStats * get_indstats(LVShared *lvshared, int n)
Definition: vacuumlazy.c:3402
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define VISIBILITYMAP_VALID_BITS
Definition: visibilitymap.h:28
HeapTupleHeader t_data
Definition: htup.h:68
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen_p)
Definition: heapam.c:6097
#define FORCE_CHECK_PAGE()
ErrorContextCallback * error_context_stack
Definition: elog.c:92
ParallelContext * pcxt
Definition: vacuumlazy.c:271
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:34
#define SizeOfLVShared
Definition: vacuumlazy.c:252
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:676
pg_atomic_uint32 cost_balance
Definition: vacuumlazy.c:231
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:859
BlockNumber blkno
Definition: vacuumlazy.c:325
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:750
BlockNumber old_rel_pages
Definition: vacuumlazy.c:298
void pg_usleep(long microsec)
Definition: signal.c:53
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:904
#define PREFETCH_SIZE
Definition: vacuumlazy.c:131
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:6742
VacErrPhase phase
Definition: vacuumlazy.c:326
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
void pfree(void *pointer)
Definition: mcxt.c:1056
bool IsInParallelMode(void)
Definition: xact.c:997
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:111
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3506
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3916
#define ERROR
Definition: elog.h:43
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:91
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:658
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:170
int freeze_min_age
Definition: vacuum.h:211
ItemPointerData t_self
Definition: htup.h:65
void ExitParallelMode(void)
Definition: xact.c:977
bool is_wraparound
Definition: vacuum.h:217
long wal_records
Definition: instrument.h:37
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define DEBUG2
Definition: elog.h:24
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:324
static TransactionId FreezeLimit
Definition: vacuumlazy.c:333
IndexBulkDeleteResult stats
Definition: vacuumlazy.c:265
static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:1766
uint32 t_len
Definition: htup.h:64
void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
Definition: heapam.c:6326
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3191
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:67
#define PageSetAllVisible(page)
Definition: bufpage.h:387
bool IsUnderPostmaster
Definition: globals.c:109
#define FirstOffsetNumber
Definition: off.h:27
#define RowExclusiveLock
Definition: lockdefs.h:38
struct LVDeadTuples LVDeadTuples
int errdetail(const char *fmt,...)
Definition: elog.c:957
int elevel
Definition: vacuumlazy.c:194
int ParallelWorkerNumber
Definition: parallel.c:112
static MultiXactId MultiXactCutoff
Definition: vacuumlazy.c:334
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:247
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:490
static TransactionId OldestXmin
Definition: vacuumlazy.c:332
pg_atomic_uint32 idx
Definition: vacuumlazy.c:245
unsigned int uint32
Definition: c.h:374
Oid t_tableOid
Definition: htup.h:66
#define MultiXactIdIsValid(multi)
Definition: multixact.h:27
int min_parallel_index_scan_size
Definition: allpaths.c:65
int nworkers_launched
Definition: parallel.h:38
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2401
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:527
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3301
void InstrAccumParallelQuery(BufferUsage *bufusage, WalUsage *walusage)
Definition: instrument.c:199
static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2243
bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf)
Definition: heapam.c:6795
#define PROGRESS_VACUUM_NUM_DEAD_TUPLES
Definition: progress.h:27
#define IsParallelWorker()
Definition: parallel.h:61
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
double new_live_tuples
Definition: vacuumlazy.c:306
bool first_time
Definition: vacuumlazy.c:202
VacOptTernaryValue index_cleanup
Definition: vacuum.h:221
static bool heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:2954
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:125
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:102
#define MaxAllocSize
Definition: memutils.h:40
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define WARNING
Definition: elog.h:40
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:1931
const char * debug_query_string
Definition: postgres.c:88
static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum, int nindexes)
Definition: vacuumlazy.c:3134
double reltuples
Definition: vacuumlazy.c:213
#define VACUUM_OPTION_NO_PARALLEL
Definition: vacuum.h:39
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:202
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int VacuumCostBalanceLocal
Definition: vacuum.c:78
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:76
void InstrStartParallelQuery(void)
Definition: instrument.c:181
static int elevel
Definition: vacuumlazy.c:330
uint8 bits8
Definition: c.h:381
#define ngettext(s, p, n)
Definition: c.h:1152
static void lazy_cleanup_index(Relation indrel, IndexBulkDeleteResult **stats, double reltuples, bool estimated_count, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2459
int nindexes_parallel_cleanup
Definition: vacuumlazy.c:287
Size mul_size(Size s1, Size s2)
Definition: shmem.c:515
static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2061
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:230
#define ParallelVacuumIsActive(lps)
Definition: vacuumlazy.c:148
void * palloc0(Size size)
Definition: mcxt.c:980
char * indname
Definition: vacuumlazy.c:317
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:101
void pgstat_progress_end_command(void)
Definition: pgstat.c:3282
char * relname
Definition: vacuumlazy.c:294
IndexBulkDeleteResult * index_bulk_delete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: indexam.c:688
void heap_vacuum_rel(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:413
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:156
Oid MyDatabaseId
Definition: globals.c:85
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3722
#define PARALLEL_VACUUM_KEY_SHARED
Definition: vacuumlazy.c:138
int max_parallel_maintenance_workers
Definition: globals.c:123
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:463
#define InvalidMultiXactId
Definition: multixact.h:23
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:211
static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats, int nindexes)
Definition: vacuumlazy.c:3159
static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2346
static bool should_attempt_truncation(VacuumParams *params, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2539
pg_atomic_uint32 active_nworkers
Definition: vacuumlazy.c:238
VacOptTernaryValue truncate
Definition: vacuum.h:223
static LVParallelState * begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats, BlockNumber nblocks, int nindexes, int nrequested)
Definition: vacuumlazy.c:3190
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:35
#define ereport(elevel,...)
Definition: elog.h:144
int num_index_scans
Definition: vacuumlazy.c:312
int maintenance_work_mem
Definition: globals.c:122
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:345
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
double old_live_tuples
Definition: vacuumlazy.c:304
static void vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:714
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:328
int message_level
Definition: genam.h:50
TransactionId MultiXactId
Definition: c.h:530
RelFileNode rd_node
Definition: rel.h:55
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
double num_heap_tuples
Definition: genam.h:51
#define Max(x, y)
Definition: c.h:921
static BufferAccessStrategy vac_strategy
Definition: vacuumlazy.c:336
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define PARALLEL_VACUUM_KEY_BUFFER_USAGE
Definition: vacuumlazy.c:141
#define PageClearAllVisible(page)
Definition: bufpage.h:389
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct LVShared LVShared
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:685
#define Assert(condition)
Definition: c.h:745
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:52
double new_dead_tuples
Definition: vacuumlazy.c:307
Definition: regguts.h:298
bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:247
#define PARALLEL_VACUUM_KEY_WAL_USAGE
Definition: vacuumlazy.c:142
TransactionId latestRemovedXid
Definition: vacuumlazy.c:313
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:205
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:33
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:32
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
Definition: pgstat.c:3253
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:473
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:30
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Definition: vacuumlazy.c:3467
int nworkers
Definition: vacuum.h:231
#define InvalidBlockNumber
Definition: block.h:33
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
long wal_fpi
Definition: instrument.h:38
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7146
#define MAXALIGN(LEN)
Definition: c.h:698
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
int log_min_duration
Definition: vacuum.h:218
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
void EnterParallelMode(void)
Definition: xact.c:964
LVShared * lvshared
Definition: vacuumlazy.c:274
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:45
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
#define RelationNeedsWAL(relation)
Definition: rel.h:562
IndexBulkDeleteResult * index_vacuum_cleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: indexam.c:709
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
struct LVRelStats LVRelStats
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:156
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
Definition: pgstat.c:1463
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:572
#define PageGetLSN(page)
Definition: bufpage.h:366
static void end_parallel_vacuum(IndexBulkDeleteResult **stats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:3364
bool for_cleanup
Definition: vacuumlazy.c:201
#define AccessExclusiveLock
Definition: lockdefs.h:45
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2633
BlockNumber pages_removed
Definition: vacuumlazy.c:308
BlockNumber nonempty_pages
Definition: vacuumlazy.c:310
void PageRepairFragmentation(Page page)
Definition: bufpage.c:475
#define PageIsNew(page)
Definition: bufpage.h:229
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:824
static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive)
Definition: vacuumlazy.c:759
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
BlockNumber frozenskipped_pages
Definition: vacuumlazy.c:302
double VacuumCostDelay
Definition: globals.c:141
#define elog(elevel,...)
Definition: elog.h:214
int old_snapshot_threshold
Definition: snapmgr.c:75
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3156
int i
int options
Definition: vacuum.h:210
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:574
#define errcontext
Definition: elog.h:185
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
void * arg
struct LVParallelState LVParallelState
static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVSharedIndStats *shared_indstats, LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2286
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup)
Definition: vacuumlazy.c:2009
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:223
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
uint32 offset
Definition: vacuumlazy.c:246
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
static int vac_cmp_itemptr(const void *left, const void *right)
Definition: vacuumlazy.c:2921
uint64 wal_bytes
Definition: instrument.h:39
void vacuum_delay_point(void)
Definition: vacuum.c:1995
#define MAXDEADTUPLES(max_size)
Definition: vacuumlazy.c:180
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1648
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
BlockNumber blkno
Definition: vacuumlazy.c:318
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:60
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact)
Definition: vacuum.c:1206
VacErrPhase
Definition: vacuumlazy.c:151
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define LAZY_ALLOC_TUPLES
Definition: vacuumlazy.c:119
int Buffer
Definition: buf.h:23
#define _(x)
Definition: elog.c:88
#define UINT64_FORMAT
Definition: c.h:417
#define RelationGetRelid(relation)
Definition: rel.h:456
int multixact_freeze_min_age
Definition: vacuum.h:213
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2560
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:32
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:92
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
shm_toc * toc
Definition: parallel.h:45
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
Definition: vacuumlazy.c:2861
bool VacuumCostActive
Definition: globals.c:148
bool estimated_count
Definition: genam.h:49
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
struct LVSavedErrInfo LVSavedErrInfo
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:277
#define RelationGetNamespace(relation)
Definition: rel.h:497
static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVDeadTuples *dead_tuples, int nindexes, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2191
bool lock_waiter_detected
Definition: vacuumlazy.c:314