PostgreSQL Source Code  git master
vacuumlazy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  *
7  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
8  * TIDs. We want to ensure we can vacuum even the very largest relations with
9  * finite memory space usage. To do that, we set upper bounds on the number of
10  * tuples we will keep track of at once.
11  *
12  * We are willing to use at most maintenance_work_mem (or perhaps
13  * autovacuum_work_mem) memory space to keep track of dead tuples. We
14  * initially allocate an array of TIDs of that size, with an upper limit that
15  * depends on table size (this limit ensures we don't allocate a huge area
16  * uselessly for vacuuming small tables). If the array threatens to overflow,
17  * we suspend the heap scan phase and perform a pass of index cleanup and page
18  * compaction, then resume the heap scan with an empty TID array.
19  *
20  * If we're processing a table with no indexes, we can just vacuum each page
21  * as we go; there's no need to save up multiple tuples to minimize the number
22  * of index scans performed. So we don't use maintenance_work_mem memory for
23  * the TID array, just enough to hold as many heap tuples as fit on one page.
24  *
25  * Lazy vacuum supports parallel execution with parallel worker processes. In
26  * a parallel vacuum, we perform both index vacuum and index cleanup with
27  * parallel worker processes. Individual indexes are processed by one vacuum
28  * process. At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
29  * the parallel context and initialize the DSM segment that contains shared
30  * information as well as the memory space for storing dead tuples. When
31  * starting either index vacuum or index cleanup, we launch parallel worker
32  * processes. Once all indexes are processed the parallel worker processes
33  * exit. After that, the leader process re-initializes the parallel context
34  * so that it can use the same DSM for multiple passes of index vacuum and
35  * for performing index cleanup. For updating the index statistics, we need
36  * to update the system table and since updates are not allowed during
37  * parallel mode we update the index statistics after exiting from the
38  * parallel mode.
39  *
40  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
41  * Portions Copyright (c) 1994, Regents of the University of California
42  *
43  *
44  * IDENTIFICATION
45  * src/backend/access/heap/vacuumlazy.c
46  *
47  *-------------------------------------------------------------------------
48  */
49 #include "postgres.h"
50 
51 #include <math.h>
52 
53 #include "access/amapi.h"
54 #include "access/genam.h"
55 #include "access/heapam.h"
56 #include "access/heapam_xlog.h"
57 #include "access/htup_details.h"
58 #include "access/multixact.h"
59 #include "access/parallel.h"
60 #include "access/transam.h"
61 #include "access/visibilitymap.h"
62 #include "access/xact.h"
63 #include "access/xlog.h"
64 #include "catalog/storage.h"
65 #include "commands/dbcommands.h"
66 #include "commands/progress.h"
67 #include "commands/vacuum.h"
68 #include "miscadmin.h"
69 #include "optimizer/paths.h"
70 #include "pgstat.h"
71 #include "portability/instr_time.h"
72 #include "postmaster/autovacuum.h"
73 #include "storage/bufmgr.h"
74 #include "storage/freespace.h"
75 #include "storage/lmgr.h"
76 #include "tcop/tcopprot.h"
77 #include "utils/lsyscache.h"
78 #include "utils/memutils.h"
79 #include "utils/pg_rusage.h"
80 #include "utils/timestamp.h"
81 
82 
83 /*
84  * Space/time tradeoff parameters: do these need to be user-tunable?
85  *
86  * To consider truncating the relation, we want there to be at least
87  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
88  * is less) potentially-freeable pages.
89  */
90 #define REL_TRUNCATE_MINIMUM 1000
91 #define REL_TRUNCATE_FRACTION 16
92 
93 /*
94  * Timing parameters for truncate locking heuristics.
95  *
96  * These were not exposed as user tunable GUC values because it didn't seem
97  * that the potential for improvement was great enough to merit the cost of
98  * supporting them.
99  */
100 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
101 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
102 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
103 
104 /*
105  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
106  * (it won't be exact because we only vacuum FSM after processing a heap page
107  * that has some removable tuples). When there are indexes, this is ignored,
108  * and we vacuum FSM after each index/heap cleaning pass.
109  */
110 #define VACUUM_FSM_EVERY_PAGES \
111  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
112 
113 /*
114  * Guesstimation of number of dead tuples per page. This is used to
115  * provide an upper limit to memory allocated when vacuuming small
116  * tables.
117  */
118 #define LAZY_ALLOC_TUPLES MaxHeapTuplesPerPage
119 
120 /*
121  * Before we consider skipping a page that's marked as clean in
122  * visibility map, we must've seen at least this many clean pages.
123  */
124 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
125 
126 /*
127  * Size of the prefetch window for lazy vacuum backwards truncation scan.
128  * Needs to be a power of 2.
129  */
130 #define PREFETCH_SIZE ((BlockNumber) 32)
131 
132 /*
133  * DSM keys for parallel vacuum. Unlike other parallel execution code, since
134  * we don't need to worry about DSM keys conflicting with plan_node_id we can
135  * use small integers.
136  */
137 #define PARALLEL_VACUUM_KEY_SHARED 1
138 #define PARALLEL_VACUUM_KEY_DEAD_TUPLES 2
139 #define PARALLEL_VACUUM_KEY_QUERY_TEXT 3
140 
141 /*
142  * Macro to check if we are in a parallel vacuum. If true, we are in the
143  * parallel mode and the DSM segment is initialized.
144  */
145 #define ParallelVacuumIsActive(lps) PointerIsValid(lps)
146 
147 /* Phases of vacuum during which we report error context. */
148 typedef enum
149 {
156 } VacErrPhase;
157 
158 /*
159  * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
160  * This is allocated in the DSM segment in parallel mode and in local memory
161  * in non-parallel mode.
162  */
163 typedef struct LVDeadTuples
164 {
165  int max_tuples; /* # slots allocated in array */
166  int num_tuples; /* current # of entries */
167  /* List of TIDs of tuples we intend to delete */
168  /* NB: this list is ordered by TID address */
170  * ItemPointerData */
171 } LVDeadTuples;
172 
173 /* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
174 #define SizeOfDeadTuples(cnt) \
175  add_size(offsetof(LVDeadTuples, itemptrs), \
176  mul_size(sizeof(ItemPointerData), cnt))
177 #define MAXDEADTUPLES(max_size) \
178  (((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
179 
180 /*
181  * Shared information among parallel workers. So this is allocated in the DSM
182  * segment.
183  */
184 typedef struct LVShared
185 {
186  /*
187  * Target table relid and log level. These fields are not modified during
188  * the lazy vacuum.
189  */
191  int elevel;
192 
193  /*
194  * An indication for vacuum workers to perform either index vacuum or
195  * index cleanup. first_time is true only if for_cleanup is true and
196  * bulk-deletion is not performed yet.
197  */
200 
201  /*
202  * Fields for both index vacuum and cleanup.
203  *
204  * reltuples is the total number of input heap tuples. We set either old
205  * live tuples in the index vacuum case or the new live tuples in the
206  * index cleanup case.
207  *
208  * estimated_count is true if the reltuples is an estimated value.
209  */
210  double reltuples;
212 
213  /*
214  * In single process lazy vacuum we could consume more memory during index
215  * vacuuming or cleanup apart from the memory for heap scanning. In
216  * parallel vacuum, since individual vacuum workers can consume memory
217  * equal to maintenance_work_mem, the new maintenance_work_mem for each
218  * worker is set such that the parallel operation doesn't consume more
219  * memory than single process lazy vacuum.
220  */
222 
223  /*
224  * Shared vacuum cost balance. During parallel vacuum,
225  * VacuumSharedCostBalance points to this value and it accumulates the
226  * balance of each parallel vacuum worker.
227  */
229 
230  /*
231  * Number of active parallel workers. This is used for computing the
232  * minimum threshold of the vacuum cost balance for a worker to go for the
233  * delay.
234  */
236 
237  /*
238  * Variables to control parallel vacuum. We have a bitmap to indicate
239  * which index has stats in shared memory. The set bit in the map
240  * indicates that the particular index supports a parallel vacuum.
241  */
242  pg_atomic_uint32 idx; /* counter for vacuuming and clean up */
243  uint32 offset; /* sizeof header incl. bitmap */
244  bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]; /* bit map of NULLs */
245 
246  /* Shared index statistics data follows at end of struct */
247 } LVShared;
248 
249 #define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
250 #define GetSharedIndStats(s) \
251  ((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
252 #define IndStatsIsNull(s, i) \
253  (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
254 
255 /*
256  * Struct for an index bulk-deletion statistic used for parallel vacuum. This
257  * is allocated in the DSM segment.
258  */
259 typedef struct LVSharedIndStats
260 {
261  bool updated; /* are the stats updated? */
264 
265 /* Struct for maintaining a parallel vacuum state. */
266 typedef struct LVParallelState
267 {
269 
270  /* Shared information among parallel vacuum workers */
272 
273  /*
274  * The number of indexes that support parallel index bulk-deletion and
275  * parallel index cleanup respectively.
276  */
281 
282 typedef struct LVRelStats
283 {
285  char *relname;
286  /* useindex = true means two-pass strategy; false means one-pass */
287  bool useindex;
288  /* Overall statistics about rel */
289  BlockNumber old_rel_pages; /* previous value of pg_class.relpages */
290  BlockNumber rel_pages; /* total number of pages */
291  BlockNumber scanned_pages; /* number of pages we examined */
292  BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */
293  BlockNumber frozenskipped_pages; /* # of frozen pages we skipped */
294  BlockNumber tupcount_pages; /* pages whose tuples we counted */
295  double old_live_tuples; /* previous value of pg_class.reltuples */
296  double new_rel_tuples; /* new estimated total # of tuples */
297  double new_live_tuples; /* new estimated total # of live tuples */
298  double new_dead_tuples; /* new estimated total # of dead tuples */
301  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
306 
307  /* Used for error callback */
308  char *indname;
309  BlockNumber blkno; /* used only for heap operations */
311 } LVRelStats;
312 
313 /* A few variables that don't seem worth passing around as parameters */
314 static int elevel = -1;
315 
319 
321 
322 
323 /* non-export function prototypes */
324 static void lazy_scan_heap(Relation onerel, VacuumParams *params,
325  LVRelStats *vacrelstats, Relation *Irel, int nindexes,
326  bool aggressive);
327 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
328 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
329 static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
330  IndexBulkDeleteResult **stats,
331  LVRelStats *vacrelstats, LVParallelState *lps,
332  int nindexes);
333 static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
334  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats);
335 static void lazy_cleanup_index(Relation indrel,
336  IndexBulkDeleteResult **stats,
337  double reltuples, bool estimated_count, LVRelStats *vacrelstats);
338 static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
339  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
340 static bool should_attempt_truncation(VacuumParams *params,
341  LVRelStats *vacrelstats);
342 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
344  LVRelStats *vacrelstats);
345 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
346 static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples,
347  ItemPointer itemptr);
348 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
349 static int vac_cmp_itemptr(const void *left, const void *right);
351  TransactionId *visibility_cutoff_xid, bool *all_frozen);
353  LVRelStats *vacrelstats, LVParallelState *lps,
354  int nindexes);
355 static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
356  LVShared *lvshared, LVDeadTuples *dead_tuples,
357  int nindexes, LVRelStats *vacrelstats);
358 static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
359  LVRelStats *vacrelstats, LVParallelState *lps,
360  int nindexes);
361 static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
362  LVShared *lvshared, LVSharedIndStats *shared_indstats,
363  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats);
364 static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
365  LVRelStats *vacrelstats, LVParallelState *lps,
366  int nindexes);
367 static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
368 static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
369  bool *can_parallel_vacuum);
370 static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
371  int nindexes);
372 static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
373  int nindexes);
375  LVRelStats *vacrelstats, BlockNumber nblocks,
376  int nindexes, int nrequested);
377 static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats,
378  LVParallelState *lps, int nindexes);
379 static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
380 static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
381 static void vacuum_error_callback(void *arg);
382 static void update_vacuum_error_info(LVRelStats *errinfo, int phase,
383  BlockNumber blkno, char *indname);
384 
385 
386 /*
387  * heap_vacuum_rel() -- perform VACUUM for one heap relation
388  *
389  * This routine vacuums a single heap, cleans out its indexes, and
390  * updates its relpages and reltuples statistics.
391  *
392  * At entry, we have already established a transaction and opened
393  * and locked the relation.
394  */
395 void
397  BufferAccessStrategy bstrategy)
398 {
399  LVRelStats *vacrelstats;
400  Relation *Irel;
401  int nindexes;
402  PGRUsage ru0;
403  TimestampTz starttime = 0;
404  long secs;
405  int usecs;
406  double read_rate,
407  write_rate;
408  bool aggressive; /* should we scan all unfrozen pages? */
409  bool scanned_all_unfrozen; /* actually scanned all such pages? */
410  TransactionId xidFullScanLimit;
411  MultiXactId mxactFullScanLimit;
412  BlockNumber new_rel_pages;
413  BlockNumber new_rel_allvisible;
414  double new_live_tuples;
415  TransactionId new_frozen_xid;
416  MultiXactId new_min_multi;
417  ErrorContextCallback errcallback;
418 
419  Assert(params != NULL);
422 
423  /* not every AM requires these to be valid, but heap does */
424  Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid));
425  Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid));
426 
427  /* measure elapsed time iff autovacuum logging requires it */
428  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
429  {
430  pg_rusage_init(&ru0);
431  starttime = GetCurrentTimestamp();
432  }
433 
434  if (params->options & VACOPT_VERBOSE)
435  elevel = INFO;
436  else
437  elevel = DEBUG2;
438 
440  RelationGetRelid(onerel));
441 
442  vac_strategy = bstrategy;
443 
444  vacuum_set_xid_limits(onerel,
445  params->freeze_min_age,
446  params->freeze_table_age,
447  params->multixact_freeze_min_age,
449  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
450  &MultiXactCutoff, &mxactFullScanLimit);
451 
452  /*
453  * We request an aggressive scan if the table's frozen Xid is now older
454  * than or equal to the requested Xid full-table scan limit; or if the
455  * table's minimum MultiXactId is older than or equal to the requested
456  * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
457  */
458  aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
459  xidFullScanLimit);
460  aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
461  mxactFullScanLimit);
463  aggressive = true;
464 
465  vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
466 
467  vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel));
468  vacrelstats->relname = pstrdup(RelationGetRelationName(onerel));
469  vacrelstats->indname = NULL;
470  vacrelstats->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
471  vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
472  vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
473  vacrelstats->num_index_scans = 0;
474  vacrelstats->pages_removed = 0;
475  vacrelstats->lock_waiter_detected = false;
476 
477  /* Open all indexes of the relation */
478  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
479  vacrelstats->useindex = (nindexes > 0 &&
481 
482  /*
483  * Setup error traceback support for ereport(). The idea is to set up an
484  * error context callback to display additional information on any error
485  * during a vacuum. During different phases of vacuum (heap scan, heap
486  * vacuum, index vacuum, index clean up, heap truncate), we update the
487  * error context callback to display appropriate information.
488  *
489  * Note that the index vacuum and heap vacuum phases may be called
490  * multiple times in the middle of the heap scan phase. So the old phase
491  * information is restored at the end of those phases.
492  */
493  errcallback.callback = vacuum_error_callback;
494  errcallback.arg = vacrelstats;
495  errcallback.previous = error_context_stack;
496  error_context_stack = &errcallback;
497 
498  /* Do the vacuuming */
499  lazy_scan_heap(onerel, params, vacrelstats, Irel, nindexes, aggressive);
500 
501  /* Done with indexes */
502  vac_close_indexes(nindexes, Irel, NoLock);
503 
504  /*
505  * Compute whether we actually scanned the all unfrozen pages. If we did,
506  * we can adjust relfrozenxid and relminmxid.
507  *
508  * NB: We need to check this before truncating the relation, because that
509  * will change ->rel_pages.
510  */
511  if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
512  < vacrelstats->rel_pages)
513  {
514  Assert(!aggressive);
515  scanned_all_unfrozen = false;
516  }
517  else
518  scanned_all_unfrozen = true;
519 
520  /*
521  * Optionally truncate the relation.
522  */
523  if (should_attempt_truncation(params, vacrelstats))
524  {
525  /*
526  * Update error traceback information. This is the last phase during
527  * which we add context information to errors, so we don't need to
528  * revert to the previous phase.
529  */
531  vacrelstats->nonempty_pages, NULL);
532  lazy_truncate_heap(onerel, vacrelstats);
533  }
534 
535  /* Pop the error context stack */
536  error_context_stack = errcallback.previous;
537 
538  /* Report that we are now doing final cleanup */
541 
542  /*
543  * Update statistics in pg_class.
544  *
545  * A corner case here is that if we scanned no pages at all because every
546  * page is all-visible, we should not update relpages/reltuples, because
547  * we have no new information to contribute. In particular this keeps us
548  * from replacing relpages=reltuples=0 (which means "unknown tuple
549  * density") with nonzero relpages and reltuples=0 (which means "zero
550  * tuple density") unless there's some actual evidence for the latter.
551  *
552  * It's important that we use tupcount_pages and not scanned_pages for the
553  * check described above; scanned_pages counts pages where we could not
554  * get cleanup lock, and which were processed only for frozenxid purposes.
555  *
556  * We do update relallvisible even in the corner case, since if the table
557  * is all-visible we'd definitely like to know that. But clamp the value
558  * to be not more than what we're setting relpages to.
559  *
560  * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
561  * since then we don't know for certain that all tuples have a newer xmin.
562  */
563  new_rel_pages = vacrelstats->rel_pages;
564  new_live_tuples = vacrelstats->new_live_tuples;
565  if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
566  {
567  new_rel_pages = vacrelstats->old_rel_pages;
568  new_live_tuples = vacrelstats->old_live_tuples;
569  }
570 
571  visibilitymap_count(onerel, &new_rel_allvisible, NULL);
572  if (new_rel_allvisible > new_rel_pages)
573  new_rel_allvisible = new_rel_pages;
574 
575  new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
576  new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
577 
578  vac_update_relstats(onerel,
579  new_rel_pages,
580  new_live_tuples,
581  new_rel_allvisible,
582  nindexes > 0,
583  new_frozen_xid,
584  new_min_multi,
585  false);
586 
587  /* report results to the stats collector, too */
589  onerel->rd_rel->relisshared,
590  new_live_tuples,
591  vacrelstats->new_dead_tuples);
593 
594  /* and log the action if appropriate */
595  if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
596  {
597  TimestampTz endtime = GetCurrentTimestamp();
598 
599  if (params->log_min_duration == 0 ||
600  TimestampDifferenceExceeds(starttime, endtime,
601  params->log_min_duration))
602  {
604  char *msgfmt;
605 
606  TimestampDifference(starttime, endtime, &secs, &usecs);
607 
608  read_rate = 0;
609  write_rate = 0;
610  if ((secs > 0) || (usecs > 0))
611  {
612  read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
613  (secs + usecs / 1000000.0);
614  write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
615  (secs + usecs / 1000000.0);
616  }
617 
618  /*
619  * This is pretty messy, but we split it up so that we can skip
620  * emitting individual parts of the message when not applicable.
621  */
622  initStringInfo(&buf);
623  if (params->is_wraparound)
624  {
625  if (aggressive)
626  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
627  else
628  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
629  }
630  else
631  {
632  if (aggressive)
633  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
634  else
635  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
636  }
637  appendStringInfo(&buf, msgfmt,
639  vacrelstats->relnamespace,
640  vacrelstats->relname,
641  vacrelstats->num_index_scans);
642  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
643  vacrelstats->pages_removed,
644  vacrelstats->rel_pages,
645  vacrelstats->pinskipped_pages,
646  vacrelstats->frozenskipped_pages);
647  appendStringInfo(&buf,
648  _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
649  vacrelstats->tuples_deleted,
650  vacrelstats->new_rel_tuples,
651  vacrelstats->new_dead_tuples,
652  OldestXmin);
653  appendStringInfo(&buf,
654  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
655  (long long) VacuumPageHit,
656  (long long) VacuumPageMiss,
657  (long long) VacuumPageDirty);
658  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
659  read_rate, write_rate);
660  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
661 
662  ereport(LOG,
663  (errmsg_internal("%s", buf.data)));
664  pfree(buf.data);
665  }
666  }
667 }
668 
669 /*
670  * For Hot Standby we need to know the highest transaction id that will
671  * be removed by any change. VACUUM proceeds in a number of passes so
672  * we need to consider how each pass operates. The first phase runs
673  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
674  * progresses - these will have a latestRemovedXid on each record.
675  * In some cases this removes all of the tuples to be removed, though
676  * often we have dead tuples with index pointers so we must remember them
677  * for removal in phase 3. Index records for those rows are removed
678  * in phase 2 and index blocks do not have MVCC information attached.
679  * So before we can allow removal of any index tuples we need to issue
680  * a WAL record containing the latestRemovedXid of rows that will be
681  * removed in phase three. This allows recovery queries to block at the
682  * correct place, i.e. before phase two, rather than during phase three
683  * which would be after the rows have become inaccessible.
684  */
685 static void
687 {
688  /*
689  * Skip this for relations for which no WAL is to be written, or if we're
690  * not trying to support archive recovery.
691  */
692  if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
693  return;
694 
695  /*
696  * No need to write the record at all unless it contains a valid value
697  */
698  if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
699  (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
700 }
701 
702 /*
703  * lazy_scan_heap() -- scan an open heap relation
704  *
705  * This routine prunes each page in the heap, which will among other
706  * things truncate dead tuples to dead line pointers, defragment the
707  * page, and set commit status bits (see heap_page_prune). It also builds
708  * lists of dead tuples and pages with free space, calculates statistics
709  * on the number of live tuples in the heap, and marks pages as
710  * all-visible if appropriate. When done, or when we run low on space for
711  * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
712  * to reclaim dead line pointers.
713  *
714  * If the table has at least two indexes, we execute both index vacuum
715  * and index cleanup with parallel workers unless the parallel vacuum is
716  * disabled. In a parallel vacuum, we enter parallel mode and then
717  * create both the parallel context and the DSM segment before starting
718  * heap scan so that we can record dead tuples to the DSM segment. All
719  * parallel workers are launched at beginning of index vacuuming and
720  * index cleanup and they exit once done with all indexes. At the end of
721  * this function we exit from parallel mode. Index bulk-deletion results
722  * are stored in the DSM segment and we update index statistics for all
723  * the indexes after exiting from parallel mode since writes are not
724  * allowed during parallel mode.
725  *
726  * If there are no indexes then we can reclaim line pointers on the fly;
727  * dead line pointers need only be retained until all index pointers that
728  * reference them have been killed.
729  */
730 static void
731 lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
732  Relation *Irel, int nindexes, bool aggressive)
733 {
734  LVParallelState *lps = NULL;
735  LVDeadTuples *dead_tuples;
736  BlockNumber nblocks,
737  blkno;
738  HeapTupleData tuple;
739  TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
740  TransactionId relminmxid = onerel->rd_rel->relminmxid;
741  BlockNumber empty_pages,
742  vacuumed_pages,
743  next_fsm_block_to_vacuum;
744  double num_tuples, /* total number of nonremovable tuples */
745  live_tuples, /* live tuples (reltuples estimate) */
746  tups_vacuumed, /* tuples cleaned up by vacuum */
747  nkeep, /* dead-but-not-removable tuples */
748  nunused; /* unused line pointers */
749  IndexBulkDeleteResult **indstats;
750  int i;
751  PGRUsage ru0;
752  Buffer vmbuffer = InvalidBuffer;
753  BlockNumber next_unskippable_block;
754  bool skipping_blocks;
755  xl_heap_freeze_tuple *frozen;
757  const int initprog_index[] = {
761  };
762  int64 initprog_val[3];
763 
764  pg_rusage_init(&ru0);
765 
766  if (aggressive)
767  ereport(elevel,
768  (errmsg("aggressively vacuuming \"%s.%s\"",
769  vacrelstats->relnamespace,
770  vacrelstats->relname)));
771  else
772  ereport(elevel,
773  (errmsg("vacuuming \"%s.%s\"",
774  vacrelstats->relnamespace,
775  vacrelstats->relname)));
776 
777  empty_pages = vacuumed_pages = 0;
778  next_fsm_block_to_vacuum = (BlockNumber) 0;
779  num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
780 
781  indstats = (IndexBulkDeleteResult **)
782  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
783 
784  nblocks = RelationGetNumberOfBlocks(onerel);
785  vacrelstats->rel_pages = nblocks;
786  vacrelstats->scanned_pages = 0;
787  vacrelstats->tupcount_pages = 0;
788  vacrelstats->nonempty_pages = 0;
789  vacrelstats->latestRemovedXid = InvalidTransactionId;
790 
791  /*
792  * Initialize the state for a parallel vacuum. As of now, only one worker
793  * can be used for an index, so we invoke parallelism only if there are at
794  * least two indexes on a table.
795  */
796  if (params->nworkers >= 0 && vacrelstats->useindex && nindexes > 1)
797  {
798  /*
799  * Since parallel workers cannot access data in temporary tables, we
800  * can't perform parallel vacuum on them.
801  */
802  if (RelationUsesLocalBuffers(onerel))
803  {
804  /*
805  * Give warning only if the user explicitly tries to perform a
806  * parallel vacuum on the temporary table.
807  */
808  if (params->nworkers > 0)
810  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
811  vacrelstats->relname)));
812  }
813  else
814  lps = begin_parallel_vacuum(RelationGetRelid(onerel), Irel,
815  vacrelstats, nblocks, nindexes,
816  params->nworkers);
817  }
818 
819  /*
820  * Allocate the space for dead tuples in case the parallel vacuum is not
821  * initialized.
822  */
823  if (!ParallelVacuumIsActive(lps))
824  lazy_space_alloc(vacrelstats, nblocks);
825 
826  dead_tuples = vacrelstats->dead_tuples;
828 
829  /* Report that we're scanning the heap, advertising total # of blocks */
830  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
831  initprog_val[1] = nblocks;
832  initprog_val[2] = dead_tuples->max_tuples;
833  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
834 
835  /*
836  * Except when aggressive is set, we want to skip pages that are
837  * all-visible according to the visibility map, but only when we can skip
838  * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading
839  * sequentially, the OS should be doing readahead for us, so there's no
840  * gain in skipping a page now and then; that's likely to disable
841  * readahead and so be counterproductive. Also, skipping even a single
842  * page means that we can't update relfrozenxid, so we only want to do it
843  * if we can skip a goodly number of pages.
844  *
845  * When aggressive is set, we can't skip pages just because they are
846  * all-visible, but we can still skip pages that are all-frozen, since
847  * such pages do not need freezing and do not affect the value that we can
848  * safely set for relfrozenxid or relminmxid.
849  *
850  * Before entering the main loop, establish the invariant that
851  * next_unskippable_block is the next block number >= blkno that we can't
852  * skip based on the visibility map, either all-visible for a regular scan
853  * or all-frozen for an aggressive scan. We set it to nblocks if there's
854  * no such block. We also set up the skipping_blocks flag correctly at
855  * this stage.
856  *
857  * Note: The value returned by visibilitymap_get_status could be slightly
858  * out-of-date, since we make this test before reading the corresponding
859  * heap page or locking the buffer. This is OK. If we mistakenly think
860  * that the page is all-visible or all-frozen when in fact the flag's just
861  * been cleared, we might fail to vacuum the page. It's easy to see that
862  * skipping a page when aggressive is not set is not a very big deal; we
863  * might leave some dead tuples lying around, but the next vacuum will
864  * find them. But even when aggressive *is* set, it's still OK if we miss
865  * a page whose all-frozen marking has just been cleared. Any new XIDs
866  * just added to that page are necessarily newer than the GlobalXmin we
867  * computed, so they'll have no effect on the value to which we can safely
868  * set relfrozenxid. A similar argument applies for MXIDs and relminmxid.
869  *
870  * We will scan the table's last page, at least to the extent of
871  * determining whether it has tuples or not, even if it should be skipped
872  * according to the above rules; except when we've already determined that
873  * it's not worth trying to truncate the table. This avoids having
874  * lazy_truncate_heap() take access-exclusive lock on the table to attempt
875  * a truncation that just fails immediately because there are tuples in
876  * the last page. This is worth avoiding mainly because such a lock must
877  * be replayed on any hot standby, where it can be disruptive.
878  */
879  next_unskippable_block = 0;
880  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
881  {
882  while (next_unskippable_block < nblocks)
883  {
884  uint8 vmstatus;
885 
886  vmstatus = visibilitymap_get_status(onerel, next_unskippable_block,
887  &vmbuffer);
888  if (aggressive)
889  {
890  if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
891  break;
892  }
893  else
894  {
895  if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
896  break;
897  }
899  next_unskippable_block++;
900  }
901  }
902 
903  if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
904  skipping_blocks = true;
905  else
906  skipping_blocks = false;
907 
908  for (blkno = 0; blkno < nblocks; blkno++)
909  {
910  Buffer buf;
911  Page page;
912  OffsetNumber offnum,
913  maxoff;
914  bool tupgone,
915  hastup;
916  int prev_dead_count;
917  int nfrozen;
918  Size freespace;
919  bool all_visible_according_to_vm = false;
920  bool all_visible;
921  bool all_frozen = true; /* provided all_visible is also true */
922  bool has_dead_tuples;
923  TransactionId visibility_cutoff_xid = InvalidTransactionId;
924 
925  /* see note above about forcing scanning of last page */
926 #define FORCE_CHECK_PAGE() \
927  (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats))
928 
930 
932  blkno, NULL);
933 
934  if (blkno == next_unskippable_block)
935  {
936  /* Time to advance next_unskippable_block */
937  next_unskippable_block++;
938  if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
939  {
940  while (next_unskippable_block < nblocks)
941  {
942  uint8 vmskipflags;
943 
944  vmskipflags = visibilitymap_get_status(onerel,
945  next_unskippable_block,
946  &vmbuffer);
947  if (aggressive)
948  {
949  if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
950  break;
951  }
952  else
953  {
954  if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
955  break;
956  }
958  next_unskippable_block++;
959  }
960  }
961 
962  /*
963  * We know we can't skip the current block. But set up
964  * skipping_blocks to do the right thing at the following blocks.
965  */
966  if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
967  skipping_blocks = true;
968  else
969  skipping_blocks = false;
970 
971  /*
972  * Normally, the fact that we can't skip this block must mean that
973  * it's not all-visible. But in an aggressive vacuum we know only
974  * that it's not all-frozen, so it might still be all-visible.
975  */
976  if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
977  all_visible_according_to_vm = true;
978  }
979  else
980  {
981  /*
982  * The current block is potentially skippable; if we've seen a
983  * long enough run of skippable blocks to justify skipping it, and
984  * we're not forced to check it, then go ahead and skip.
985  * Otherwise, the page must be at least all-visible if not
986  * all-frozen, so we can set all_visible_according_to_vm = true.
987  */
988  if (skipping_blocks && !FORCE_CHECK_PAGE())
989  {
990  /*
991  * Tricky, tricky. If this is in aggressive vacuum, the page
992  * must have been all-frozen at the time we checked whether it
993  * was skippable, but it might not be any more. We must be
994  * careful to count it as a skipped all-frozen page in that
995  * case, or else we'll think we can't update relfrozenxid and
996  * relminmxid. If it's not an aggressive vacuum, we don't
997  * know whether it was all-frozen, so we have to recheck; but
998  * in this case an approximate answer is OK.
999  */
1000  if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1001  vacrelstats->frozenskipped_pages++;
1002  continue;
1003  }
1004  all_visible_according_to_vm = true;
1005  }
1006 
1008 
1009  /*
1010  * If we are close to overrunning the available space for dead-tuple
1011  * TIDs, pause and do a cycle of vacuuming before we tackle this page.
1012  */
1013  if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
1014  dead_tuples->num_tuples > 0)
1015  {
1016  /*
1017  * Before beginning index vacuuming, we release any pin we may
1018  * hold on the visibility map page. This isn't necessary for
1019  * correctness, but we do it anyway to avoid holding the pin
1020  * across a lengthy, unrelated operation.
1021  */
1022  if (BufferIsValid(vmbuffer))
1023  {
1024  ReleaseBuffer(vmbuffer);
1025  vmbuffer = InvalidBuffer;
1026  }
1027 
1028  /* Work on all the indexes, then the heap */
1029  lazy_vacuum_all_indexes(onerel, Irel, indstats,
1030  vacrelstats, lps, nindexes);
1031 
1032  /* Remove tuples from heap */
1033  lazy_vacuum_heap(onerel, vacrelstats);
1034 
1035  /*
1036  * Forget the now-vacuumed tuples, and press on, but be careful
1037  * not to reset latestRemovedXid since we want that value to be
1038  * valid.
1039  */
1040  dead_tuples->num_tuples = 0;
1041 
1042  /*
1043  * Vacuum the Free Space Map to make newly-freed space visible on
1044  * upper-level FSM pages. Note we have not yet processed blkno.
1045  */
1046  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1047  next_fsm_block_to_vacuum = blkno;
1048 
1049  /* Report that we are once again scanning the heap */
1052  }
1053 
1054  /*
1055  * Pin the visibility map page in case we need to mark the page
1056  * all-visible. In most cases this will be very cheap, because we'll
1057  * already have the correct page pinned anyway. However, it's
1058  * possible that (a) next_unskippable_block is covered by a different
1059  * VM page than the current block or (b) we released our pin and did a
1060  * cycle of index vacuuming.
1061  *
1062  */
1063  visibilitymap_pin(onerel, blkno, &vmbuffer);
1064 
1065  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
1066  RBM_NORMAL, vac_strategy);
1067 
1068  /* We need buffer cleanup lock so that we can prune HOT chains. */
1070  {
1071  /*
1072  * If we're not performing an aggressive scan to guard against XID
1073  * wraparound, and we don't want to forcibly check the page, then
1074  * it's OK to skip vacuuming pages we get a lock conflict on. They
1075  * will be dealt with in some future vacuum.
1076  */
1077  if (!aggressive && !FORCE_CHECK_PAGE())
1078  {
1079  ReleaseBuffer(buf);
1080  vacrelstats->pinskipped_pages++;
1081  continue;
1082  }
1083 
1084  /*
1085  * Read the page with share lock to see if any xids on it need to
1086  * be frozen. If not we just skip the page, after updating our
1087  * scan statistics. If there are some, we wait for cleanup lock.
1088  *
1089  * We could defer the lock request further by remembering the page
1090  * and coming back to it later, or we could even register
1091  * ourselves for multiple buffers and then service whichever one
1092  * is received first. For now, this seems good enough.
1093  *
1094  * If we get here with aggressive false, then we're just forcibly
1095  * checking the page, and so we don't want to insist on getting
1096  * the lock; we only need to know if the page contains tuples, so
1097  * that we can update nonempty_pages correctly. It's convenient
1098  * to use lazy_check_needs_freeze() for both situations, though.
1099  */
1101  if (!lazy_check_needs_freeze(buf, &hastup))
1102  {
1103  UnlockReleaseBuffer(buf);
1104  vacrelstats->scanned_pages++;
1105  vacrelstats->pinskipped_pages++;
1106  if (hastup)
1107  vacrelstats->nonempty_pages = blkno + 1;
1108  continue;
1109  }
1110  if (!aggressive)
1111  {
1112  /*
1113  * Here, we must not advance scanned_pages; that would amount
1114  * to claiming that the page contains no freezable tuples.
1115  */
1116  UnlockReleaseBuffer(buf);
1117  vacrelstats->pinskipped_pages++;
1118  if (hastup)
1119  vacrelstats->nonempty_pages = blkno + 1;
1120  continue;
1121  }
1123  LockBufferForCleanup(buf);
1124  /* drop through to normal processing */
1125  }
1126 
1127  vacrelstats->scanned_pages++;
1128  vacrelstats->tupcount_pages++;
1129 
1130  page = BufferGetPage(buf);
1131 
1132  if (PageIsNew(page))
1133  {
1134  /*
1135  * All-zeroes pages can be left over if either a backend extends
1136  * the relation by a single page, but crashes before the newly
1137  * initialized page has been written out, or when bulk-extending
1138  * the relation (which creates a number of empty pages at the tail
1139  * end of the relation, but enters them into the FSM).
1140  *
1141  * Note we do not enter the page into the visibilitymap. That has
1142  * the downside that we repeatedly visit this page in subsequent
1143  * vacuums, but otherwise we'll never not discover the space on a
1144  * promoted standby. The harm of repeated checking ought to
1145  * normally not be too bad - the space usually should be used at
1146  * some point, otherwise there wouldn't be any regular vacuums.
1147  *
1148  * Make sure these pages are in the FSM, to ensure they can be
1149  * reused. Do that by testing if there's any space recorded for
1150  * the page. If not, enter it. We do so after releasing the lock
1151  * on the heap page, the FSM is approximate, after all.
1152  */
1153  UnlockReleaseBuffer(buf);
1154 
1155  empty_pages++;
1156 
1157  if (GetRecordedFreeSpace(onerel, blkno) == 0)
1158  {
1159  Size freespace;
1160 
1161  freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
1162  RecordPageWithFreeSpace(onerel, blkno, freespace);
1163  }
1164  continue;
1165  }
1166 
1167  if (PageIsEmpty(page))
1168  {
1169  empty_pages++;
1170  freespace = PageGetHeapFreeSpace(page);
1171 
1172  /*
1173  * Empty pages are always all-visible and all-frozen (note that
1174  * the same is currently not true for new pages, see above).
1175  */
1176  if (!PageIsAllVisible(page))
1177  {
1179 
1180  /* mark buffer dirty before writing a WAL record */
1181  MarkBufferDirty(buf);
1182 
1183  /*
1184  * It's possible that another backend has extended the heap,
1185  * initialized the page, and then failed to WAL-log the page
1186  * due to an ERROR. Since heap extension is not WAL-logged,
1187  * recovery might try to replay our record setting the page
1188  * all-visible and find that the page isn't initialized, which
1189  * will cause a PANIC. To prevent that, check whether the
1190  * page has been previously WAL-logged, and if not, do that
1191  * now.
1192  */
1193  if (RelationNeedsWAL(onerel) &&
1194  PageGetLSN(page) == InvalidXLogRecPtr)
1195  log_newpage_buffer(buf, true);
1196 
1197  PageSetAllVisible(page);
1198  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1199  vmbuffer, InvalidTransactionId,
1201  END_CRIT_SECTION();
1202  }
1203 
1204  UnlockReleaseBuffer(buf);
1205  RecordPageWithFreeSpace(onerel, blkno, freespace);
1206  continue;
1207  }
1208 
1209  /*
1210  * Prune all HOT-update chains in this page.
1211  *
1212  * We count tuples removed by the pruning step as removed by VACUUM.
1213  */
1214  tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
1215  &vacrelstats->latestRemovedXid);
1216 
1217  /*
1218  * Now scan the page to collect vacuumable items and check for tuples
1219  * requiring freezing.
1220  */
1221  all_visible = true;
1222  has_dead_tuples = false;
1223  nfrozen = 0;
1224  hastup = false;
1225  prev_dead_count = dead_tuples->num_tuples;
1226  maxoff = PageGetMaxOffsetNumber(page);
1227 
1228  /*
1229  * Note: If you change anything in the loop below, also look at
1230  * heap_page_is_all_visible to see if that needs to be changed.
1231  */
1232  for (offnum = FirstOffsetNumber;
1233  offnum <= maxoff;
1234  offnum = OffsetNumberNext(offnum))
1235  {
1236  ItemId itemid;
1237 
1238  itemid = PageGetItemId(page, offnum);
1239 
1240  /* Unused items require no processing, but we count 'em */
1241  if (!ItemIdIsUsed(itemid))
1242  {
1243  nunused += 1;
1244  continue;
1245  }
1246 
1247  /* Redirect items mustn't be touched */
1248  if (ItemIdIsRedirected(itemid))
1249  {
1250  hastup = true; /* this page won't be truncatable */
1251  continue;
1252  }
1253 
1254  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1255 
1256  /*
1257  * DEAD line pointers are to be vacuumed normally; but we don't
1258  * count them in tups_vacuumed, else we'd be double-counting (at
1259  * least in the common case where heap_page_prune() just freed up
1260  * a non-HOT tuple).
1261  */
1262  if (ItemIdIsDead(itemid))
1263  {
1264  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1265  all_visible = false;
1266  continue;
1267  }
1268 
1269  Assert(ItemIdIsNormal(itemid));
1270 
1271  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1272  tuple.t_len = ItemIdGetLength(itemid);
1273  tuple.t_tableOid = RelationGetRelid(onerel);
1274 
1275  tupgone = false;
1276 
1277  /*
1278  * The criteria for counting a tuple as live in this block need to
1279  * match what analyze.c's acquire_sample_rows() does, otherwise
1280  * VACUUM and ANALYZE may produce wildly different reltuples
1281  * values, e.g. when there are many recently-dead tuples.
1282  *
1283  * The logic here is a bit simpler than acquire_sample_rows(), as
1284  * VACUUM can't run inside a transaction block, which makes some
1285  * cases impossible (e.g. in-progress insert from the same
1286  * transaction).
1287  */
1288  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
1289  {
1290  case HEAPTUPLE_DEAD:
1291 
1292  /*
1293  * Ordinarily, DEAD tuples would have been removed by
1294  * heap_page_prune(), but it's possible that the tuple
1295  * state changed since heap_page_prune() looked. In
1296  * particular an INSERT_IN_PROGRESS tuple could have
1297  * changed to DEAD if the inserter aborted. So this
1298  * cannot be considered an error condition.
1299  *
1300  * If the tuple is HOT-updated then it must only be
1301  * removed by a prune operation; so we keep it just as if
1302  * it were RECENTLY_DEAD. Also, if it's a heap-only
1303  * tuple, we choose to keep it, because it'll be a lot
1304  * cheaper to get rid of it in the next pruning pass than
1305  * to treat it like an indexed tuple. Finally, if index
1306  * cleanup is disabled, the second heap pass will not
1307  * execute, and the tuple will not get removed, so we must
1308  * treat it like any other dead tuple that we choose to
1309  * keep.
1310  *
1311  * If this were to happen for a tuple that actually needed
1312  * to be deleted, we'd be in trouble, because it'd
1313  * possibly leave a tuple below the relation's xmin
1314  * horizon alive. heap_prepare_freeze_tuple() is prepared
1315  * to detect that case and abort the transaction,
1316  * preventing corruption.
1317  */
1318  if (HeapTupleIsHotUpdated(&tuple) ||
1319  HeapTupleIsHeapOnly(&tuple) ||
1321  nkeep += 1;
1322  else
1323  tupgone = true; /* we can delete the tuple */
1324  all_visible = false;
1325  break;
1326  case HEAPTUPLE_LIVE:
1327 
1328  /*
1329  * Count it as live. Not only is this natural, but it's
1330  * also what acquire_sample_rows() does.
1331  */
1332  live_tuples += 1;
1333 
1334  /*
1335  * Is the tuple definitely visible to all transactions?
1336  *
1337  * NB: Like with per-tuple hint bits, we can't set the
1338  * PD_ALL_VISIBLE flag if the inserter committed
1339  * asynchronously. See SetHintBits for more info. Check
1340  * that the tuple is hinted xmin-committed because of
1341  * that.
1342  */
1343  if (all_visible)
1344  {
1345  TransactionId xmin;
1346 
1348  {
1349  all_visible = false;
1350  break;
1351  }
1352 
1353  /*
1354  * The inserter definitely committed. But is it old
1355  * enough that everyone sees it as committed?
1356  */
1357  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1358  if (!TransactionIdPrecedes(xmin, OldestXmin))
1359  {
1360  all_visible = false;
1361  break;
1362  }
1363 
1364  /* Track newest xmin on page. */
1365  if (TransactionIdFollows(xmin, visibility_cutoff_xid))
1366  visibility_cutoff_xid = xmin;
1367  }
1368  break;
1370 
1371  /*
1372  * If tuple is recently deleted then we must not remove it
1373  * from relation.
1374  */
1375  nkeep += 1;
1376  all_visible = false;
1377  break;
1379 
1380  /*
1381  * This is an expected case during concurrent vacuum.
1382  *
1383  * We do not count these rows as live, because we expect
1384  * the inserting transaction to update the counters at
1385  * commit, and we assume that will happen only after we
1386  * report our results. This assumption is a bit shaky,
1387  * but it is what acquire_sample_rows() does, so be
1388  * consistent.
1389  */
1390  all_visible = false;
1391  break;
1393  /* This is an expected case during concurrent vacuum */
1394  all_visible = false;
1395 
1396  /*
1397  * Count such rows as live. As above, we assume the
1398  * deleting transaction will commit and update the
1399  * counters after we report.
1400  */
1401  live_tuples += 1;
1402  break;
1403  default:
1404  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1405  break;
1406  }
1407 
1408  if (tupgone)
1409  {
1410  lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
1412  &vacrelstats->latestRemovedXid);
1413  tups_vacuumed += 1;
1414  has_dead_tuples = true;
1415  }
1416  else
1417  {
1418  bool tuple_totally_frozen;
1419 
1420  num_tuples += 1;
1421  hastup = true;
1422 
1423  /*
1424  * Each non-removable tuple must be checked to see if it needs
1425  * freezing. Note we already have exclusive buffer lock.
1426  */
1428  relfrozenxid, relminmxid,
1430  &frozen[nfrozen],
1431  &tuple_totally_frozen))
1432  frozen[nfrozen++].offset = offnum;
1433 
1434  if (!tuple_totally_frozen)
1435  all_frozen = false;
1436  }
1437  } /* scan along page */
1438 
1439  /*
1440  * If we froze any tuples, mark the buffer dirty, and write a WAL
1441  * record recording the changes. We must log the changes to be
1442  * crash-safe against future truncation of CLOG.
1443  */
1444  if (nfrozen > 0)
1445  {
1447 
1448  MarkBufferDirty(buf);
1449 
1450  /* execute collected freezes */
1451  for (i = 0; i < nfrozen; i++)
1452  {
1453  ItemId itemid;
1454  HeapTupleHeader htup;
1455 
1456  itemid = PageGetItemId(page, frozen[i].offset);
1457  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1458 
1459  heap_execute_freeze_tuple(htup, &frozen[i]);
1460  }
1461 
1462  /* Now WAL-log freezing if necessary */
1463  if (RelationNeedsWAL(onerel))
1464  {
1465  XLogRecPtr recptr;
1466 
1467  recptr = log_heap_freeze(onerel, buf, FreezeLimit,
1468  frozen, nfrozen);
1469  PageSetLSN(page, recptr);
1470  }
1471 
1472  END_CRIT_SECTION();
1473  }
1474 
1475  /*
1476  * If there are no indexes we can vacuum the page right now instead of
1477  * doing a second scan. Also we don't do that but forget dead tuples
1478  * when index cleanup is disabled.
1479  */
1480  if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
1481  {
1482  if (nindexes == 0)
1483  {
1484  /* Remove tuples from heap if the table has no index */
1485  lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
1486  vacuumed_pages++;
1487  has_dead_tuples = false;
1488  }
1489  else
1490  {
1491  /*
1492  * Here, we have indexes but index cleanup is disabled.
1493  * Instead of vacuuming the dead tuples on the heap, we just
1494  * forget them.
1495  *
1496  * Note that vacrelstats->dead_tuples could have tuples which
1497  * became dead after HOT-pruning but are not marked dead yet.
1498  * We do not process them because it's a very rare condition,
1499  * and the next vacuum will process them anyway.
1500  */
1502  }
1503 
1504  /*
1505  * Forget the now-vacuumed tuples, and press on, but be careful
1506  * not to reset latestRemovedXid since we want that value to be
1507  * valid.
1508  */
1509  dead_tuples->num_tuples = 0;
1510 
1511  /*
1512  * Periodically do incremental FSM vacuuming to make newly-freed
1513  * space visible on upper FSM pages. Note: although we've cleaned
1514  * the current block, we haven't yet updated its FSM entry (that
1515  * happens further down), so passing end == blkno is correct.
1516  */
1517  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1518  {
1519  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
1520  blkno);
1521  next_fsm_block_to_vacuum = blkno;
1522  }
1523  }
1524 
1525  freespace = PageGetHeapFreeSpace(page);
1526 
1527  /* mark page all-visible, if appropriate */
1528  if (all_visible && !all_visible_according_to_vm)
1529  {
1531 
1532  if (all_frozen)
1533  flags |= VISIBILITYMAP_ALL_FROZEN;
1534 
1535  /*
1536  * It should never be the case that the visibility map page is set
1537  * while the page-level bit is clear, but the reverse is allowed
1538  * (if checksums are not enabled). Regardless, set both bits so
1539  * that we get back in sync.
1540  *
1541  * NB: If the heap page is all-visible but the VM bit is not set,
1542  * we don't need to dirty the heap page. However, if checksums
1543  * are enabled, we do need to make sure that the heap page is
1544  * dirtied before passing it to visibilitymap_set(), because it
1545  * may be logged. Given that this situation should only happen in
1546  * rare cases after a crash, it is not worth optimizing.
1547  */
1548  PageSetAllVisible(page);
1549  MarkBufferDirty(buf);
1550  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1551  vmbuffer, visibility_cutoff_xid, flags);
1552  }
1553 
1554  /*
1555  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1556  * the page-level bit is clear. However, it's possible that the bit
1557  * got cleared after we checked it and before we took the buffer
1558  * content lock, so we must recheck before jumping to the conclusion
1559  * that something bad has happened.
1560  */
1561  else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1562  && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
1563  {
1564  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1565  vacrelstats->relname, blkno);
1566  visibilitymap_clear(onerel, blkno, vmbuffer,
1568  }
1569 
1570  /*
1571  * It's possible for the value returned by GetOldestXmin() to move
1572  * backwards, so it's not wrong for us to see tuples that appear to
1573  * not be visible to everyone yet, while PD_ALL_VISIBLE is already
1574  * set. The real safe xmin value never moves backwards, but
1575  * GetOldestXmin() is conservative and sometimes returns a value
1576  * that's unnecessarily small, so if we see that contradiction it just
1577  * means that the tuples that we think are not visible to everyone yet
1578  * actually are, and the PD_ALL_VISIBLE flag is correct.
1579  *
1580  * There should never be dead tuples on a page with PD_ALL_VISIBLE
1581  * set, however.
1582  */
1583  else if (PageIsAllVisible(page) && has_dead_tuples)
1584  {
1585  elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
1586  vacrelstats->relname, blkno);
1587  PageClearAllVisible(page);
1588  MarkBufferDirty(buf);
1589  visibilitymap_clear(onerel, blkno, vmbuffer,
1591  }
1592 
1593  /*
1594  * If the all-visible page is all-frozen but not marked as such yet,
1595  * mark it as all-frozen. Note that all_frozen is only valid if
1596  * all_visible is true, so we must check both.
1597  */
1598  else if (all_visible_according_to_vm && all_visible && all_frozen &&
1599  !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
1600  {
1601  /*
1602  * We can pass InvalidTransactionId as the cutoff XID here,
1603  * because setting the all-frozen bit doesn't cause recovery
1604  * conflicts.
1605  */
1606  visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
1607  vmbuffer, InvalidTransactionId,
1609  }
1610 
1611  UnlockReleaseBuffer(buf);
1612 
1613  /* Remember the location of the last page with nonremovable tuples */
1614  if (hastup)
1615  vacrelstats->nonempty_pages = blkno + 1;
1616 
1617  /*
1618  * If we remembered any tuples for deletion, then the page will be
1619  * visited again by lazy_vacuum_heap, which will compute and record
1620  * its post-compaction free space. If not, then we're done with this
1621  * page, so remember its free space as-is. (This path will always be
1622  * taken if there are no indexes.)
1623  */
1624  if (dead_tuples->num_tuples == prev_dead_count)
1625  RecordPageWithFreeSpace(onerel, blkno, freespace);
1626  }
1627 
1628  /* report that everything is scanned and vacuumed */
1630 
1631  pfree(frozen);
1632 
1633  /* save stats for use later */
1634  vacrelstats->tuples_deleted = tups_vacuumed;
1635  vacrelstats->new_dead_tuples = nkeep;
1636 
1637  /* now we can compute the new value for pg_class.reltuples */
1638  vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
1639  nblocks,
1640  vacrelstats->tupcount_pages,
1641  live_tuples);
1642 
1643  /* also compute total number of surviving heap entries */
1644  vacrelstats->new_rel_tuples =
1645  vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples;
1646 
1647  /*
1648  * Release any remaining pin on visibility map page.
1649  */
1650  if (BufferIsValid(vmbuffer))
1651  {
1652  ReleaseBuffer(vmbuffer);
1653  vmbuffer = InvalidBuffer;
1654  }
1655 
1656  /* If any tuples need to be deleted, perform final vacuum cycle */
1657  /* XXX put a threshold on min number of tuples here? */
1658  if (dead_tuples->num_tuples > 0)
1659  {
1660  /* Work on all the indexes, and then the heap */
1661  lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats,
1662  lps, nindexes);
1663 
1664  /* Remove tuples from heap */
1665  lazy_vacuum_heap(onerel, vacrelstats);
1666  }
1667 
1668  /*
1669  * Vacuum the remainder of the Free Space Map. We must do this whether or
1670  * not there were indexes.
1671  */
1672  if (blkno > next_fsm_block_to_vacuum)
1673  FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
1674 
1675  /* report all blocks vacuumed */
1677 
1678  /* Do post-vacuum cleanup */
1679  if (vacrelstats->useindex)
1680  lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes);
1681 
1682  /*
1683  * End parallel mode before updating index statistics as we cannot write
1684  * during parallel mode.
1685  */
1686  if (ParallelVacuumIsActive(lps))
1687  end_parallel_vacuum(Irel, indstats, lps, nindexes);
1688 
1689  /* Update index statistics */
1690  update_index_statistics(Irel, indstats, nindexes);
1691 
1692  /* If no indexes, make log report that lazy_vacuum_heap would've made */
1693  if (vacuumed_pages)
1694  ereport(elevel,
1695  (errmsg("\"%s\": removed %.0f row versions in %u pages",
1696  vacrelstats->relname,
1697  tups_vacuumed, vacuumed_pages)));
1698 
1699  /*
1700  * This is pretty messy, but we split it up so that we can skip emitting
1701  * individual parts of the message when not applicable.
1702  */
1703  initStringInfo(&buf);
1704  appendStringInfo(&buf,
1705  _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"),
1706  nkeep, OldestXmin);
1707  appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"),
1708  nunused);
1709  appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
1710  "Skipped %u pages due to buffer pins, ",
1711  vacrelstats->pinskipped_pages),
1712  vacrelstats->pinskipped_pages);
1713  appendStringInfo(&buf, ngettext("%u frozen page.\n",
1714  "%u frozen pages.\n",
1715  vacrelstats->frozenskipped_pages),
1716  vacrelstats->frozenskipped_pages);
1717  appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
1718  "%u pages are entirely empty.\n",
1719  empty_pages),
1720  empty_pages);
1721  appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
1722 
1723  ereport(elevel,
1724  (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
1725  vacrelstats->relname,
1726  tups_vacuumed, num_tuples,
1727  vacrelstats->scanned_pages, nblocks),
1728  errdetail_internal("%s", buf.data)));
1729  pfree(buf.data);
1730 }
1731 
1732 /*
1733  * lazy_vacuum_all_indexes() -- vacuum all indexes of relation.
1734  *
1735  * We process the indexes serially unless we are doing parallel vacuum.
1736  */
1737 static void
1739  IndexBulkDeleteResult **stats,
1740  LVRelStats *vacrelstats, LVParallelState *lps,
1741  int nindexes)
1742 {
1744  Assert(nindexes > 0);
1745 
1746  /* Log cleanup info before we touch indexes */
1747  vacuum_log_cleanup_info(onerel, vacrelstats);
1748 
1749  /* Report that we are now vacuuming indexes */
1752 
1753  /* Perform index vacuuming with parallel workers for parallel vacuum. */
1754  if (ParallelVacuumIsActive(lps))
1755  {
1756  /* Tell parallel workers to do index vacuuming */
1757  lps->lvshared->for_cleanup = false;
1758  lps->lvshared->first_time = false;
1759 
1760  /*
1761  * We can only provide an approximate value of num_heap_tuples in
1762  * vacuum cases.
1763  */
1764  lps->lvshared->reltuples = vacrelstats->old_live_tuples;
1765  lps->lvshared->estimated_count = true;
1766 
1767  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
1768  }
1769  else
1770  {
1771  int idx;
1772 
1773  for (idx = 0; idx < nindexes; idx++)
1774  lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples,
1775  vacrelstats->old_live_tuples, vacrelstats);
1776  }
1777 
1778  /* Increase and report the number of index scans */
1779  vacrelstats->num_index_scans++;
1781  vacrelstats->num_index_scans);
1782 }
1783 
1784 
1785 /*
1786  * lazy_vacuum_heap() -- second pass over the heap
1787  *
1788  * This routine marks dead tuples as unused and compacts out free
1789  * space on their pages. Pages not having dead tuples recorded from
1790  * lazy_scan_heap are not visited at all.
1791  *
1792  * Note: the reason for doing this as a second pass is we cannot remove
1793  * the tuples until we've removed their index entries, and we want to
1794  * process index entry removal in batches as large as possible.
1795  */
1796 static void
1797 lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
1798 {
1799  int tupindex;
1800  int npages;
1801  PGRUsage ru0;
1802  Buffer vmbuffer = InvalidBuffer;
1803  LVRelStats olderrinfo;
1804 
1805  /* Report that we are now vacuuming the heap */
1808 
1809  /* Update error traceback information */
1810  olderrinfo = *vacrelstats;
1812  InvalidBlockNumber, NULL);
1813 
1814  pg_rusage_init(&ru0);
1815  npages = 0;
1816 
1817  tupindex = 0;
1818  while (tupindex < vacrelstats->dead_tuples->num_tuples)
1819  {
1820  BlockNumber tblk;
1821  Buffer buf;
1822  Page page;
1823  Size freespace;
1824 
1826 
1827  tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
1828  vacrelstats->blkno = tblk;
1829  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
1830  vac_strategy);
1832  {
1833  ReleaseBuffer(buf);
1834  ++tupindex;
1835  continue;
1836  }
1837  tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
1838  &vmbuffer);
1839 
1840  /* Now that we've compacted the page, record its available space */
1841  page = BufferGetPage(buf);
1842  freespace = PageGetHeapFreeSpace(page);
1843 
1844  UnlockReleaseBuffer(buf);
1845  RecordPageWithFreeSpace(onerel, tblk, freespace);
1846  npages++;
1847  }
1848 
1849  if (BufferIsValid(vmbuffer))
1850  {
1851  ReleaseBuffer(vmbuffer);
1852  vmbuffer = InvalidBuffer;
1853  }
1854 
1855  ereport(elevel,
1856  (errmsg("\"%s\": removed %d row versions in %d pages",
1857  vacrelstats->relname,
1858  tupindex, npages),
1859  errdetail_internal("%s", pg_rusage_show(&ru0))));
1860 
1861  /* Revert to the previous phase information for error traceback */
1862  update_vacuum_error_info(vacrelstats,
1863  olderrinfo.phase,
1864  olderrinfo.blkno,
1865  olderrinfo.indname);
1866 }
1867 
1868 /*
1869  * lazy_vacuum_page() -- free dead tuples on a page
1870  * and repair its fragmentation.
1871  *
1872  * Caller must hold pin and buffer cleanup lock on the buffer.
1873  *
1874  * tupindex is the index in vacrelstats->dead_tuples of the first dead
1875  * tuple for this page. We assume the rest follow sequentially.
1876  * The return value is the first tupindex after the tuples of this page.
1877  */
1878 static int
1880  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
1881 {
1882  LVDeadTuples *dead_tuples = vacrelstats->dead_tuples;
1883  Page page = BufferGetPage(buffer);
1884  OffsetNumber unused[MaxOffsetNumber];
1885  int uncnt = 0;
1886  TransactionId visibility_cutoff_xid;
1887  bool all_frozen;
1888  LVRelStats olderrinfo;
1889 
1891 
1892  /* Update error traceback information */
1893  olderrinfo = *vacrelstats;
1895  blkno, NULL);
1896 
1898 
1899  for (; tupindex < dead_tuples->num_tuples; tupindex++)
1900  {
1901  BlockNumber tblk;
1902  OffsetNumber toff;
1903  ItemId itemid;
1904 
1905  tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
1906  if (tblk != blkno)
1907  break; /* past end of tuples for this block */
1908  toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
1909  itemid = PageGetItemId(page, toff);
1910  ItemIdSetUnused(itemid);
1911  unused[uncnt++] = toff;
1912  }
1913 
1915 
1916  /*
1917  * Mark buffer dirty before we write WAL.
1918  */
1919  MarkBufferDirty(buffer);
1920 
1921  /* XLOG stuff */
1922  if (RelationNeedsWAL(onerel))
1923  {
1924  XLogRecPtr recptr;
1925 
1926  recptr = log_heap_clean(onerel, buffer,
1927  NULL, 0, NULL, 0,
1928  unused, uncnt,
1929  vacrelstats->latestRemovedXid);
1930  PageSetLSN(page, recptr);
1931  }
1932 
1933  /*
1934  * End critical section, so we safely can do visibility tests (which
1935  * possibly need to perform IO and allocate memory!). If we crash now the
1936  * page (including the corresponding vm bit) might not be marked all
1937  * visible, but that's fine. A later vacuum will fix that.
1938  */
1939  END_CRIT_SECTION();
1940 
1941  /*
1942  * Now that we have removed the dead tuples from the page, once again
1943  * check if the page has become all-visible. The page is already marked
1944  * dirty, exclusively locked, and, if needed, a full page image has been
1945  * emitted in the log_heap_clean() above.
1946  */
1947  if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
1948  &all_frozen))
1949  PageSetAllVisible(page);
1950 
1951  /*
1952  * All the changes to the heap page have been done. If the all-visible
1953  * flag is now set, also set the VM all-visible bit (and, if possible, the
1954  * all-frozen bit) unless this has already been done previously.
1955  */
1956  if (PageIsAllVisible(page))
1957  {
1958  uint8 vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
1959  uint8 flags = 0;
1960 
1961  /* Set the VM all-frozen bit to flag, if needed */
1962  if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
1963  flags |= VISIBILITYMAP_ALL_VISIBLE;
1964  if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
1965  flags |= VISIBILITYMAP_ALL_FROZEN;
1966 
1967  Assert(BufferIsValid(*vmbuffer));
1968  if (flags != 0)
1969  visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
1970  *vmbuffer, visibility_cutoff_xid, flags);
1971  }
1972 
1973  /* Revert to the previous phase information for error traceback */
1974  update_vacuum_error_info(vacrelstats,
1975  olderrinfo.phase,
1976  olderrinfo.blkno,
1977  olderrinfo.indname);
1978  return tupindex;
1979 }
1980 
1981 /*
1982  * lazy_check_needs_freeze() -- scan page to see if any tuples
1983  * need to be cleaned to avoid wraparound
1984  *
1985  * Returns true if the page needs to be vacuumed using cleanup lock.
1986  * Also returns a flag indicating whether page contains any tuples at all.
1987  */
1988 static bool
1990 {
1991  Page page = BufferGetPage(buf);
1992  OffsetNumber offnum,
1993  maxoff;
1994  HeapTupleHeader tupleheader;
1995 
1996  *hastup = false;
1997 
1998  /*
1999  * New and empty pages, obviously, don't contain tuples. We could make
2000  * sure that the page is registered in the FSM, but it doesn't seem worth
2001  * waiting for a cleanup lock just for that, especially because it's
2002  * likely that the pin holder will do so.
2003  */
2004  if (PageIsNew(page) || PageIsEmpty(page))
2005  return false;
2006 
2007  maxoff = PageGetMaxOffsetNumber(page);
2008  for (offnum = FirstOffsetNumber;
2009  offnum <= maxoff;
2010  offnum = OffsetNumberNext(offnum))
2011  {
2012  ItemId itemid;
2013 
2014  itemid = PageGetItemId(page, offnum);
2015 
2016  /* this should match hastup test in count_nondeletable_pages() */
2017  if (ItemIdIsUsed(itemid))
2018  *hastup = true;
2019 
2020  /* dead and redirect items never need freezing */
2021  if (!ItemIdIsNormal(itemid))
2022  continue;
2023 
2024  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2025 
2026  if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
2027  MultiXactCutoff, buf))
2028  return true;
2029  } /* scan along page */
2030 
2031  return false;
2032 }
2033 
2034 /*
2035  * Perform index vacuum or index cleanup with parallel workers. This function
2036  * must be used by the parallel vacuum leader process. The caller must set
2037  * lps->lvshared->for_cleanup to indicate whether to perform vacuum or
2038  * cleanup.
2039  */
2040 static void
2042  LVRelStats *vacrelstats, LVParallelState *lps,
2043  int nindexes)
2044 {
2045  int nworkers;
2046 
2049  Assert(nindexes > 0);
2050 
2051  /* Determine the number of parallel workers to launch */
2052  if (lps->lvshared->for_cleanup)
2053  {
2054  if (lps->lvshared->first_time)
2055  nworkers = lps->nindexes_parallel_cleanup +
2057  else
2058  nworkers = lps->nindexes_parallel_cleanup;
2059  }
2060  else
2061  nworkers = lps->nindexes_parallel_bulkdel;
2062 
2063  /* The leader process will participate */
2064  nworkers--;
2065 
2066  /*
2067  * It is possible that parallel context is initialized with fewer workers
2068  * than the number of indexes that need a separate worker in the current
2069  * phase, so we need to consider it. See compute_parallel_vacuum_workers.
2070  */
2071  nworkers = Min(nworkers, lps->pcxt->nworkers);
2072 
2073  /* Setup the shared cost-based vacuum delay and launch workers */
2074  if (nworkers > 0)
2075  {
2076  if (vacrelstats->num_index_scans > 0)
2077  {
2078  /* Reset the parallel index processing counter */
2079  pg_atomic_write_u32(&(lps->lvshared->idx), 0);
2080 
2081  /* Reinitialize the parallel context to relaunch parallel workers */
2083  }
2084 
2085  /*
2086  * Set up shared cost balance and the number of active workers for
2087  * vacuum delay. We need to do this before launching workers as
2088  * otherwise, they might not see the updated values for these
2089  * parameters.
2090  */
2093 
2094  /*
2095  * The number of workers can vary between bulkdelete and cleanup
2096  * phase.
2097  */
2098  ReinitializeParallelWorkers(lps->pcxt, nworkers);
2099 
2101 
2102  if (lps->pcxt->nworkers_launched > 0)
2103  {
2104  /*
2105  * Reset the local cost values for leader backend as we have
2106  * already accumulated the remaining balance of heap.
2107  */
2108  VacuumCostBalance = 0;
2110 
2111  /* Enable shared cost balance for leader backend */
2114  }
2115 
2116  if (lps->lvshared->for_cleanup)
2117  ereport(elevel,
2118  (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
2119  "launched %d parallel vacuum workers for index cleanup (planned: %d)",
2120  lps->pcxt->nworkers_launched),
2121  lps->pcxt->nworkers_launched, nworkers)));
2122  else
2123  ereport(elevel,
2124  (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
2125  "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
2126  lps->pcxt->nworkers_launched),
2127  lps->pcxt->nworkers_launched, nworkers)));
2128  }
2129 
2130  /* Process the indexes that can be processed by only leader process */
2131  vacuum_indexes_leader(Irel, stats, vacrelstats, lps, nindexes);
2132 
2133  /*
2134  * Join as a parallel worker. The leader process alone processes all the
2135  * indexes in the case where no workers are launched.
2136  */
2137  parallel_vacuum_index(Irel, stats, lps->lvshared,
2138  vacrelstats->dead_tuples, nindexes, vacrelstats);
2139 
2140  /* Wait for all vacuum workers to finish */
2142 
2143  /*
2144  * Carry the shared balance value to heap scan and disable shared costing
2145  */
2147  {
2149  VacuumSharedCostBalance = NULL;
2150  VacuumActiveNWorkers = NULL;
2151  }
2152 }
2153 
2154 /*
2155  * Index vacuum/cleanup routine used by the leader process and parallel
2156  * vacuum worker processes to process the indexes in parallel.
2157  */
2158 static void
2160  LVShared *lvshared, LVDeadTuples *dead_tuples,
2161  int nindexes, LVRelStats *vacrelstats)
2162 {
2163  /*
2164  * Increment the active worker count if we are able to launch any worker.
2165  */
2168 
2169  /* Loop until all indexes are vacuumed */
2170  for (;;)
2171  {
2172  int idx;
2173  LVSharedIndStats *shared_indstats;
2174 
2175  /* Get an index number to process */
2176  idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
2177 
2178  /* Done for all indexes? */
2179  if (idx >= nindexes)
2180  break;
2181 
2182  /* Get the index statistics of this index from DSM */
2183  shared_indstats = get_indstats(lvshared, idx);
2184 
2185  /*
2186  * Skip processing indexes that doesn't participate in parallel
2187  * operation
2188  */
2189  if (shared_indstats == NULL ||
2190  skip_parallel_vacuum_index(Irel[idx], lvshared))
2191  continue;
2192 
2193  /* Do vacuum or cleanup of the index */
2194  vacuum_one_index(Irel[idx], &(stats[idx]), lvshared, shared_indstats,
2195  dead_tuples, vacrelstats);
2196  }
2197 
2198  /*
2199  * We have completed the index vacuum so decrement the active worker
2200  * count.
2201  */
2204 }
2205 
2206 /*
2207  * Vacuum or cleanup indexes that can be processed by only the leader process
2208  * because these indexes don't support parallel operation at that phase.
2209  */
2210 static void
2212  LVRelStats *vacrelstats, LVParallelState *lps,
2213  int nindexes)
2214 {
2215  int i;
2216 
2218 
2219  /*
2220  * Increment the active worker count if we are able to launch any worker.
2221  */
2224 
2225  for (i = 0; i < nindexes; i++)
2226  {
2227  LVSharedIndStats *shared_indstats;
2228 
2229  shared_indstats = get_indstats(lps->lvshared, i);
2230 
2231  /* Process the indexes skipped by parallel workers */
2232  if (shared_indstats == NULL ||
2233  skip_parallel_vacuum_index(Irel[i], lps->lvshared))
2234  vacuum_one_index(Irel[i], &(stats[i]), lps->lvshared,
2235  shared_indstats, vacrelstats->dead_tuples,
2236  vacrelstats);
2237  }
2238 
2239  /*
2240  * We have completed the index vacuum so decrement the active worker
2241  * count.
2242  */
2245 }
2246 
2247 /*
2248  * Vacuum or cleanup index either by leader process or by one of the worker
2249  * process. After processing the index this function copies the index
2250  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
2251  * segment.
2252  */
2253 static void
2255  LVShared *lvshared, LVSharedIndStats *shared_indstats,
2256  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
2257 {
2258  IndexBulkDeleteResult *bulkdelete_res = NULL;
2259 
2260  if (shared_indstats)
2261  {
2262  /* Get the space for IndexBulkDeleteResult */
2263  bulkdelete_res = &(shared_indstats->stats);
2264 
2265  /*
2266  * Update the pointer to the corresponding bulk-deletion result if
2267  * someone has already updated it.
2268  */
2269  if (shared_indstats->updated && *stats == NULL)
2270  *stats = bulkdelete_res;
2271  }
2272 
2273  /* Do vacuum or cleanup of the index */
2274  if (lvshared->for_cleanup)
2275  lazy_cleanup_index(indrel, stats, lvshared->reltuples,
2276  lvshared->estimated_count, vacrelstats);
2277  else
2278  lazy_vacuum_index(indrel, stats, dead_tuples,
2279  lvshared->reltuples, vacrelstats);
2280 
2281  /*
2282  * Copy the index bulk-deletion result returned from ambulkdelete and
2283  * amvacuumcleanup to the DSM segment if it's the first time to get it
2284  * from them, because they allocate it locally and it's possible that an
2285  * index will be vacuumed by the different vacuum process at the next
2286  * time. The copying of the result normally happens only after the first
2287  * time of index vacuuming. From the second time, we pass the result on
2288  * the DSM segment so that they then update it directly.
2289  *
2290  * Since all vacuum workers write the bulk-deletion result at different
2291  * slots we can write them without locking.
2292  */
2293  if (shared_indstats && !shared_indstats->updated && *stats != NULL)
2294  {
2295  memcpy(bulkdelete_res, *stats, sizeof(IndexBulkDeleteResult));
2296  shared_indstats->updated = true;
2297 
2298  /*
2299  * Now that the stats[idx] points to the DSM segment, we don't need
2300  * the locally allocated results.
2301  */
2302  pfree(*stats);
2303  *stats = bulkdelete_res;
2304  }
2305 }
2306 
2307 /*
2308  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2309  *
2310  * Cleanup indexes. We process the indexes serially unless we are doing
2311  * parallel vacuum.
2312  */
2313 static void
2315  LVRelStats *vacrelstats, LVParallelState *lps,
2316  int nindexes)
2317 {
2318  int idx;
2319 
2321  Assert(nindexes > 0);
2322 
2323  /* Report that we are now cleaning up indexes */
2326 
2327  /*
2328  * If parallel vacuum is active we perform index cleanup with parallel
2329  * workers.
2330  */
2331  if (ParallelVacuumIsActive(lps))
2332  {
2333  /* Tell parallel workers to do index cleanup */
2334  lps->lvshared->for_cleanup = true;
2335  lps->lvshared->first_time =
2336  (vacrelstats->num_index_scans == 0);
2337 
2338  /*
2339  * Now we can provide a better estimate of total number of surviving
2340  * tuples (we assume indexes are more interested in that than in the
2341  * number of nominally live tuples).
2342  */
2343  lps->lvshared->reltuples = vacrelstats->new_rel_tuples;
2344  lps->lvshared->estimated_count =
2345  (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
2346 
2347  lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
2348  }
2349  else
2350  {
2351  for (idx = 0; idx < nindexes; idx++)
2352  lazy_cleanup_index(Irel[idx], &stats[idx],
2353  vacrelstats->new_rel_tuples,
2354  vacrelstats->tupcount_pages < vacrelstats->rel_pages,
2355  vacrelstats);
2356  }
2357 }
2358 
2359 /*
2360  * lazy_vacuum_index() -- vacuum one index relation.
2361  *
2362  * Delete all the index entries pointing to tuples listed in
2363  * dead_tuples, and update running statistics.
2364  *
2365  * reltuples is the number of heap tuples to be passed to the
2366  * bulkdelete callback.
2367  */
2368 static void
2370  LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
2371 {
2372  IndexVacuumInfo ivinfo;
2373  const char *msg;
2374  PGRUsage ru0;
2375  LVRelStats olderrinfo;
2376 
2377  pg_rusage_init(&ru0);
2378 
2379  ivinfo.index = indrel;
2380  ivinfo.analyze_only = false;
2381  ivinfo.report_progress = false;
2382  ivinfo.estimated_count = true;
2383  ivinfo.message_level = elevel;
2384  ivinfo.num_heap_tuples = reltuples;
2385  ivinfo.strategy = vac_strategy;
2386 
2387  /* Update error traceback information */
2388  olderrinfo = *vacrelstats;
2389  update_vacuum_error_info(vacrelstats,
2392  RelationGetRelationName(indrel));
2393 
2394  /* Do bulk deletion */
2395  *stats = index_bulk_delete(&ivinfo, *stats,
2396  lazy_tid_reaped, (void *) dead_tuples);
2397 
2398  if (IsParallelWorker())
2399  msg = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker");
2400  else
2401  msg = gettext_noop("scanned index \"%s\" to remove %d row versions");
2402 
2403  ereport(elevel,
2404  (errmsg(msg,
2405  vacrelstats->indname,
2406  dead_tuples->num_tuples),
2407  errdetail_internal("%s", pg_rusage_show(&ru0))));
2408 
2409  /* Revert to the previous phase information for error traceback */
2410  update_vacuum_error_info(vacrelstats,
2411  olderrinfo.phase,
2412  olderrinfo.blkno,
2413  olderrinfo.indname);
2414 }
2415 
2416 /*
2417  * lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
2418  *
2419  * reltuples is the number of heap tuples and estimated_count is true
2420  * if the reltuples is an estimated value.
2421  */
2422 static void
2424  IndexBulkDeleteResult **stats,
2425  double reltuples, bool estimated_count, LVRelStats *vacrelstats)
2426 {
2427  IndexVacuumInfo ivinfo;
2428  const char *msg;
2429  PGRUsage ru0;
2430  LVRelStats olderrcbarg;
2431 
2432  pg_rusage_init(&ru0);
2433 
2434  ivinfo.index = indrel;
2435  ivinfo.analyze_only = false;
2436  ivinfo.report_progress = false;
2437  ivinfo.estimated_count = estimated_count;
2438  ivinfo.message_level = elevel;
2439 
2440  ivinfo.num_heap_tuples = reltuples;
2441  ivinfo.strategy = vac_strategy;
2442 
2443  /* Update error traceback information */
2444  olderrcbarg = *vacrelstats;
2445  update_vacuum_error_info(vacrelstats,
2448  RelationGetRelationName(indrel));
2449 
2450  *stats = index_vacuum_cleanup(&ivinfo, *stats);
2451 
2452  /* Revert back to the old phase information for error traceback */
2453  update_vacuum_error_info(vacrelstats,
2454  olderrcbarg.phase,
2455  olderrcbarg.blkno,
2456  olderrcbarg.indname);
2457  if (!(*stats))
2458  return;
2459 
2460  if (IsParallelWorker())
2461  msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages as reported by parallel vacuum worker");
2462  else
2463  msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages");
2464 
2465  ereport(elevel,
2466  (errmsg(msg,
2467  RelationGetRelationName(indrel),
2468  (*stats)->num_index_tuples,
2469  (*stats)->num_pages),
2470  errdetail("%.0f index row versions were removed.\n"
2471  "%u index pages have been deleted, %u are currently reusable.\n"
2472  "%s.",
2473  (*stats)->tuples_removed,
2474  (*stats)->pages_deleted, (*stats)->pages_free,
2475  pg_rusage_show(&ru0))));
2476 }
2477 
2478 /*
2479  * should_attempt_truncation - should we attempt to truncate the heap?
2480  *
2481  * Don't even think about it unless we have a shot at releasing a goodly
2482  * number of pages. Otherwise, the time taken isn't worth it.
2483  *
2484  * Also don't attempt it if we are doing early pruning/vacuuming, because a
2485  * scan which cannot find a truncated heap page cannot determine that the
2486  * snapshot is too old to read that page. We might be able to get away with
2487  * truncating all except one of the pages, setting its LSN to (at least) the
2488  * maximum of the truncated range if we also treated an index leaf tuple
2489  * pointing to a missing heap page as something to trigger the "snapshot too
2490  * old" error, but that seems fragile and seems like it deserves its own patch
2491  * if we consider it.
2492  *
2493  * This is split out so that we can test whether truncation is going to be
2494  * called for before we actually do it. If you change the logic here, be
2495  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
2496  */
2497 static bool
2499 {
2500  BlockNumber possibly_freeable;
2501 
2502  if (params->truncate == VACOPT_TERNARY_DISABLED)
2503  return false;
2504 
2505  possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
2506  if (possibly_freeable > 0 &&
2507  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2508  possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) &&
2510  return true;
2511  else
2512  return false;
2513 }
2514 
2515 /*
2516  * lazy_truncate_heap - try to truncate off any empty pages at the end
2517  */
2518 static void
2520 {
2521  BlockNumber old_rel_pages = vacrelstats->rel_pages;
2522  BlockNumber new_rel_pages;
2523  int lock_retry;
2524 
2525  /* Report that we are now truncating */
2528 
2529  /*
2530  * Loop until no more truncating can be done.
2531  */
2532  do
2533  {
2534  PGRUsage ru0;
2535 
2536  pg_rusage_init(&ru0);
2537 
2538  /*
2539  * We need full exclusive lock on the relation in order to do
2540  * truncation. If we can't get it, give up rather than waiting --- we
2541  * don't want to block other backends, and we don't want to deadlock
2542  * (which is quite possible considering we already hold a lower-grade
2543  * lock).
2544  */
2545  vacrelstats->lock_waiter_detected = false;
2546  lock_retry = 0;
2547  while (true)
2548  {
2550  break;
2551 
2552  /*
2553  * Check for interrupts while trying to (re-)acquire the exclusive
2554  * lock.
2555  */
2557 
2558  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2560  {
2561  /*
2562  * We failed to establish the lock in the specified number of
2563  * retries. This means we give up truncating.
2564  */
2565  vacrelstats->lock_waiter_detected = true;
2566  ereport(elevel,
2567  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2568  vacrelstats->relname)));
2569  return;
2570  }
2571 
2573  }
2574 
2575  /*
2576  * Now that we have exclusive lock, look to see if the rel has grown
2577  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2578  * the newly added pages presumably contain non-deletable tuples.
2579  */
2580  new_rel_pages = RelationGetNumberOfBlocks(onerel);
2581  if (new_rel_pages != old_rel_pages)
2582  {
2583  /*
2584  * Note: we intentionally don't update vacrelstats->rel_pages with
2585  * the new rel size here. If we did, it would amount to assuming
2586  * that the new pages are empty, which is unlikely. Leaving the
2587  * numbers alone amounts to assuming that the new pages have the
2588  * same tuple density as existing ones, which is less unlikely.
2589  */
2591  return;
2592  }
2593 
2594  /*
2595  * Scan backwards from the end to verify that the end pages actually
2596  * contain no tuples. This is *necessary*, not optional, because
2597  * other backends could have added tuples to these pages whilst we
2598  * were vacuuming.
2599  */
2600  new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
2601  vacrelstats->blkno = new_rel_pages;
2602 
2603  if (new_rel_pages >= old_rel_pages)
2604  {
2605  /* can't do anything after all */
2607  return;
2608  }
2609 
2610  /*
2611  * Okay to truncate.
2612  */
2613  RelationTruncate(onerel, new_rel_pages);
2614 
2615  /*
2616  * We can release the exclusive lock as soon as we have truncated.
2617  * Other backends can't safely access the relation until they have
2618  * processed the smgr invalidation that smgrtruncate sent out ... but
2619  * that should happen as part of standard invalidation processing once
2620  * they acquire lock on the relation.
2621  */
2623 
2624  /*
2625  * Update statistics. Here, it *is* correct to adjust rel_pages
2626  * without also touching reltuples, since the tuple count wasn't
2627  * changed by the truncation.
2628  */
2629  vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
2630  vacrelstats->rel_pages = new_rel_pages;
2631 
2632  ereport(elevel,
2633  (errmsg("\"%s\": truncated %u to %u pages",
2634  vacrelstats->relname,
2635  old_rel_pages, new_rel_pages),
2636  errdetail_internal("%s",
2637  pg_rusage_show(&ru0))));
2638  old_rel_pages = new_rel_pages;
2639  } while (new_rel_pages > vacrelstats->nonempty_pages &&
2640  vacrelstats->lock_waiter_detected);
2641 }
2642 
2643 /*
2644  * Rescan end pages to verify that they are (still) empty of tuples.
2645  *
2646  * Returns number of nondeletable pages (last nonempty page + 1).
2647  */
2648 static BlockNumber
2650 {
2651  BlockNumber blkno;
2652  BlockNumber prefetchedUntil;
2653  instr_time starttime;
2654 
2655  /* Initialize the starttime if we check for conflicting lock requests */
2656  INSTR_TIME_SET_CURRENT(starttime);
2657 
2658  /*
2659  * Start checking blocks at what we believe relation end to be and move
2660  * backwards. (Strange coding of loop control is needed because blkno is
2661  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2662  * in forward direction, so that OS-level readahead can kick in.
2663  */
2664  blkno = vacrelstats->rel_pages;
2666  "prefetch size must be power of 2");
2667  prefetchedUntil = InvalidBlockNumber;
2668  while (blkno > vacrelstats->nonempty_pages)
2669  {
2670  Buffer buf;
2671  Page page;
2672  OffsetNumber offnum,
2673  maxoff;
2674  bool hastup;
2675 
2676  /*
2677  * Check if another process requests a lock on our relation. We are
2678  * holding an AccessExclusiveLock here, so they will be waiting. We
2679  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2680  * only check if that interval has elapsed once every 32 blocks to
2681  * keep the number of system calls and actual shared lock table
2682  * lookups to a minimum.
2683  */
2684  if ((blkno % 32) == 0)
2685  {
2686  instr_time currenttime;
2687  instr_time elapsed;
2688 
2689  INSTR_TIME_SET_CURRENT(currenttime);
2690  elapsed = currenttime;
2691  INSTR_TIME_SUBTRACT(elapsed, starttime);
2692  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2694  {
2696  {
2697  ereport(elevel,
2698  (errmsg("\"%s\": suspending truncate due to conflicting lock request",
2699  vacrelstats->relname)));
2700 
2701  vacrelstats->lock_waiter_detected = true;
2702  return blkno;
2703  }
2704  starttime = currenttime;
2705  }
2706  }
2707 
2708  /*
2709  * We don't insert a vacuum delay point here, because we have an
2710  * exclusive lock on the table which we want to hold for as short a
2711  * time as possible. We still need to check for interrupts however.
2712  */
2714 
2715  blkno--;
2716 
2717  /* If we haven't prefetched this lot yet, do so now. */
2718  if (prefetchedUntil > blkno)
2719  {
2720  BlockNumber prefetchStart;
2721  BlockNumber pblkno;
2722 
2723  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2724  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2725  {
2726  PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);
2728  }
2729  prefetchedUntil = prefetchStart;
2730  }
2731 
2732  buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
2733  RBM_NORMAL, vac_strategy);
2734 
2735  /* In this phase we only need shared access to the buffer */
2737 
2738  page = BufferGetPage(buf);
2739 
2740  if (PageIsNew(page) || PageIsEmpty(page))
2741  {
2742  UnlockReleaseBuffer(buf);
2743  continue;
2744  }
2745 
2746  hastup = false;
2747  maxoff = PageGetMaxOffsetNumber(page);
2748  for (offnum = FirstOffsetNumber;
2749  offnum <= maxoff;
2750  offnum = OffsetNumberNext(offnum))
2751  {
2752  ItemId itemid;
2753 
2754  itemid = PageGetItemId(page, offnum);
2755 
2756  /*
2757  * Note: any non-unused item should be taken as a reason to keep
2758  * this page. We formerly thought that DEAD tuples could be
2759  * thrown away, but that's not so, because we'd not have cleaned
2760  * out their index entries.
2761  */
2762  if (ItemIdIsUsed(itemid))
2763  {
2764  hastup = true;
2765  break; /* can stop scanning */
2766  }
2767  } /* scan along page */
2768 
2769  UnlockReleaseBuffer(buf);
2770 
2771  /* Done scanning if we found a tuple here */
2772  if (hastup)
2773  return blkno + 1;
2774  }
2775 
2776  /*
2777  * If we fall out of the loop, all the previously-thought-to-be-empty
2778  * pages still are; we need not bother to look at the last known-nonempty
2779  * page.
2780  */
2781  return vacrelstats->nonempty_pages;
2782 }
2783 
2784 /*
2785  * Return the maximum number of dead tuples we can record.
2786  */
2787 static long
2788 compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
2789 {
2790  long maxtuples;
2791  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
2792  autovacuum_work_mem != -1 ?
2794 
2795  if (useindex)
2796  {
2797  maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
2798  maxtuples = Min(maxtuples, INT_MAX);
2799  maxtuples = Min(maxtuples, MAXDEADTUPLES(MaxAllocSize));
2800 
2801  /* curious coding here to ensure the multiplication can't overflow */
2802  if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
2803  maxtuples = relblocks * LAZY_ALLOC_TUPLES;
2804 
2805  /* stay sane if small maintenance_work_mem */
2806  maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
2807  }
2808  else
2809  maxtuples = MaxHeapTuplesPerPage;
2810 
2811  return maxtuples;
2812 }
2813 
2814 /*
2815  * lazy_space_alloc - space allocation decisions for lazy vacuum
2816  *
2817  * See the comments at the head of this file for rationale.
2818  */
2819 static void
2820 lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
2821 {
2822  LVDeadTuples *dead_tuples = NULL;
2823  long maxtuples;
2824 
2825  maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
2826 
2827  dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
2828  dead_tuples->num_tuples = 0;
2829  dead_tuples->max_tuples = (int) maxtuples;
2830 
2831  vacrelstats->dead_tuples = dead_tuples;
2832 }
2833 
2834 /*
2835  * lazy_record_dead_tuple - remember one deletable tuple
2836  */
2837 static void
2839 {
2840  /*
2841  * The array shouldn't overflow under normal behavior, but perhaps it
2842  * could if we are given a really small maintenance_work_mem. In that
2843  * case, just forget the last few tuples (we'll get 'em next time).
2844  */
2845  if (dead_tuples->num_tuples < dead_tuples->max_tuples)
2846  {
2847  dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr;
2848  dead_tuples->num_tuples++;
2850  dead_tuples->num_tuples);
2851  }
2852 }
2853 
2854 /*
2855  * lazy_tid_reaped() -- is a particular tid deletable?
2856  *
2857  * This has the right signature to be an IndexBulkDeleteCallback.
2858  *
2859  * Assumes dead_tuples array is in sorted order.
2860  */
2861 static bool
2863 {
2864  LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
2865  ItemPointer res;
2866 
2867  res = (ItemPointer) bsearch((void *) itemptr,
2868  (void *) dead_tuples->itemptrs,
2869  dead_tuples->num_tuples,
2870  sizeof(ItemPointerData),
2871  vac_cmp_itemptr);
2872 
2873  return (res != NULL);
2874 }
2875 
2876 /*
2877  * Comparator routines for use with qsort() and bsearch().
2878  */
2879 static int
2880 vac_cmp_itemptr(const void *left, const void *right)
2881 {
2882  BlockNumber lblk,
2883  rblk;
2884  OffsetNumber loff,
2885  roff;
2886 
2887  lblk = ItemPointerGetBlockNumber((ItemPointer) left);
2888  rblk = ItemPointerGetBlockNumber((ItemPointer) right);
2889 
2890  if (lblk < rblk)
2891  return -1;
2892  if (lblk > rblk)
2893  return 1;
2894 
2895  loff = ItemPointerGetOffsetNumber((ItemPointer) left);
2896  roff = ItemPointerGetOffsetNumber((ItemPointer) right);
2897 
2898  if (loff < roff)
2899  return -1;
2900  if (loff > roff)
2901  return 1;
2902 
2903  return 0;
2904 }
2905 
2906 /*
2907  * Check if every tuple in the given page is visible to all current and future
2908  * transactions. Also return the visibility_cutoff_xid which is the highest
2909  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
2910  * on this page is frozen.
2911  */
2912 static bool
2914  TransactionId *visibility_cutoff_xid,
2915  bool *all_frozen)
2916 {
2917  Page page = BufferGetPage(buf);
2918  BlockNumber blockno = BufferGetBlockNumber(buf);
2919  OffsetNumber offnum,
2920  maxoff;
2921  bool all_visible = true;
2922 
2923  *visibility_cutoff_xid = InvalidTransactionId;
2924  *all_frozen = true;
2925 
2926  /*
2927  * This is a stripped down version of the line pointer scan in
2928  * lazy_scan_heap(). So if you change anything here, also check that code.
2929  */
2930  maxoff = PageGetMaxOffsetNumber(page);
2931  for (offnum = FirstOffsetNumber;
2932  offnum <= maxoff && all_visible;
2933  offnum = OffsetNumberNext(offnum))
2934  {
2935  ItemId itemid;
2936  HeapTupleData tuple;
2937 
2938  itemid = PageGetItemId(page, offnum);
2939 
2940  /* Unused or redirect line pointers are of no interest */
2941  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
2942  continue;
2943 
2944  ItemPointerSet(&(tuple.t_self), blockno, offnum);
2945 
2946  /*
2947  * Dead line pointers can have index pointers pointing to them. So
2948  * they can't be treated as visible
2949  */
2950  if (ItemIdIsDead(itemid))
2951  {
2952  all_visible = false;
2953  *all_frozen = false;
2954  break;
2955  }
2956 
2957  Assert(ItemIdIsNormal(itemid));
2958 
2959  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2960  tuple.t_len = ItemIdGetLength(itemid);
2961  tuple.t_tableOid = RelationGetRelid(rel);
2962 
2963  switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
2964  {
2965  case HEAPTUPLE_LIVE:
2966  {
2967  TransactionId xmin;
2968 
2969  /* Check comments in lazy_scan_heap. */
2971  {
2972  all_visible = false;
2973  *all_frozen = false;
2974  break;
2975  }
2976 
2977  /*
2978  * The inserter definitely committed. But is it old enough
2979  * that everyone sees it as committed?
2980  */
2981  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
2982  if (!TransactionIdPrecedes(xmin, OldestXmin))
2983  {
2984  all_visible = false;
2985  *all_frozen = false;
2986  break;
2987  }
2988 
2989  /* Track newest xmin on page. */
2990  if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
2991  *visibility_cutoff_xid = xmin;
2992 
2993  /* Check whether this tuple is already frozen or not */
2994  if (all_visible && *all_frozen &&
2996  *all_frozen = false;
2997  }
2998  break;
2999 
3000  case HEAPTUPLE_DEAD:
3004  {
3005  all_visible = false;
3006  *all_frozen = false;
3007  break;
3008  }
3009  default:
3010  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3011  break;
3012  }
3013  } /* scan along page */
3014 
3015  return all_visible;
3016 }
3017 
3018 /*
3019  * Compute the number of parallel worker processes to request. Both index
3020  * vacuum and index cleanup can be executed with parallel workers. The index
3021  * is eligible for parallel vacuum iff it's size is greater than
3022  * min_parallel_index_scan_size as invoking workers for very small indexes
3023  * can hurt the performance.
3024  *
3025  * nrequested is the number of parallel workers that user requested. If
3026  * nrequested is 0, we compute the parallel degree based on nindexes, that is
3027  * the number of indexes that support parallel vacuum. This function also
3028  * sets can_parallel_vacuum to remember indexes that participate in parallel
3029  * vacuum.
3030  */
3031 static int
3032 compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
3033  bool *can_parallel_vacuum)
3034 {
3035  int nindexes_parallel = 0;
3036  int nindexes_parallel_bulkdel = 0;
3037  int nindexes_parallel_cleanup = 0;
3038  int parallel_workers;
3039  int i;
3040 
3041  /*
3042  * We don't allow to perform parallel operation in standalone backend or
3043  * when parallelism is disabled.
3044  */
3046  return 0;
3047 
3048  /*
3049  * Compute the number of indexes that can participate in parallel vacuum.
3050  */
3051  for (i = 0; i < nindexes; i++)
3052  {
3053  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3054 
3055  if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
3057  continue;
3058 
3059  can_parallel_vacuum[i] = true;
3060 
3061  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3062  nindexes_parallel_bulkdel++;
3063  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
3064  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3065  nindexes_parallel_cleanup++;
3066  }
3067 
3068  nindexes_parallel = Max(nindexes_parallel_bulkdel,
3069  nindexes_parallel_cleanup);
3070 
3071  /* The leader process takes one index */
3072  nindexes_parallel--;
3073 
3074  /* No index supports parallel vacuum */
3075  if (nindexes_parallel <= 0)
3076  return 0;
3077 
3078  /* Compute the parallel degree */
3079  parallel_workers = (nrequested > 0) ?
3080  Min(nrequested, nindexes_parallel) : nindexes_parallel;
3081 
3082  /* Cap by max_parallel_maintenance_workers */
3083  parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
3084 
3085  return parallel_workers;
3086 }
3087 
3088 /*
3089  * Initialize variables for shared index statistics, set NULL bitmap and the
3090  * size of stats for each index.
3091  */
3092 static void
3093 prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
3094  int nindexes)
3095 {
3096  int i;
3097 
3098  /* Currently, we don't support parallel vacuum for autovacuum */
3100 
3101  /* Set NULL for all indexes */
3102  memset(lvshared->bitmap, 0x00, BITMAPLEN(nindexes));
3103 
3104  for (i = 0; i < nindexes; i++)
3105  {
3106  if (!can_parallel_vacuum[i])
3107  continue;
3108 
3109  /* Set NOT NULL as this index do support parallelism */
3110  lvshared->bitmap[i >> 3] |= 1 << (i & 0x07);
3111  }
3112 }
3113 
3114 /*
3115  * Update index statistics in pg_class if the statistics is accurate.
3116  */
3117 static void
3119  int nindexes)
3120 {
3121  int i;
3122 
3124 
3125  for (i = 0; i < nindexes; i++)
3126  {
3127  if (stats[i] == NULL || stats[i]->estimated_count)
3128  continue;
3129 
3130  /* Update index statistics */
3131  vac_update_relstats(Irel[i],
3132  stats[i]->num_pages,
3133  stats[i]->num_index_tuples,
3134  0,
3135  false,
3138  false);
3139  pfree(stats[i]);
3140  }
3141 }
3142 
3143 /*
3144  * This function prepares and returns parallel vacuum state if we can launch
3145  * even one worker. This function is responsible to enter parallel mode,
3146  * create a parallel context, and then initialize the DSM segment.
3147  */
3148 static LVParallelState *
3149 begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
3150  BlockNumber nblocks, int nindexes, int nrequested)
3151 {
3152  LVParallelState *lps = NULL;
3153  ParallelContext *pcxt;
3154  LVShared *shared;
3155  LVDeadTuples *dead_tuples;
3156  bool *can_parallel_vacuum;
3157  long maxtuples;
3158  char *sharedquery;
3159  Size est_shared;
3160  Size est_deadtuples;
3161  int nindexes_mwm = 0;
3162  int parallel_workers = 0;
3163  int querylen;
3164  int i;
3165 
3166  /*
3167  * A parallel vacuum must be requested and there must be indexes on the
3168  * relation
3169  */
3170  Assert(nrequested >= 0);
3171  Assert(nindexes > 0);
3172 
3173  /*
3174  * Compute the number of parallel vacuum workers to launch
3175  */
3176  can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
3177  parallel_workers = compute_parallel_vacuum_workers(Irel, nindexes,
3178  nrequested,
3179  can_parallel_vacuum);
3180 
3181  /* Can't perform vacuum in parallel */
3182  if (parallel_workers <= 0)
3183  {
3184  pfree(can_parallel_vacuum);
3185  return lps;
3186  }
3187 
3188  lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
3189 
3191  pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
3192  parallel_workers);
3193  Assert(pcxt->nworkers > 0);
3194  lps->pcxt = pcxt;
3195 
3196  /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
3197  est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3198  for (i = 0; i < nindexes; i++)
3199  {
3200  uint8 vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
3201 
3202  /*
3203  * Cleanup option should be either disabled, always performing in
3204  * parallel or conditionally performing in parallel.
3205  */
3206  Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
3207  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
3208  Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
3209 
3210  /* Skip indexes that don't participate in parallel vacuum */
3211  if (!can_parallel_vacuum[i])
3212  continue;
3213 
3214  if (Irel[i]->rd_indam->amusemaintenanceworkmem)
3215  nindexes_mwm++;
3216 
3217  est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
3218 
3219  /*
3220  * Remember the number of indexes that support parallel operation for
3221  * each phase.
3222  */
3223  if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
3225  if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
3227  if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
3229  }
3230  shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
3231  shm_toc_estimate_keys(&pcxt->estimator, 1);
3232 
3233  /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
3234  maxtuples = compute_max_dead_tuples(nblocks, true);
3235  est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
3236  shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
3237  shm_toc_estimate_keys(&pcxt->estimator, 1);
3238 
3239  /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
3240  querylen = strlen(debug_query_string);
3241  shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
3242  shm_toc_estimate_keys(&pcxt->estimator, 1);
3243 
3244  InitializeParallelDSM(pcxt);
3245 
3246  /* Prepare shared information */
3247  shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
3248  MemSet(shared, 0, est_shared);
3249  shared->relid = relid;
3250  shared->elevel = elevel;
3251  shared->maintenance_work_mem_worker =
3252  (nindexes_mwm > 0) ?
3253  maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
3255 
3256  pg_atomic_init_u32(&(shared->cost_balance), 0);
3257  pg_atomic_init_u32(&(shared->active_nworkers), 0);
3258  pg_atomic_init_u32(&(shared->idx), 0);
3259  shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
3260  prepare_index_statistics(shared, can_parallel_vacuum, nindexes);
3261 
3263  lps->lvshared = shared;
3264 
3265  /* Prepare the dead tuple space */
3266  dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
3267  dead_tuples->max_tuples = maxtuples;
3268  dead_tuples->num_tuples = 0;
3269  MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
3270  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
3271  vacrelstats->dead_tuples = dead_tuples;
3272 
3273  /* Store query string for workers */
3274  sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
3275  memcpy(sharedquery, debug_query_string, querylen + 1);
3276  sharedquery[querylen] = '\0';
3277  shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
3278 
3279  pfree(can_parallel_vacuum);
3280  return lps;
3281 }
3282 
3283 /*
3284  * Destroy the parallel context, and end parallel mode.
3285  *
3286  * Since writes are not allowed during the parallel mode, so we copy the
3287  * updated index statistics from DSM in local memory and then later use that
3288  * to update the index statistics. One might think that we can exit from
3289  * parallel mode, update the index statistics and then destroy parallel
3290  * context, but that won't be safe (see ExitParallelMode).
3291  */
3292 static void
3294  LVParallelState *lps, int nindexes)
3295 {
3296  int i;
3297 
3299 
3300  /* Copy the updated statistics */
3301  for (i = 0; i < nindexes; i++)
3302  {
3303  LVSharedIndStats *indstats = get_indstats(lps->lvshared, i);
3304 
3305  /*
3306  * Skip unused slot. The statistics of this index are already stored
3307  * in local memory.
3308  */
3309  if (indstats == NULL)
3310  continue;
3311 
3312  if (indstats->updated)
3313  {
3314  stats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
3315  memcpy(stats[i], &(indstats->stats), sizeof(IndexBulkDeleteResult));
3316  }
3317  else
3318  stats[i] = NULL;
3319  }
3320 
3322  ExitParallelMode();
3323 
3324  /* Deactivate parallel vacuum */
3325  pfree(lps);
3326  lps = NULL;
3327 }
3328 
3329 /* Return the Nth index statistics or NULL */
3330 static LVSharedIndStats *
3331 get_indstats(LVShared *lvshared, int n)
3332 {
3333  int i;
3334  char *p;
3335 
3336  if (IndStatsIsNull(lvshared, n))
3337  return NULL;
3338 
3339  p = (char *) GetSharedIndStats(lvshared);
3340  for (i = 0; i < n; i++)
3341  {
3342  if (IndStatsIsNull(lvshared, i))
3343  continue;
3344 
3345  p += sizeof(LVSharedIndStats);
3346  }
3347 
3348  return (LVSharedIndStats *) p;
3349 }
3350 
3351 /*
3352  * Returns true, if the given index can't participate in parallel index vacuum
3353  * or parallel index cleanup, false, otherwise.
3354  */
3355 static bool
3357 {
3358  uint8 vacoptions = indrel->rd_indam->amparallelvacuumoptions;
3359 
3360  /* first_time must be true only if for_cleanup is true */
3361  Assert(lvshared->for_cleanup || !lvshared->first_time);
3362 
3363  if (lvshared->for_cleanup)
3364  {
3365  /* Skip, if the index does not support parallel cleanup */
3366  if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
3367  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
3368  return true;
3369 
3370  /*
3371  * Skip, if the index supports parallel cleanup conditionally, but we
3372  * have already processed the index (for bulkdelete). See the
3373  * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
3374  * when indexes support parallel cleanup conditionally.
3375  */
3376  if (!lvshared->first_time &&
3377  ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
3378  return true;
3379  }
3380  else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
3381  {
3382  /* Skip if the index does not support parallel bulk deletion */
3383  return true;
3384  }
3385 
3386  return false;
3387 }
3388 
3389 /*
3390  * Perform work within a launched parallel process.
3391  *
3392  * Since parallel vacuum workers perform only index vacuum or index cleanup,
3393  * we don't need to report the progress information.
3394  */
3395 void
3397 {
3398  Relation onerel;
3399  Relation *indrels;
3400  LVShared *lvshared;
3401  LVDeadTuples *dead_tuples;
3402  int nindexes;
3403  char *sharedquery;
3404  IndexBulkDeleteResult **stats;
3405  LVRelStats vacrelstats;
3406  ErrorContextCallback errcallback;
3407 
3409  false);
3410  elevel = lvshared->elevel;
3411 
3412  ereport(DEBUG1,
3413  (errmsg("starting parallel vacuum worker for %s",
3414  lvshared->for_cleanup ? "cleanup" : "bulk delete")));
3415 
3416  /* Set debug_query_string for individual workers */
3417  sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, false);
3418  debug_query_string = sharedquery;
3420 
3421  /*
3422  * Open table. The lock mode is the same as the leader process. It's
3423  * okay because the lock mode does not conflict among the parallel
3424  * workers.
3425  */
3426  onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
3427 
3428  /*
3429  * Open all indexes. indrels are sorted in order by OID, which should be
3430  * matched to the leader's one.
3431  */
3432  vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels);
3433  Assert(nindexes > 0);
3434 
3435  /* Set dead tuple space */
3436  dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
3438  false);
3439 
3440  /* Set cost-based vacuum delay */
3442  VacuumCostBalance = 0;
3443  VacuumPageHit = 0;
3444  VacuumPageMiss = 0;
3445  VacuumPageDirty = 0;
3447  VacuumSharedCostBalance = &(lvshared->cost_balance);
3448  VacuumActiveNWorkers = &(lvshared->active_nworkers);
3449 
3450  stats = (IndexBulkDeleteResult **)
3451  palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
3452 
3453  if (lvshared->maintenance_work_mem_worker > 0)
3455 
3456  /*
3457  * Initialize vacrelstats for use as error callback arg by parallel
3458  * worker.
3459  */
3460  vacrelstats.relnamespace = get_namespace_name(RelationGetNamespace(onerel));
3461  vacrelstats.relname = pstrdup(RelationGetRelationName(onerel));
3462  vacrelstats.indname = NULL;
3463  vacrelstats.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
3464 
3465  /* Setup error traceback support for ereport() */
3466  errcallback.callback = vacuum_error_callback;
3467  errcallback.arg = &vacrelstats;
3468  errcallback.previous = error_context_stack;
3469  error_context_stack = &errcallback;
3470 
3471  /* Process indexes to perform vacuum/cleanup */
3472  parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes,
3473  &vacrelstats);
3474 
3475  /* Pop the error context stack */
3476  error_context_stack = errcallback.previous;
3477 
3478  vac_close_indexes(nindexes, indrels, RowExclusiveLock);
3480  pfree(stats);
3481 }
3482 
3483 /*
3484  * Error context callback for errors occurring during vacuum.
3485  */
3486 static void
3488 {
3489  LVRelStats *errinfo = arg;
3490 
3491  switch (errinfo->phase)
3492  {
3494  if (BlockNumberIsValid(errinfo->blkno))
3495  errcontext("while scanning block %u of relation \"%s.%s\"",
3496  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3497  break;
3498 
3500  if (BlockNumberIsValid(errinfo->blkno))
3501  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3502  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3503  break;
3504 
3506  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3507  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3508  break;
3509 
3511  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3512  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3513  break;
3514 
3516  if (BlockNumberIsValid(errinfo->blkno))
3517  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3518  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3519  break;
3520 
3522  default:
3523  return; /* do nothing; the errinfo may not be
3524  * initialized */
3525  }
3526 }
3527 
3528 /* Update vacuum error callback for the current phase, block, and index. */
3529 static void
3531  char *indname)
3532 {
3533  errinfo->blkno = blkno;
3534  errinfo->phase = phase;
3535 
3536  /* Free index name from any previous phase */
3537  if (errinfo->indname)
3538  pfree(errinfo->indname);
3539 
3540  /* For index phases, save the name of the current index for the callback */
3541  errinfo->indname = indname ? pstrdup(indname) : NULL;
3542 }
static void update_vacuum_error_info(LVRelStats *errinfo, int phase, BlockNumber blkno, char *indname)
Definition: vacuumlazy.c:3530
int autovacuum_work_mem
Definition: autovacuum.c:116
double new_rel_tuples
Definition: vacuumlazy.c:296
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:6905
#define GetSharedIndStats(s)
Definition: vacuumlazy.c:250
uint8 amparallelvacuumoptions
Definition: amapi.h:205
XLogRecPtr log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
Definition: heapam.c:7148
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3487
struct IndexAmRoutine * rd_indam
Definition: rel.h:168
int multixact_freeze_table_age
Definition: vacuum.h:215
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:1976
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:84
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
Definition: vacuumlazy.c:1879
int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, bool report_stats, TransactionId *latestRemovedXid)
Definition: pruneheap.c:180
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3617
XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples)
Definition: heapam.c:7228
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define PageIsEmpty(page)
Definition: bufpage.h:222
int64 VacuumPageMiss
Definition: globals.c:144
#define DEBUG1
Definition: elog.h:25
void table_close(Relation relation, LOCKMODE lockmode)
Definition: table.c:133
static BlockNumber count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2649
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1009
BlockNumber rel_pages
Definition: vacuumlazy.c:290
pg_atomic_uint32 * VacuumActiveNWorkers
Definition: vacuum.c:77
static int compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested, bool *can_parallel_vacuum)
Definition: vacuumlazy.c:3032
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1123
OffsetNumber offset
Definition: heapam_xlog.h:321
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:1797
int VacuumCostBalance
Definition: globals.c:147
ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:169
bool estimated_count
Definition: vacuumlazy.c:211
static bool lazy_tid_reaped(ItemPointer itemptr, void *state)
Definition: vacuumlazy.c:2862
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:334
ParallelContext * CreateParallelContext(const char *library_name, const char *function_name, int nworkers)
Definition: parallel.c:162
LVDeadTuples * dead_tuples
Definition: vacuumlazy.c:302
#define PageIsAllVisible(page)
Definition: bufpage.h:385
uint32 TransactionId
Definition: c.h:513
#define IndStatsIsNull(s, i)
Definition: vacuumlazy.c:252
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:282
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:181
#define PARALLEL_VACUUM_KEY_DEAD_TUPLES
Definition: vacuumlazy.c:138
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
Definition: pgstat.c:3140
Oid relid
Definition: vacuumlazy.c:190
#define PROGRESS_VACUUM_MAX_DEAD_TUPLES
Definition: progress.h:26
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:31
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *buf)
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
double tuples_deleted
Definition: vacuumlazy.c:300
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
void pgstat_report_activity(BackendState state, const char *cmd_str)
Definition: pgstat.c:3062
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1406
int64 TimestampTz
Definition: timestamp.h:39
#define SizeOfDeadTuples(cnt)
Definition: vacuumlazy.c:174
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static uint32 pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
Definition: atomics.h:401
#define MaxOffsetNumber
Definition: off.h:28
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3161
#define VISIBILITYMAP_ALL_FROZEN
Definition: visibilitymap.h:27
char * pstrdup(const char *in)
Definition: mcxt.c:1186
static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr)
Definition: vacuumlazy.c:2838
shm_toc_estimator estimator
Definition: parallel.h:42
bool analyze_only
Definition: genam.h:47
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:590
bool useindex
Definition: vacuumlazy.c:287
#define XLogIsNeeded()
Definition: xlog.h:182
struct timeval instr_time
Definition: instr_time.h:150
int64 VacuumPageHit
Definition: globals.c:143
#define Min(x, y)
Definition: c.h:920
bool report_progress
Definition: genam.h:48
BlockNumber tupcount_pages
Definition: vacuumlazy.c:294
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
BufferAccessStrategy strategy
Definition: genam.h:52
struct LVSharedIndStats LVSharedIndStats
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:34
unsigned char uint8
Definition: c.h:365
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define InvalidBuffer
Definition: buf.h:25
#define gettext_noop(x)
Definition: c.h:1160
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:276
char * relnamespace
Definition: vacuumlazy.c:284
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
#define VACUUM_OPTION_MAX_VALID_VALUE
Definition: vacuum.h:63
Relation index
Definition: genam.h:46
BlockNumber scanned_pages
Definition: vacuumlazy.c:291
VacErrPhase phase
Definition: vacuumlazy.c:310
#define MemSet(start, val, len)
Definition: c.h:971
#define INFO
Definition: elog.h:33
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:102
static uint32 pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:386
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
void vacuum_set_xid_limits(Relation rel, int freeze_min_age, int freeze_table_age, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit)
Definition: vacuum.c:931
int64 VacuumPageDirty
Definition: globals.c:145
static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex)
Definition: vacuumlazy.c:2788
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3323
#define BITMAPLEN(NATTS)
Definition: htup_details.h:547
static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared)
Definition: vacuumlazy.c:3356
int nindexes_parallel_bulkdel
Definition: vacuumlazy.c:277
BlockNumber pinskipped_pages
Definition: vacuumlazy.c:292
int maintenance_work_mem_worker
Definition: vacuumlazy.c:221
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define SizeOfPageHeaderData
Definition: bufpage.h:216
#define LOG
Definition: elog.h:26
Form_pg_class rd_rel
Definition: rel.h:89
unsigned int Oid
Definition: postgres_ext.h:31
#define shm_toc_estimate_chunk(e, sz)
Definition: shm_toc.h:51
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
int nindexes_parallel_condcleanup
Definition: vacuumlazy.c:279
void(* callback)(void *arg)
Definition: elog.h:229
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1682
struct ErrorContextCallback * previous
Definition: elog.h:228
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
int freeze_table_age
Definition: vacuum.h:212
void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
Definition: parallel.c:501
int errdetail_internal(const char *fmt,...)
Definition: elog.c:984
#define PARALLEL_VACUUM_KEY_QUERY_TEXT
Definition: vacuumlazy.c:139
static LVSharedIndStats * get_indstats(LVShared *lvshared, int n)
Definition: vacuumlazy.c:3331
uint16 OffsetNumber
Definition: off.h:24
ItemPointerData * ItemPointer
Definition: itemptr.h:49
#define VISIBILITYMAP_VALID_BITS
Definition: visibilitymap.h:28
HeapTupleHeader t_data
Definition: htup.h:68
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen_p)
Definition: heapam.c:6128
#define FORCE_CHECK_PAGE()
ErrorContextCallback * error_context_stack
Definition: elog.c:92
ParallelContext * pcxt
Definition: vacuumlazy.c:268
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:34
#define SizeOfLVShared
Definition: vacuumlazy.c:249
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:676
pg_atomic_uint32 cost_balance
Definition: vacuumlazy.c:228
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:852
void WaitForParallelWorkersToFinish(ParallelContext *pcxt)
Definition: parallel.c:738
BlockNumber old_rel_pages
Definition: vacuumlazy.c:289
void pg_usleep(long microsec)
Definition: signal.c:53
void DestroyParallelContext(ParallelContext *pcxt)
Definition: parallel.c:892
#define PREFETCH_SIZE
Definition: vacuumlazy.c:130
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:6773
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
void pfree(void *pointer)
Definition: mcxt.c:1056
bool IsInParallelMode(void)
Definition: xact.c:996
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:110
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:319
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3346
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3754
#define ERROR
Definition: elog.h:43
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:90
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:658
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:170
int freeze_min_age
Definition: vacuum.h:211
ItemPointerData t_self
Definition: htup.h:65
void ExitParallelMode(void)
Definition: xact.c:976
bool is_wraparound
Definition: vacuum.h:217
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2155
#define DEBUG2
Definition: elog.h:24
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:324
static TransactionId FreezeLimit
Definition: vacuumlazy.c:317
IndexBulkDeleteResult stats
Definition: vacuumlazy.c:262
static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:1738
uint32 t_len
Definition: htup.h:64
void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
Definition: heapam.c:6357
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3155
#define NoLock
Definition: lockdefs.h:34
static char * buf
Definition: pg_test_fsync.c:67
#define PageSetAllVisible(page)
Definition: bufpage.h:387
bool IsUnderPostmaster
Definition: globals.c:109
#define FirstOffsetNumber
Definition: off.h:27
#define RowExclusiveLock
Definition: lockdefs.h:38
struct LVDeadTuples LVDeadTuples
int errdetail(const char *fmt,...)
Definition: elog.c:957
int elevel
Definition: vacuumlazy.c:191
static MultiXactId MultiXactCutoff
Definition: vacuumlazy.c:318
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:247
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:470
static TransactionId OldestXmin
Definition: vacuumlazy.c:316
pg_atomic_uint32 idx
Definition: vacuumlazy.c:242
unsigned int uint32
Definition: c.h:367
Oid t_tableOid
Definition: htup.h:66
#define MultiXactIdIsValid(multi)
Definition: multixact.h:27
int min_parallel_index_scan_size
Definition: allpaths.c:65
int nworkers_launched
Definition: parallel.h:38
#define BufferGetPage(buffer)
Definition: bufmgr.h:157
static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2369
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void LaunchParallelWorkers(ParallelContext *pcxt)
Definition: parallel.c:515
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3302
static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2211
bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf)
Definition: heapam.c:6826
#define PROGRESS_VACUUM_NUM_DEAD_TUPLES
Definition: progress.h:27
#define IsParallelWorker()
Definition: parallel.h:61
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
double new_live_tuples
Definition: vacuumlazy.c:297
bool first_time
Definition: vacuumlazy.c:199
VacOptTernaryValue index_cleanup
Definition: vacuum.h:221
static bool heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:2913
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:124
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:101
#define MaxAllocSize
Definition: memutils.h:40
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define WARNING
Definition: elog.h:40
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:1933
const char * debug_query_string
Definition: postgres.c:88
static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum, int nindexes)
Definition: vacuumlazy.c:3093
double reltuples
Definition: vacuumlazy.c:210
#define VACUUM_OPTION_NO_PARALLEL
Definition: vacuum.h:39
void InitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:200
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int VacuumCostBalanceLocal
Definition: vacuum.c:78
pg_atomic_uint32 * VacuumSharedCostBalance
Definition: vacuum.c:76
static int elevel
Definition: vacuumlazy.c:314
uint8 bits8
Definition: c.h:374
#define ngettext(s, p, n)
Definition: c.h:1146
static void lazy_cleanup_index(Relation indrel, IndexBulkDeleteResult **stats, double reltuples, bool estimated_count, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2423
int nindexes_parallel_cleanup
Definition: vacuumlazy.c:278
static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2041
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:230
static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:3293
#define ParallelVacuumIsActive(lps)
Definition: vacuumlazy.c:145
void * palloc0(Size size)
Definition: mcxt.c:980
char * indname
Definition: vacuumlazy.c:308
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:100
void pgstat_progress_end_command(void)
Definition: pgstat.c:3212
char * relname
Definition: vacuumlazy.c:285
IndexBulkDeleteResult * index_bulk_delete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
Definition: indexam.c:688
void heap_vacuum_rel(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:396
Size add_size(Size s1, Size s2)
Definition: shmem.c:498
#define BufferGetPageSize(buffer)
Definition: bufmgr.h:144
Oid MyDatabaseId
Definition: globals.c:85
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3560
#define PARALLEL_VACUUM_KEY_SHARED
Definition: vacuumlazy.c:137
int max_parallel_maintenance_workers
Definition: globals.c:123
void ReinitializeParallelDSM(ParallelContext *pcxt)
Definition: parallel.c:451
#define InvalidMultiXactId
Definition: multixact.h:23
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:195
static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats, int nindexes)
Definition: vacuumlazy.c:3118
static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, int nindexes)
Definition: vacuumlazy.c:2314
static bool should_attempt_truncation(VacuumParams *params, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2498
pg_atomic_uint32 active_nworkers
Definition: vacuumlazy.c:235
VacOptTernaryValue truncate
Definition: vacuum.h:223
static LVParallelState * begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats, BlockNumber nblocks, int nindexes, int nrequested)
Definition: vacuumlazy.c:3149
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:35
#define ereport(elevel,...)
Definition: elog.h:144
void PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:479
int num_index_scans
Definition: vacuumlazy.c:303
int maintenance_work_mem
Definition: globals.c:122
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:345
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
double old_live_tuples
Definition: vacuumlazy.c:295
static void vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:686
static uint32 pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
Definition: atomics.h:328
int message_level
Definition: genam.h:50
TransactionId MultiXactId
Definition: c.h:523
RelFileNode rd_node
Definition: rel.h:55
int errmsg_internal(const char *fmt,...)
Definition: elog.c:911
double num_heap_tuples
Definition: genam.h:51
#define Max(x, y)
Definition: c.h:914
static BufferAccessStrategy vac_strategy
Definition: vacuumlazy.c:320
#define ShareUpdateExclusiveLock
Definition: lockdefs.h:39
#define PageClearAllVisible(page)
Definition: bufpage.h:389
uint64 XLogRecPtr
Definition: xlogdefs.h:21
struct LVShared LVShared
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:685
#define Assert(condition)
Definition: c.h:738
#define VACUUM_OPTION_PARALLEL_COND_CLEANUP
Definition: vacuum.h:52
double new_dead_tuples
Definition: vacuumlazy.c:298
Definition: regguts.h:298
bits8 bitmap[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuumlazy.c:244
TransactionId latestRemovedXid
Definition: vacuumlazy.c:304
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:205
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:33
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:32
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
Definition: pgstat.c:3183
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
size_t Size
Definition: c.h:466
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:30
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
Definition: vacuumlazy.c:3396
int nworkers
Definition: vacuum.h:231
#define InvalidBlockNumber
Definition: block.h:33
#define shm_toc_estimate_keys(e, cnt)
Definition: shm_toc.h:53
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7177
#define MAXALIGN(LEN)
Definition: c.h:691
#define BufferIsValid(bufnum)
Definition: bufmgr.h:111
int log_min_duration
Definition: vacuum.h:218
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
void EnterParallelMode(void)
Definition: xact.c:963
LVShared * lvshared
Definition: vacuumlazy.c:271
#define VACUUM_OPTION_PARALLEL_BULKDEL
Definition: vacuum.h:45
void * shm_toc_allocate(shm_toc *toc, Size nbytes)
Definition: shm_toc.c:88
#define RelationNeedsWAL(relation)
Definition: rel.h:538
IndexBulkDeleteResult * index_vacuum_cleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
Definition: indexam.c:709
#define VISIBILITYMAP_ALL_VISIBLE
Definition: visibilitymap.h:26
struct LVRelStats LVRelStats
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:156
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
Definition: pgstat.c:1406
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:545
#define PageGetLSN(page)
Definition: bufpage.h:366
bool for_cleanup
Definition: vacuumlazy.c:198
#define AccessExclusiveLock
Definition: lockdefs.h:45
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2571
BlockNumber pages_removed
Definition: vacuumlazy.c:299
BlockNumber nonempty_pages
Definition: vacuumlazy.c:301
void PageRepairFragmentation(Page page)
Definition: bufpage.c:475
#define PageIsNew(page)
Definition: bufpage.h:229
void shm_toc_insert(shm_toc *toc, uint64 key, void *address)
Definition: shm_toc.c:171
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:824
static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive)
Definition: vacuumlazy.c:731
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *buf)
BlockNumber frozenskipped_pages
Definition: vacuumlazy.c:293
double VacuumCostDelay
Definition: globals.c:141
#define elog(elevel,...)
Definition: elog.h:214
int old_snapshot_threshold
Definition: snapmgr.c:75
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3156
int i
int options
Definition: vacuum.h:210
#define errcontext
Definition: elog.h:185
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:85
void * arg
struct LVParallelState LVParallelState
static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVSharedIndStats *shared_indstats, LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2254
static bool lazy_check_needs_freeze(Buffer buf, bool *hastup)
Definition: vacuumlazy.c:1989
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
static void pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:223
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
uint32 offset
Definition: vacuumlazy.c:243
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static void pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
Definition: atomics.h:258
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
static int vac_cmp_itemptr(const void *left, const void *right)
Definition: vacuumlazy.c:2880
void vacuum_delay_point(void)
Definition: vacuum.c:1997
#define MAXDEADTUPLES(max_size)
Definition: vacuumlazy.c:177
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1657
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
BlockNumber blkno
Definition: vacuumlazy.c:309
Relation table_open(Oid relationId, LOCKMODE lockmode)
Definition: table.c:39
#define VACUUM_OPTION_PARALLEL_CLEANUP
Definition: vacuum.h:60
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact)
Definition: vacuum.c:1208
VacErrPhase
Definition: vacuumlazy.c:148
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
#define LAZY_ALLOC_TUPLES
Definition: vacuumlazy.c:118
int Buffer
Definition: buf.h:23
#define _(x)
Definition: elog.c:88
#define RelationGetRelid(relation)
Definition: rel.h:436
int multixact_freeze_min_age
Definition: vacuum.h:213
void * shm_toc_lookup(shm_toc *toc, uint64 key, bool noError)
Definition: shm_toc.c:232
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:352
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2519
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:32
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:91
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
shm_toc * toc
Definition: parallel.h:45
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
Definition: vacuumlazy.c:2820
bool VacuumCostActive
Definition: globals.c:148
bool estimated_count
Definition: genam.h:49
static uint32 pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
Definition: atomics.h:241
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:229
#define RelationGetNamespace(relation)
Definition: rel.h:477
static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats, LVShared *lvshared, LVDeadTuples *dead_tuples, int nindexes, LVRelStats *vacrelstats)
Definition: vacuumlazy.c:2159
bool lock_waiter_detected
Definition: vacuumlazy.c:305