PostgreSQL Source Code  git master
vacuumlazy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  * The major space usage for vacuuming is storage for the array of dead TIDs
7  * that are to be removed from indexes. We want to ensure we can vacuum even
8  * the very largest relations with finite memory space usage. To do that, we
9  * set upper bounds on the number of TIDs we can keep track of at once.
10  *
11  * We are willing to use at most maintenance_work_mem (or perhaps
12  * autovacuum_work_mem) memory space to keep track of dead TIDs. We initially
13  * allocate an array of TIDs of that size, with an upper limit that depends on
14  * table size (this limit ensures we don't allocate a huge area uselessly for
15  * vacuuming small tables). If the array threatens to overflow, we must call
16  * lazy_vacuum to vacuum indexes (and to vacuum the pages that we've pruned).
17  * This frees up the memory space dedicated to storing dead TIDs.
18  *
19  * In practice VACUUM will often complete its initial pass over the target
20  * heap relation without ever running out of space to store TIDs. This means
21  * that there only needs to be one call to lazy_vacuum, after the initial pass
22  * completes.
23  *
24  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  * src/backend/access/heap/vacuumlazy.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include <math.h>
36 
37 #include "access/amapi.h"
38 #include "access/genam.h"
39 #include "access/heapam.h"
40 #include "access/heapam_xlog.h"
41 #include "access/htup_details.h"
42 #include "access/multixact.h"
43 #include "access/transam.h"
44 #include "access/visibilitymap.h"
45 #include "access/xact.h"
46 #include "access/xlog.h"
47 #include "access/xloginsert.h"
48 #include "catalog/index.h"
49 #include "catalog/storage.h"
50 #include "commands/dbcommands.h"
51 #include "commands/progress.h"
52 #include "commands/vacuum.h"
53 #include "executor/instrument.h"
54 #include "miscadmin.h"
55 #include "optimizer/paths.h"
56 #include "pgstat.h"
57 #include "portability/instr_time.h"
58 #include "postmaster/autovacuum.h"
59 #include "storage/bufmgr.h"
60 #include "storage/freespace.h"
61 #include "storage/lmgr.h"
62 #include "tcop/tcopprot.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_rusage.h"
66 #include "utils/timestamp.h"
67 
68 
69 /*
70  * Space/time tradeoff parameters: do these need to be user-tunable?
71  *
72  * To consider truncating the relation, we want there to be at least
73  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
74  * is less) potentially-freeable pages.
75  */
76 #define REL_TRUNCATE_MINIMUM 1000
77 #define REL_TRUNCATE_FRACTION 16
78 
79 /*
80  * Timing parameters for truncate locking heuristics.
81  *
82  * These were not exposed as user tunable GUC values because it didn't seem
83  * that the potential for improvement was great enough to merit the cost of
84  * supporting them.
85  */
86 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
87 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
88 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
89 
90 /*
91  * Threshold that controls whether we bypass index vacuuming and heap
92  * vacuuming as an optimization
93  */
94 #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
95 
96 /*
97  * Perform a failsafe check every 4GB during the heap scan, approximately
98  */
99 #define FAILSAFE_EVERY_PAGES \
100  ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
101 
102 /*
103  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
104  * (it won't be exact because we only vacuum FSM after processing a heap page
105  * that has some removable tuples). When there are indexes, this is ignored,
106  * and we vacuum FSM after each index/heap cleaning pass.
107  */
108 #define VACUUM_FSM_EVERY_PAGES \
109  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
110 
111 /*
112  * Before we consider skipping a page that's marked as clean in
113  * visibility map, we must've seen at least this many clean pages.
114  */
115 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
116 
117 /*
118  * Size of the prefetch window for lazy vacuum backwards truncation scan.
119  * Needs to be a power of 2.
120  */
121 #define PREFETCH_SIZE ((BlockNumber) 32)
122 
123 /*
124  * Macro to check if we are in a parallel vacuum. If true, we are in the
125  * parallel mode and the DSM segment is initialized.
126  */
127 #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
128 
129 /* Phases of vacuum during which we report error context. */
130 typedef enum
131 {
139 
140 typedef struct LVRelState
141 {
142  /* Target heap relation and its indexes */
145  int nindexes;
146 
147  /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
149  /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
151  /* Wraparound failsafe has been triggered? */
153  /* Consider index vacuuming bypass optimization? */
155 
156  /* Doing index vacuuming, index cleanup, rel truncation? */
160 
161  /* Buffer access strategy and parallel vacuum state */
164 
165  /* rel's initial relfrozenxid and relminmxid */
168  double old_live_tuples; /* previous value of pg_class.reltuples */
169 
170  /* VACUUM operation's cutoffs for freezing and pruning */
173  /* VACUUM operation's target cutoffs for freezing XIDs and MultiXactIds */
176  /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
180 
181  /* Error reporting state */
183  char *relname;
184  char *indname; /* Current index name */
185  BlockNumber blkno; /* used only for heap operations */
186  OffsetNumber offnum; /* used only for heap operations */
188  bool verbose; /* VACUUM VERBOSE? */
189 
190  /*
191  * dead_items stores TIDs whose index tuples are deleted by index
192  * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
193  * that has been processed by lazy_scan_prune. Also needed by
194  * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
195  * LP_UNUSED during second heap pass.
196  */
197  VacDeadItems *dead_items; /* TIDs whose index tuples we'll delete */
198  BlockNumber rel_pages; /* total number of pages */
199  BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
200  BlockNumber removed_pages; /* # pages removed by relation truncation */
201  BlockNumber frozen_pages; /* # pages with newly frozen tuples */
202  BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
203  BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
204  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
205 
206  /* Statistics output by us, for table */
207  double new_rel_tuples; /* new estimated total # of tuples */
208  double new_live_tuples; /* new estimated total # of live tuples */
209  /* Statistics output by index AMs */
211 
212  /* Instrumentation counters */
214  /* Counters that follow are only for scanned_pages */
215  int64 tuples_deleted; /* # deleted from table */
216  int64 tuples_frozen; /* # newly frozen */
217  int64 lpdead_items; /* # deleted from indexes */
218  int64 live_tuples; /* # live tuples remaining */
219  int64 recently_dead_tuples; /* # dead, but not yet removable */
220  int64 missed_dead_tuples; /* # removable, but not removed */
222 
223 /*
224  * State returned by lazy_scan_prune()
225  */
226 typedef struct LVPagePruneState
227 {
228  bool hastup; /* Page prevents rel truncation? */
229  bool has_lpdead_items; /* includes existing LP_DEAD items */
230 
231  /*
232  * State describes the proper VM bit states to set for the page following
233  * pruning and freezing. all_visible implies !has_lpdead_items, but don't
234  * trust all_frozen result unless all_visible is also set to true.
235  */
236  bool all_visible; /* Every item visible to all? */
237  bool all_frozen; /* provided all_visible is also true */
238  TransactionId visibility_cutoff_xid; /* For recovery conflicts */
240 
241 /* Struct for saving and restoring vacuum error information. */
242 typedef struct LVSavedErrInfo
243 {
248 
249 
250 /* non-export function prototypes */
251 static void lazy_scan_heap(LVRelState *vacrel);
252 static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
253  BlockNumber next_block,
254  bool *next_unskippable_allvis,
255  bool *skipping_current_range);
256 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
257  BlockNumber blkno, Page page,
258  bool sharelock, Buffer vmbuffer);
259 static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
260  BlockNumber blkno, Page page,
261  LVPagePruneState *prunestate);
262 static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
263  BlockNumber blkno, Page page,
264  bool *hastup, bool *recordfreespace);
265 static void lazy_vacuum(LVRelState *vacrel);
266 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
267 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
268 static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
269  Buffer buffer, int index, Buffer *vmbuffer);
270 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
271 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
273  IndexBulkDeleteResult *istat,
274  double reltuples,
275  LVRelState *vacrel);
277  IndexBulkDeleteResult *istat,
278  double reltuples,
279  bool estimated_count,
280  LVRelState *vacrel);
281 static bool should_attempt_truncation(LVRelState *vacrel);
282 static void lazy_truncate_heap(LVRelState *vacrel);
284  bool *lock_waiter_detected);
285 static void dead_items_alloc(LVRelState *vacrel, int nworkers);
286 static void dead_items_cleanup(LVRelState *vacrel);
287 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
288  TransactionId *visibility_cutoff_xid, bool *all_frozen);
289 static void update_relstats_all_indexes(LVRelState *vacrel);
290 static void vacuum_error_callback(void *arg);
291 static void update_vacuum_error_info(LVRelState *vacrel,
292  LVSavedErrInfo *saved_vacrel,
293  int phase, BlockNumber blkno,
294  OffsetNumber offnum);
295 static void restore_vacuum_error_info(LVRelState *vacrel,
296  const LVSavedErrInfo *saved_vacrel);
297 
298 
299 /*
300  * heap_vacuum_rel() -- perform VACUUM for one heap relation
301  *
302  * This routine sets things up for and then calls lazy_scan_heap, where
303  * almost all work actually takes place. Finalizes everything after call
304  * returns by managing relation truncation and updating rel's pg_class
305  * entry. (Also updates pg_class entries for any indexes that need it.)
306  *
307  * At entry, we have already established a transaction and opened
308  * and locked the relation.
309  */
310 void
312  BufferAccessStrategy bstrategy)
313 {
314  LVRelState *vacrel;
315  bool verbose,
316  instrument,
317  aggressive,
318  skipwithvm,
319  frozenxid_updated,
320  minmulti_updated;
321  TransactionId OldestXmin,
322  FreezeLimit;
323  MultiXactId OldestMxact,
324  MultiXactCutoff;
325  BlockNumber orig_rel_pages,
326  new_rel_pages,
327  new_rel_allvisible;
328  PGRUsage ru0;
329  TimestampTz starttime = 0;
330  PgStat_Counter startreadtime = 0,
331  startwritetime = 0;
332  WalUsage startwalusage = pgWalUsage;
333  int64 StartPageHit = VacuumPageHit,
334  StartPageMiss = VacuumPageMiss,
335  StartPageDirty = VacuumPageDirty;
336  ErrorContextCallback errcallback;
337  char **indnames = NULL;
338 
339  verbose = (params->options & VACOPT_VERBOSE) != 0;
340  instrument = (verbose || (IsAutoVacuumWorkerProcess() &&
341  params->log_min_duration >= 0));
342  if (instrument)
343  {
344  pg_rusage_init(&ru0);
345  starttime = GetCurrentTimestamp();
346  if (track_io_timing)
347  {
348  startreadtime = pgStatBlockReadTime;
349  startwritetime = pgStatBlockWriteTime;
350  }
351  }
352 
354  RelationGetRelid(rel));
355 
356  /*
357  * Get OldestXmin cutoff, which is used to determine which deleted tuples
358  * are considered DEAD, not just RECENTLY_DEAD. Also get related cutoffs
359  * used to determine which XIDs/MultiXactIds will be frozen. If this is
360  * an aggressive VACUUM then lazy_scan_heap cannot leave behind unfrozen
361  * XIDs < FreezeLimit (all MXIDs < MultiXactCutoff also need to go away).
362  */
363  aggressive = vacuum_set_xid_limits(rel,
364  params->freeze_min_age,
365  params->multixact_freeze_min_age,
366  params->freeze_table_age,
368  &OldestXmin, &OldestMxact,
369  &FreezeLimit, &MultiXactCutoff);
370 
371  skipwithvm = true;
373  {
374  /*
375  * Force aggressive mode, and disable skipping blocks using the
376  * visibility map (even those set all-frozen)
377  */
378  aggressive = true;
379  skipwithvm = false;
380  }
381 
382  /*
383  * Setup error traceback support for ereport() first. The idea is to set
384  * up an error context callback to display additional information on any
385  * error during a vacuum. During different phases of vacuum, we update
386  * the state so that the error context callback always display current
387  * information.
388  *
389  * Copy the names of heap rel into local memory for error reporting
390  * purposes, too. It isn't always safe to assume that we can get the name
391  * of each rel. It's convenient for code in lazy_scan_heap to always use
392  * these temp copies.
393  */
394  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
396  vacrel->relname = pstrdup(RelationGetRelationName(rel));
397  vacrel->indname = NULL;
399  vacrel->verbose = verbose;
400  errcallback.callback = vacuum_error_callback;
401  errcallback.arg = vacrel;
402  errcallback.previous = error_context_stack;
403  error_context_stack = &errcallback;
404  if (verbose)
405  {
407  if (aggressive)
408  ereport(INFO,
409  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
411  vacrel->relnamespace, vacrel->relname)));
412  else
413  ereport(INFO,
414  (errmsg("vacuuming \"%s.%s.%s\"",
416  vacrel->relnamespace, vacrel->relname)));
417  }
418 
419  /* Set up high level stuff about rel and its indexes */
420  vacrel->rel = rel;
421  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
422  &vacrel->indrels);
423  if (instrument && vacrel->nindexes > 0)
424  {
425  /* Copy index names used by instrumentation (not error reporting) */
426  indnames = palloc(sizeof(char *) * vacrel->nindexes);
427  for (int i = 0; i < vacrel->nindexes; i++)
428  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
429  }
430 
431  /*
432  * The index_cleanup param either disables index vacuuming and cleanup or
433  * forces it to go ahead when we would otherwise apply the index bypass
434  * optimization. The default is 'auto', which leaves the final decision
435  * up to lazy_vacuum().
436  *
437  * The truncate param allows user to avoid attempting relation truncation,
438  * though it can't force truncation to happen.
439  */
442  params->truncate != VACOPTVALUE_AUTO);
443  vacrel->aggressive = aggressive;
444  vacrel->skipwithvm = skipwithvm;
445  vacrel->failsafe_active = false;
446  vacrel->consider_bypass_optimization = true;
447  vacrel->do_index_vacuuming = true;
448  vacrel->do_index_cleanup = true;
449  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
450  if (params->index_cleanup == VACOPTVALUE_DISABLED)
451  {
452  /* Force disable index vacuuming up-front */
453  vacrel->do_index_vacuuming = false;
454  vacrel->do_index_cleanup = false;
455  }
456  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
457  {
458  /* Force index vacuuming. Note that failsafe can still bypass. */
459  vacrel->consider_bypass_optimization = false;
460  }
461  else
462  {
463  /* Default/auto, make all decisions dynamically */
465  }
466 
467  vacrel->bstrategy = bstrategy;
468  vacrel->relfrozenxid = rel->rd_rel->relfrozenxid;
469  vacrel->relminmxid = rel->rd_rel->relminmxid;
470  vacrel->old_live_tuples = rel->rd_rel->reltuples;
471 
472  /* Initialize page counters explicitly (be tidy) */
473  vacrel->scanned_pages = 0;
474  vacrel->removed_pages = 0;
475  vacrel->frozen_pages = 0;
476  vacrel->lpdead_item_pages = 0;
477  vacrel->missed_dead_pages = 0;
478  vacrel->nonempty_pages = 0;
479  /* dead_items_alloc allocates vacrel->dead_items later on */
480 
481  /* Allocate/initialize output statistics state */
482  vacrel->new_rel_tuples = 0;
483  vacrel->new_live_tuples = 0;
484  vacrel->indstats = (IndexBulkDeleteResult **)
485  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
486 
487  /* Initialize remaining counters (be tidy) */
488  vacrel->num_index_scans = 0;
489  vacrel->tuples_deleted = 0;
490  vacrel->tuples_frozen = 0;
491  vacrel->lpdead_items = 0;
492  vacrel->live_tuples = 0;
493  vacrel->recently_dead_tuples = 0;
494  vacrel->missed_dead_tuples = 0;
495 
496  /*
497  * Determine the extent of the blocks that we'll scan in lazy_scan_heap,
498  * and finalize cutoffs used for freezing and pruning in lazy_scan_prune.
499  *
500  * We expect vistest will always make heap_page_prune remove any deleted
501  * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become
502  * confused about whether a tuple should be frozen or removed. (In the
503  * future we might want to teach lazy_scan_prune to recompute vistest from
504  * time to time, to increase the number of dead tuples it can prune away.)
505  *
506  * We must determine rel_pages _after_ OldestXmin has been established.
507  * lazy_scan_heap's physical heap scan (scan of pages < rel_pages) is
508  * thereby guaranteed to not miss any tuples with XIDs < OldestXmin. These
509  * XIDs must at least be considered for freezing (though not necessarily
510  * frozen) during its scan.
511  */
512  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
513  vacrel->OldestXmin = OldestXmin;
514  vacrel->vistest = GlobalVisTestFor(rel);
515  /* FreezeLimit controls XID freezing (always <= OldestXmin) */
516  vacrel->FreezeLimit = FreezeLimit;
517  /* MultiXactCutoff controls MXID freezing (always <= OldestMxact) */
518  vacrel->MultiXactCutoff = MultiXactCutoff;
519  /* Initialize state used to track oldest extant XID/MXID */
520  vacrel->NewRelfrozenXid = OldestXmin;
521  vacrel->NewRelminMxid = OldestMxact;
522  vacrel->skippedallvis = false;
523 
524  /*
525  * Allocate dead_items array memory using dead_items_alloc. This handles
526  * parallel VACUUM initialization as part of allocating shared memory
527  * space used for dead_items. (But do a failsafe precheck first, to
528  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
529  * is already dangerously old.)
530  */
532  dead_items_alloc(vacrel, params->nworkers);
533 
534  /*
535  * Call lazy_scan_heap to perform all required heap pruning, index
536  * vacuuming, and heap vacuuming (plus related processing)
537  */
538  lazy_scan_heap(vacrel);
539 
540  /*
541  * Free resources managed by dead_items_alloc. This ends parallel mode in
542  * passing when necessary.
543  */
544  dead_items_cleanup(vacrel);
546 
547  /*
548  * Update pg_class entries for each of rel's indexes where appropriate.
549  *
550  * Unlike the later update to rel's pg_class entry, this is not critical.
551  * Maintains relpages/reltuples statistics used by the planner only.
552  */
553  if (vacrel->do_index_cleanup)
555 
556  /* Done with rel's indexes */
557  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
558 
559  /* Optionally truncate rel */
560  if (should_attempt_truncation(vacrel))
561  lazy_truncate_heap(vacrel);
562 
563  /* Pop the error context stack */
564  error_context_stack = errcallback.previous;
565 
566  /* Report that we are now doing final cleanup */
569 
570  /*
571  * Prepare to update rel's pg_class entry.
572  *
573  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
574  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
575  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
576  */
577  Assert(vacrel->NewRelfrozenXid == OldestXmin ||
578  TransactionIdPrecedesOrEquals(aggressive ? FreezeLimit :
579  vacrel->relfrozenxid,
580  vacrel->NewRelfrozenXid));
581  Assert(vacrel->NewRelminMxid == OldestMxact ||
582  MultiXactIdPrecedesOrEquals(aggressive ? MultiXactCutoff :
583  vacrel->relminmxid,
584  vacrel->NewRelminMxid));
585  if (vacrel->skippedallvis)
586  {
587  /*
588  * Must keep original relfrozenxid in a non-aggressive VACUUM that
589  * chose to skip an all-visible page range. The state that tracks new
590  * values will have missed unfrozen XIDs from the pages we skipped.
591  */
592  Assert(!aggressive);
595  }
596 
597  /*
598  * For safety, clamp relallvisible to be not more than what we're setting
599  * pg_class.relpages to
600  */
601  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
602  visibilitymap_count(rel, &new_rel_allvisible, NULL);
603  if (new_rel_allvisible > new_rel_pages)
604  new_rel_allvisible = new_rel_pages;
605 
606  /*
607  * Now actually update rel's pg_class entry.
608  *
609  * In principle new_live_tuples could be -1 indicating that we (still)
610  * don't know the tuple count. In practice that can't happen, since we
611  * scan every page that isn't skipped using the visibility map.
612  */
613  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
614  new_rel_allvisible, vacrel->nindexes > 0,
615  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
616  &frozenxid_updated, &minmulti_updated, false);
617 
618  /*
619  * Report results to the cumulative stats system, too.
620  *
621  * Deliberately avoid telling the stats system about LP_DEAD items that
622  * remain in the table due to VACUUM bypassing index and heap vacuuming.
623  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
624  * It seems like a good idea to err on the side of not vacuuming again too
625  * soon in cases where the failsafe prevented significant amounts of heap
626  * vacuuming.
627  */
629  rel->rd_rel->relisshared,
630  Max(vacrel->new_live_tuples, 0),
631  vacrel->recently_dead_tuples +
632  vacrel->missed_dead_tuples);
634 
635  if (instrument)
636  {
637  TimestampTz endtime = GetCurrentTimestamp();
638 
639  if (verbose || params->log_min_duration == 0 ||
640  TimestampDifferenceExceeds(starttime, endtime,
641  params->log_min_duration))
642  {
643  long secs_dur;
644  int usecs_dur;
645  WalUsage walusage;
647  char *msgfmt;
648  int32 diff;
649  int64 PageHitOp = VacuumPageHit - StartPageHit,
650  PageMissOp = VacuumPageMiss - StartPageMiss,
651  PageDirtyOp = VacuumPageDirty - StartPageDirty;
652  double read_rate = 0,
653  write_rate = 0;
654 
655  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
656  memset(&walusage, 0, sizeof(WalUsage));
657  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
658 
660  if (verbose)
661  {
662  /*
663  * Aggressiveness already reported earlier, in dedicated
664  * VACUUM VERBOSE ereport
665  */
666  Assert(!params->is_wraparound);
667  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
668  }
669  else if (params->is_wraparound)
670  {
671  /*
672  * While it's possible for a VACUUM to be both is_wraparound
673  * and !aggressive, that's just a corner-case -- is_wraparound
674  * implies aggressive. Produce distinct output for the corner
675  * case all the same, just in case.
676  */
677  if (aggressive)
678  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
679  else
680  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
681  }
682  else
683  {
684  if (aggressive)
685  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
686  else
687  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
688  }
689  appendStringInfo(&buf, msgfmt,
691  vacrel->relnamespace,
692  vacrel->relname,
693  vacrel->num_index_scans);
694  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
695  vacrel->removed_pages,
696  new_rel_pages,
697  vacrel->scanned_pages,
698  orig_rel_pages == 0 ? 100.0 :
699  100.0 * vacrel->scanned_pages / orig_rel_pages);
701  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
702  (long long) vacrel->tuples_deleted,
703  (long long) vacrel->new_rel_tuples,
704  (long long) vacrel->recently_dead_tuples);
705  if (vacrel->missed_dead_tuples > 0)
707  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
708  (long long) vacrel->missed_dead_tuples,
709  vacrel->missed_dead_pages);
710  diff = (int32) (ReadNextTransactionId() - OldestXmin);
712  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
713  OldestXmin, diff);
714  if (frozenxid_updated)
715  {
716  diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid);
718  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
719  vacrel->NewRelfrozenXid, diff);
720  }
721  if (minmulti_updated)
722  {
723  diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid);
725  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
726  vacrel->NewRelminMxid, diff);
727  }
728  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
729  vacrel->frozen_pages,
730  orig_rel_pages == 0 ? 100.0 :
731  100.0 * vacrel->frozen_pages / orig_rel_pages,
732  (long long) vacrel->tuples_frozen);
733  if (vacrel->do_index_vacuuming)
734  {
735  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
736  appendStringInfoString(&buf, _("index scan not needed: "));
737  else
738  appendStringInfoString(&buf, _("index scan needed: "));
739 
740  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
741  }
742  else
743  {
744  if (!vacrel->failsafe_active)
745  appendStringInfoString(&buf, _("index scan bypassed: "));
746  else
747  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
748 
749  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
750  }
751  appendStringInfo(&buf, msgfmt,
752  vacrel->lpdead_item_pages,
753  orig_rel_pages == 0 ? 100.0 :
754  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
755  (long long) vacrel->lpdead_items);
756  for (int i = 0; i < vacrel->nindexes; i++)
757  {
758  IndexBulkDeleteResult *istat = vacrel->indstats[i];
759 
760  if (!istat)
761  continue;
762 
764  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
765  indnames[i],
766  istat->num_pages,
767  istat->pages_newly_deleted,
768  istat->pages_deleted,
769  istat->pages_free);
770  }
771  if (track_io_timing)
772  {
773  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
774  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
775 
776  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
777  read_ms, write_ms);
778  }
779  if (secs_dur > 0 || usecs_dur > 0)
780  {
781  read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
782  (secs_dur + usecs_dur / 1000000.0);
783  write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
784  (secs_dur + usecs_dur / 1000000.0);
785  }
786  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
787  read_rate, write_rate);
789  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
790  (long long) PageHitOp,
791  (long long) PageMissOp,
792  (long long) PageDirtyOp);
794  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
795  (long long) walusage.wal_records,
796  (long long) walusage.wal_fpi,
797  (unsigned long long) walusage.wal_bytes);
798  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
799 
800  ereport(verbose ? INFO : LOG,
801  (errmsg_internal("%s", buf.data)));
802  pfree(buf.data);
803  }
804  }
805 
806  /* Cleanup index statistics and index names */
807  for (int i = 0; i < vacrel->nindexes; i++)
808  {
809  if (vacrel->indstats[i])
810  pfree(vacrel->indstats[i]);
811 
812  if (instrument)
813  pfree(indnames[i]);
814  }
815 }
816 
817 /*
818  * lazy_scan_heap() -- workhorse function for VACUUM
819  *
820  * This routine prunes each page in the heap, and considers the need to
821  * freeze remaining tuples with storage (not including pages that can be
822  * skipped using the visibility map). Also performs related maintenance
823  * of the FSM and visibility map. These steps all take place during an
824  * initial pass over the target heap relation.
825  *
826  * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
827  * consists of deleting index tuples that point to LP_DEAD items left in
828  * heap pages following pruning. Earlier initial pass over the heap will
829  * have collected the TIDs whose index tuples need to be removed.
830  *
831  * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
832  * largely consists of marking LP_DEAD items (from collected TID array)
833  * as LP_UNUSED. This has to happen in a second, final pass over the
834  * heap, to preserve a basic invariant that all index AMs rely on: no
835  * extant index tuple can ever be allowed to contain a TID that points to
836  * an LP_UNUSED line pointer in the heap. We must disallow premature
837  * recycling of line pointers to avoid index scans that get confused
838  * about which TID points to which tuple immediately after recycling.
839  * (Actually, this isn't a concern when target heap relation happens to
840  * have no indexes, which allows us to safely apply the one-pass strategy
841  * as an optimization).
842  *
843  * In practice we often have enough space to fit all TIDs, and so won't
844  * need to call lazy_vacuum more than once, after our initial pass over
845  * the heap has totally finished. Otherwise things are slightly more
846  * complicated: our "initial pass" over the heap applies only to those
847  * pages that were pruned before we needed to call lazy_vacuum, and our
848  * "final pass" over the heap only vacuums these same heap pages.
849  * However, we process indexes in full every time lazy_vacuum is called,
850  * which makes index processing very inefficient when memory is in short
851  * supply.
852  */
853 static void
855 {
856  BlockNumber rel_pages = vacrel->rel_pages,
857  blkno,
858  next_unskippable_block,
859  next_failsafe_block = 0,
860  next_fsm_block_to_vacuum = 0;
861  VacDeadItems *dead_items = vacrel->dead_items;
862  Buffer vmbuffer = InvalidBuffer;
863  bool next_unskippable_allvis,
864  skipping_current_range;
865  const int initprog_index[] = {
869  };
870  int64 initprog_val[3];
871 
872  /* Report that we're scanning the heap, advertising total # of blocks */
873  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
874  initprog_val[1] = rel_pages;
875  initprog_val[2] = dead_items->max_items;
876  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
877 
878  /* Set up an initial range of skippable blocks using the visibility map */
879  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
880  &next_unskippable_allvis,
881  &skipping_current_range);
882  for (blkno = 0; blkno < rel_pages; blkno++)
883  {
884  Buffer buf;
885  Page page;
886  bool all_visible_according_to_vm;
887  LVPagePruneState prunestate;
888 
889  if (blkno == next_unskippable_block)
890  {
891  /*
892  * Can't skip this page safely. Must scan the page. But
893  * determine the next skippable range after the page first.
894  */
895  all_visible_according_to_vm = next_unskippable_allvis;
896  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
897  blkno + 1,
898  &next_unskippable_allvis,
899  &skipping_current_range);
900 
901  Assert(next_unskippable_block >= blkno + 1);
902  }
903  else
904  {
905  /* Last page always scanned (may need to set nonempty_pages) */
906  Assert(blkno < rel_pages - 1);
907 
908  if (skipping_current_range)
909  continue;
910 
911  /* Current range is too small to skip -- just scan the page */
912  all_visible_according_to_vm = true;
913  }
914 
915  vacrel->scanned_pages++;
916 
917  /* Report as block scanned, update error traceback information */
920  blkno, InvalidOffsetNumber);
921 
923 
924  /*
925  * Regularly check if wraparound failsafe should trigger.
926  *
927  * There is a similar check inside lazy_vacuum_all_indexes(), but
928  * relfrozenxid might start to look dangerously old before we reach
929  * that point. This check also provides failsafe coverage for the
930  * one-pass strategy, and the two-pass strategy with the index_cleanup
931  * param set to 'off'.
932  */
933  if (blkno - next_failsafe_block >= FAILSAFE_EVERY_PAGES)
934  {
936  next_failsafe_block = blkno;
937  }
938 
939  /*
940  * Consider if we definitely have enough space to process TIDs on page
941  * already. If we are close to overrunning the available space for
942  * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
943  * this page.
944  */
945  Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
946  if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
947  {
948  /*
949  * Before beginning index vacuuming, we release any pin we may
950  * hold on the visibility map page. This isn't necessary for
951  * correctness, but we do it anyway to avoid holding the pin
952  * across a lengthy, unrelated operation.
953  */
954  if (BufferIsValid(vmbuffer))
955  {
956  ReleaseBuffer(vmbuffer);
957  vmbuffer = InvalidBuffer;
958  }
959 
960  /* Perform a round of index and heap vacuuming */
961  vacrel->consider_bypass_optimization = false;
962  lazy_vacuum(vacrel);
963 
964  /*
965  * Vacuum the Free Space Map to make newly-freed space visible on
966  * upper-level FSM pages. Note we have not yet processed blkno.
967  */
968  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
969  blkno);
970  next_fsm_block_to_vacuum = blkno;
971 
972  /* Report that we are once again scanning the heap */
975  }
976 
977  /*
978  * Pin the visibility map page in case we need to mark the page
979  * all-visible. In most cases this will be very cheap, because we'll
980  * already have the correct page pinned anyway.
981  */
982  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
983 
984  /* Finished preparatory checks. Actually scan the page. */
985  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno,
986  RBM_NORMAL, vacrel->bstrategy);
987  page = BufferGetPage(buf);
988 
989  /*
990  * We need a buffer cleanup lock to prune HOT chains and defragment
991  * the page in lazy_scan_prune. But when it's not possible to acquire
992  * a cleanup lock right away, we may be able to settle for reduced
993  * processing using lazy_scan_noprune.
994  */
996  {
997  bool hastup,
998  recordfreespace;
999 
1001 
1002  /* Check for new or empty pages before lazy_scan_noprune call */
1003  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, true,
1004  vmbuffer))
1005  {
1006  /* Processed as new/empty page (lock and pin released) */
1007  continue;
1008  }
1009 
1010  /* Collect LP_DEAD items in dead_items array, count tuples */
1011  if (lazy_scan_noprune(vacrel, buf, blkno, page, &hastup,
1012  &recordfreespace))
1013  {
1014  Size freespace = 0;
1015 
1016  /*
1017  * Processed page successfully (without cleanup lock) -- just
1018  * need to perform rel truncation and FSM steps, much like the
1019  * lazy_scan_prune case. Don't bother trying to match its
1020  * visibility map setting steps, though.
1021  */
1022  if (hastup)
1023  vacrel->nonempty_pages = blkno + 1;
1024  if (recordfreespace)
1025  freespace = PageGetHeapFreeSpace(page);
1027  if (recordfreespace)
1028  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1029  continue;
1030  }
1031 
1032  /*
1033  * lazy_scan_noprune could not do all required processing. Wait
1034  * for a cleanup lock, and call lazy_scan_prune in the usual way.
1035  */
1036  Assert(vacrel->aggressive);
1039  }
1040 
1041  /* Check for new or empty pages before lazy_scan_prune call */
1042  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, false, vmbuffer))
1043  {
1044  /* Processed as new/empty page (lock and pin released) */
1045  continue;
1046  }
1047 
1048  /*
1049  * Prune, freeze, and count tuples.
1050  *
1051  * Accumulates details of remaining LP_DEAD line pointers on page in
1052  * dead_items array. This includes LP_DEAD line pointers that we
1053  * pruned ourselves, as well as existing LP_DEAD line pointers that
1054  * were pruned some time earlier. Also considers freezing XIDs in the
1055  * tuple headers of remaining items with storage.
1056  */
1057  lazy_scan_prune(vacrel, buf, blkno, page, &prunestate);
1058 
1059  Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
1060 
1061  /* Remember the location of the last page with nonremovable tuples */
1062  if (prunestate.hastup)
1063  vacrel->nonempty_pages = blkno + 1;
1064 
1065  if (vacrel->nindexes == 0)
1066  {
1067  /*
1068  * Consider the need to do page-at-a-time heap vacuuming when
1069  * using the one-pass strategy now.
1070  *
1071  * The one-pass strategy will never call lazy_vacuum(). The steps
1072  * performed here can be thought of as the one-pass equivalent of
1073  * a call to lazy_vacuum().
1074  */
1075  if (prunestate.has_lpdead_items)
1076  {
1077  Size freespace;
1078 
1079  lazy_vacuum_heap_page(vacrel, blkno, buf, 0, &vmbuffer);
1080 
1081  /* Forget the LP_DEAD items that we just vacuumed */
1082  dead_items->num_items = 0;
1083 
1084  /*
1085  * Periodically perform FSM vacuuming to make newly-freed
1086  * space visible on upper FSM pages. Note we have not yet
1087  * performed FSM processing for blkno.
1088  */
1089  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1090  {
1091  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1092  blkno);
1093  next_fsm_block_to_vacuum = blkno;
1094  }
1095 
1096  /*
1097  * Now perform FSM processing for blkno, and move on to next
1098  * page.
1099  *
1100  * Our call to lazy_vacuum_heap_page() will have considered if
1101  * it's possible to set all_visible/all_frozen independently
1102  * of lazy_scan_prune(). Note that prunestate was invalidated
1103  * by lazy_vacuum_heap_page() call.
1104  */
1105  freespace = PageGetHeapFreeSpace(page);
1106 
1108  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1109  continue;
1110  }
1111 
1112  /*
1113  * There was no call to lazy_vacuum_heap_page() because pruning
1114  * didn't encounter/create any LP_DEAD items that needed to be
1115  * vacuumed. Prune state has not been invalidated, so proceed
1116  * with prunestate-driven visibility map and FSM steps (just like
1117  * the two-pass strategy).
1118  */
1119  Assert(dead_items->num_items == 0);
1120  }
1121 
1122  /*
1123  * Handle setting visibility map bit based on information from the VM
1124  * (as of last lazy_scan_skip() call), and from prunestate
1125  */
1126  if (!all_visible_according_to_vm && prunestate.all_visible)
1127  {
1129 
1130  if (prunestate.all_frozen)
1131  flags |= VISIBILITYMAP_ALL_FROZEN;
1132 
1133  /*
1134  * It should never be the case that the visibility map page is set
1135  * while the page-level bit is clear, but the reverse is allowed
1136  * (if checksums are not enabled). Regardless, set both bits so
1137  * that we get back in sync.
1138  *
1139  * NB: If the heap page is all-visible but the VM bit is not set,
1140  * we don't need to dirty the heap page. However, if checksums
1141  * are enabled, we do need to make sure that the heap page is
1142  * dirtied before passing it to visibilitymap_set(), because it
1143  * may be logged. Given that this situation should only happen in
1144  * rare cases after a crash, it is not worth optimizing.
1145  */
1146  PageSetAllVisible(page);
1148  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1149  vmbuffer, prunestate.visibility_cutoff_xid,
1150  flags);
1151  }
1152 
1153  /*
1154  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1155  * the page-level bit is clear. However, it's possible that the bit
1156  * got cleared after lazy_scan_skip() was called, so we must recheck
1157  * with buffer lock before concluding that the VM is corrupt.
1158  */
1159  else if (all_visible_according_to_vm && !PageIsAllVisible(page)
1160  && VM_ALL_VISIBLE(vacrel->rel, blkno, &vmbuffer))
1161  {
1162  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1163  vacrel->relname, blkno);
1164  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1166  }
1167 
1168  /*
1169  * It's possible for the value returned by
1170  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1171  * wrong for us to see tuples that appear to not be visible to
1172  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1173  * xmin value never moves backwards, but
1174  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1175  * returns a value that's unnecessarily small, so if we see that
1176  * contradiction it just means that the tuples that we think are not
1177  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1178  * is correct.
1179  *
1180  * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE
1181  * set, however.
1182  */
1183  else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
1184  {
1185  elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1186  vacrel->relname, blkno);
1187  PageClearAllVisible(page);
1189  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1191  }
1192 
1193  /*
1194  * If the all-visible page is all-frozen but not marked as such yet,
1195  * mark it as all-frozen. Note that all_frozen is only valid if
1196  * all_visible is true, so we must check both prunestate fields.
1197  */
1198  else if (all_visible_according_to_vm && prunestate.all_visible &&
1199  prunestate.all_frozen &&
1200  !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1201  {
1202  /*
1203  * We can pass InvalidTransactionId as the cutoff XID here,
1204  * because setting the all-frozen bit doesn't cause recovery
1205  * conflicts.
1206  */
1207  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1208  vmbuffer, InvalidTransactionId,
1210  }
1211 
1212  /*
1213  * Final steps for block: drop cleanup lock, record free space in the
1214  * FSM
1215  */
1216  if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1217  {
1218  /*
1219  * Wait until lazy_vacuum_heap_rel() to save free space. This
1220  * doesn't just save us some cycles; it also allows us to record
1221  * any additional free space that lazy_vacuum_heap_page() will
1222  * make available in cases where it's possible to truncate the
1223  * page's line pointer array.
1224  *
1225  * Note: It's not in fact 100% certain that we really will call
1226  * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1227  * index vacuuming (and so must skip heap vacuuming). This is
1228  * deemed okay because it only happens in emergencies, or when
1229  * there is very little free space anyway. (Besides, we start
1230  * recording free space in the FSM once index vacuuming has been
1231  * abandoned.)
1232  *
1233  * Note: The one-pass (no indexes) case is only supposed to make
1234  * it this far when there were no LP_DEAD items during pruning.
1235  */
1236  Assert(vacrel->nindexes > 0);
1238  }
1239  else
1240  {
1241  Size freespace = PageGetHeapFreeSpace(page);
1242 
1244  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1245  }
1246  }
1247 
1248  vacrel->blkno = InvalidBlockNumber;
1249  if (BufferIsValid(vmbuffer))
1250  ReleaseBuffer(vmbuffer);
1251 
1252  /* report that everything is now scanned */
1254 
1255  /* now we can compute the new value for pg_class.reltuples */
1256  vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1257  vacrel->scanned_pages,
1258  vacrel->live_tuples);
1259 
1260  /*
1261  * Also compute the total number of surviving heap entries. In the
1262  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1263  */
1264  vacrel->new_rel_tuples =
1265  Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1266  vacrel->missed_dead_tuples;
1267 
1268  /*
1269  * Do index vacuuming (call each index's ambulkdelete routine), then do
1270  * related heap vacuuming
1271  */
1272  if (dead_items->num_items > 0)
1273  lazy_vacuum(vacrel);
1274 
1275  /*
1276  * Vacuum the remainder of the Free Space Map. We must do this whether or
1277  * not there were indexes, and whether or not we bypassed index vacuuming.
1278  */
1279  if (blkno > next_fsm_block_to_vacuum)
1280  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1281 
1282  /* report all blocks vacuumed */
1284 
1285  /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1286  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1287  lazy_cleanup_all_indexes(vacrel);
1288 }
1289 
1290 /*
1291  * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
1292  *
1293  * lazy_scan_heap() calls here every time it needs to set up a new range of
1294  * blocks to skip via the visibility map. Caller passes the next block in
1295  * line. We return a next_unskippable_block for this range. When there are
1296  * no skippable blocks we just return caller's next_block. The all-visible
1297  * status of the returned block is set in *next_unskippable_allvis for caller,
1298  * too. Block usually won't be all-visible (since it's unskippable), but it
1299  * can be during aggressive VACUUMs (as well as in certain edge cases).
1300  *
1301  * Sets *skipping_current_range to indicate if caller should skip this range.
1302  * Costs and benefits drive our decision. Very small ranges won't be skipped.
1303  *
1304  * Note: our opinion of which blocks can be skipped can go stale immediately.
1305  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1306  * was concurrently cleared, though. All that matters is that caller scan all
1307  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1308  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1309  * older XIDs/MXIDs. The vacrel->skippedallvis flag will be set here when the
1310  * choice to skip such a range is actually made, making everything safe.)
1311  */
1312 static BlockNumber
1313 lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
1314  bool *next_unskippable_allvis, bool *skipping_current_range)
1315 {
1316  BlockNumber rel_pages = vacrel->rel_pages,
1317  next_unskippable_block = next_block,
1318  nskippable_blocks = 0;
1319  bool skipsallvis = false;
1320 
1321  *next_unskippable_allvis = true;
1322  while (next_unskippable_block < rel_pages)
1323  {
1324  uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1325  next_unskippable_block,
1326  vmbuffer);
1327 
1328  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1329  {
1330  Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1331  *next_unskippable_allvis = false;
1332  break;
1333  }
1334 
1335  /*
1336  * Caller must scan the last page to determine whether it has tuples
1337  * (caller must have the opportunity to set vacrel->nonempty_pages).
1338  * This rule avoids having lazy_truncate_heap() take access-exclusive
1339  * lock on rel to attempt a truncation that fails anyway, just because
1340  * there are tuples on the last page (it is likely that there will be
1341  * tuples on other nearby pages as well, but those can be skipped).
1342  *
1343  * Implement this by always treating the last block as unsafe to skip.
1344  */
1345  if (next_unskippable_block == rel_pages - 1)
1346  break;
1347 
1348  /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1349  if (!vacrel->skipwithvm)
1350  break;
1351 
1352  /*
1353  * Aggressive VACUUM caller can't skip pages just because they are
1354  * all-visible. They may still skip all-frozen pages, which can't
1355  * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1356  */
1357  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1358  {
1359  if (vacrel->aggressive)
1360  break;
1361 
1362  /*
1363  * All-visible block is safe to skip in non-aggressive case. But
1364  * remember that the final range contains such a block for later.
1365  */
1366  skipsallvis = true;
1367  }
1368 
1370  next_unskippable_block++;
1371  nskippable_blocks++;
1372  }
1373 
1374  /*
1375  * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
1376  * pages. Since we're reading sequentially, the OS should be doing
1377  * readahead for us, so there's no gain in skipping a page now and then.
1378  * Skipping such a range might even discourage sequential detection.
1379  *
1380  * This test also enables more frequent relfrozenxid advancement during
1381  * non-aggressive VACUUMs. If the range has any all-visible pages then
1382  * skipping makes updating relfrozenxid unsafe, which is a real downside.
1383  */
1384  if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
1385  *skipping_current_range = false;
1386  else
1387  {
1388  *skipping_current_range = true;
1389  if (skipsallvis)
1390  vacrel->skippedallvis = true;
1391  }
1392 
1393  return next_unskippable_block;
1394 }
1395 
1396 /*
1397  * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1398  *
1399  * Must call here to handle both new and empty pages before calling
1400  * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1401  * with new or empty pages.
1402  *
1403  * It's necessary to consider new pages as a special case, since the rules for
1404  * maintaining the visibility map and FSM with empty pages are a little
1405  * different (though new pages can be truncated away during rel truncation).
1406  *
1407  * Empty pages are not really a special case -- they're just heap pages that
1408  * have no allocated tuples (including even LP_UNUSED items). You might
1409  * wonder why we need to handle them here all the same. It's only necessary
1410  * because of a corner-case involving a hard crash during heap relation
1411  * extension. If we ever make relation-extension crash safe, then it should
1412  * no longer be necessary to deal with empty pages here (or new pages, for
1413  * that matter).
1414  *
1415  * Caller must hold at least a shared lock. We might need to escalate the
1416  * lock in that case, so the type of lock caller holds needs to be specified
1417  * using 'sharelock' argument.
1418  *
1419  * Returns false in common case where caller should go on to call
1420  * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1421  * that lazy_scan_heap is done processing the page, releasing lock on caller's
1422  * behalf.
1423  */
1424 static bool
1426  Page page, bool sharelock, Buffer vmbuffer)
1427 {
1428  Size freespace;
1429 
1430  if (PageIsNew(page))
1431  {
1432  /*
1433  * All-zeroes pages can be left over if either a backend extends the
1434  * relation by a single page, but crashes before the newly initialized
1435  * page has been written out, or when bulk-extending the relation
1436  * (which creates a number of empty pages at the tail end of the
1437  * relation), and then enters them into the FSM.
1438  *
1439  * Note we do not enter the page into the visibilitymap. That has the
1440  * downside that we repeatedly visit this page in subsequent vacuums,
1441  * but otherwise we'll never discover the space on a promoted standby.
1442  * The harm of repeated checking ought to normally not be too bad. The
1443  * space usually should be used at some point, otherwise there
1444  * wouldn't be any regular vacuums.
1445  *
1446  * Make sure these pages are in the FSM, to ensure they can be reused.
1447  * Do that by testing if there's any space recorded for the page. If
1448  * not, enter it. We do so after releasing the lock on the heap page,
1449  * the FSM is approximate, after all.
1450  */
1452 
1453  if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1454  {
1455  freespace = BLCKSZ - SizeOfPageHeaderData;
1456 
1457  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1458  }
1459 
1460  return true;
1461  }
1462 
1463  if (PageIsEmpty(page))
1464  {
1465  /*
1466  * It seems likely that caller will always be able to get a cleanup
1467  * lock on an empty page. But don't take any chances -- escalate to
1468  * an exclusive lock (still don't need a cleanup lock, though).
1469  */
1470  if (sharelock)
1471  {
1474 
1475  if (!PageIsEmpty(page))
1476  {
1477  /* page isn't new or empty -- keep lock and pin for now */
1478  return false;
1479  }
1480  }
1481  else
1482  {
1483  /* Already have a full cleanup lock (which is more than enough) */
1484  }
1485 
1486  /*
1487  * Unlike new pages, empty pages are always set all-visible and
1488  * all-frozen.
1489  */
1490  if (!PageIsAllVisible(page))
1491  {
1493 
1494  /* mark buffer dirty before writing a WAL record */
1496 
1497  /*
1498  * It's possible that another backend has extended the heap,
1499  * initialized the page, and then failed to WAL-log the page due
1500  * to an ERROR. Since heap extension is not WAL-logged, recovery
1501  * might try to replay our record setting the page all-visible and
1502  * find that the page isn't initialized, which will cause a PANIC.
1503  * To prevent that, check whether the page has been previously
1504  * WAL-logged, and if not, do that now.
1505  */
1506  if (RelationNeedsWAL(vacrel->rel) &&
1507  PageGetLSN(page) == InvalidXLogRecPtr)
1508  log_newpage_buffer(buf, true);
1509 
1510  PageSetAllVisible(page);
1511  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1512  vmbuffer, InvalidTransactionId,
1514  END_CRIT_SECTION();
1515  }
1516 
1517  freespace = PageGetHeapFreeSpace(page);
1519  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1520  return true;
1521  }
1522 
1523  /* page isn't new or empty -- keep lock and pin */
1524  return false;
1525 }
1526 
1527 /*
1528  * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1529  *
1530  * Caller must hold pin and buffer cleanup lock on the buffer.
1531  *
1532  * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
1533  * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
1534  * whether or not a tuple should be considered DEAD. This happened when an
1535  * inserting transaction concurrently aborted (after our heap_page_prune()
1536  * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
1537  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
1538  * but nevertheless were left with storage after pruning.
1539  *
1540  * The approach we take now is to restart pruning when the race condition is
1541  * detected. This allows heap_page_prune() to prune the tuples inserted by
1542  * the now-aborted transaction. This is a little crude, but it guarantees
1543  * that any items that make it into the dead_items array are simple LP_DEAD
1544  * line pointers, and that every remaining item with tuple storage is
1545  * considered as a candidate for freezing.
1546  */
1547 static void
1549  Buffer buf,
1550  BlockNumber blkno,
1551  Page page,
1552  LVPagePruneState *prunestate)
1553 {
1554  Relation rel = vacrel->rel;
1555  OffsetNumber offnum,
1556  maxoff;
1557  ItemId itemid;
1558  HeapTupleData tuple;
1559  HTSV_Result res;
1560  int tuples_deleted,
1561  tuples_frozen,
1562  lpdead_items,
1563  live_tuples,
1564  recently_dead_tuples;
1565  int nnewlpdead;
1566  TransactionId NewRelfrozenXid;
1567  MultiXactId NewRelminMxid;
1568  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1570 
1571  Assert(BufferGetBlockNumber(buf) == blkno);
1572 
1573  /*
1574  * maxoff might be reduced following line pointer array truncation in
1575  * heap_page_prune. That's safe for us to ignore, since the reclaimed
1576  * space will continue to look like LP_UNUSED items below.
1577  */
1578  maxoff = PageGetMaxOffsetNumber(page);
1579 
1580 retry:
1581 
1582  /* Initialize (or reset) page-level state */
1583  NewRelfrozenXid = vacrel->NewRelfrozenXid;
1584  NewRelminMxid = vacrel->NewRelminMxid;
1585  tuples_deleted = 0;
1586  tuples_frozen = 0;
1587  lpdead_items = 0;
1588  live_tuples = 0;
1589  recently_dead_tuples = 0;
1590 
1591  /*
1592  * Prune all HOT-update chains in this page.
1593  *
1594  * We count tuples removed by the pruning step as tuples_deleted. Its
1595  * final value can be thought of as the number of tuples that have been
1596  * deleted from the table. It should not be confused with lpdead_items;
1597  * lpdead_items's final value can be thought of as the number of tuples
1598  * that were deleted from indexes.
1599  */
1600  tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
1601  InvalidTransactionId, 0, &nnewlpdead,
1602  &vacrel->offnum);
1603 
1604  /*
1605  * Now scan the page to collect LP_DEAD items and check for tuples
1606  * requiring freezing among remaining tuples with storage
1607  */
1608  prunestate->hastup = false;
1609  prunestate->has_lpdead_items = false;
1610  prunestate->all_visible = true;
1611  prunestate->all_frozen = true;
1613 
1614  for (offnum = FirstOffsetNumber;
1615  offnum <= maxoff;
1616  offnum = OffsetNumberNext(offnum))
1617  {
1618  bool tuple_totally_frozen;
1619 
1620  /*
1621  * Set the offset number so that we can display it along with any
1622  * error that occurred while processing this tuple.
1623  */
1624  vacrel->offnum = offnum;
1625  itemid = PageGetItemId(page, offnum);
1626 
1627  if (!ItemIdIsUsed(itemid))
1628  continue;
1629 
1630  /* Redirect items mustn't be touched */
1631  if (ItemIdIsRedirected(itemid))
1632  {
1633  prunestate->hastup = true; /* page won't be truncatable */
1634  continue;
1635  }
1636 
1637  /*
1638  * LP_DEAD items are processed outside of the loop.
1639  *
1640  * Note that we deliberately don't set hastup=true in the case of an
1641  * LP_DEAD item here, which is not how count_nondeletable_pages() does
1642  * it -- it only considers pages empty/truncatable when they have no
1643  * items at all (except LP_UNUSED items).
1644  *
1645  * Our assumption is that any LP_DEAD items we encounter here will
1646  * become LP_UNUSED inside lazy_vacuum_heap_page() before we actually
1647  * call count_nondeletable_pages(). In any case our opinion of
1648  * whether or not a page 'hastup' (which is how our caller sets its
1649  * vacrel->nonempty_pages value) is inherently race-prone. It must be
1650  * treated as advisory/unreliable, so we might as well be slightly
1651  * optimistic.
1652  */
1653  if (ItemIdIsDead(itemid))
1654  {
1655  deadoffsets[lpdead_items++] = offnum;
1656  prunestate->all_visible = false;
1657  prunestate->has_lpdead_items = true;
1658  continue;
1659  }
1660 
1661  Assert(ItemIdIsNormal(itemid));
1662 
1663  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1664  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1665  tuple.t_len = ItemIdGetLength(itemid);
1666  tuple.t_tableOid = RelationGetRelid(rel);
1667 
1668  /*
1669  * DEAD tuples are almost always pruned into LP_DEAD line pointers by
1670  * heap_page_prune(), but it's possible that the tuple state changed
1671  * since heap_page_prune() looked. Handle that here by restarting.
1672  * (See comments at the top of function for a full explanation.)
1673  */
1674  res = HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf);
1675 
1676  if (unlikely(res == HEAPTUPLE_DEAD))
1677  goto retry;
1678 
1679  /*
1680  * The criteria for counting a tuple as live in this block need to
1681  * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
1682  * and ANALYZE may produce wildly different reltuples values, e.g.
1683  * when there are many recently-dead tuples.
1684  *
1685  * The logic here is a bit simpler than acquire_sample_rows(), as
1686  * VACUUM can't run inside a transaction block, which makes some cases
1687  * impossible (e.g. in-progress insert from the same transaction).
1688  *
1689  * We treat LP_DEAD items (which are the closest thing to DEAD tuples
1690  * that might be seen here) differently, too: we assume that they'll
1691  * become LP_UNUSED before VACUUM finishes. This difference is only
1692  * superficial. VACUUM effectively agrees with ANALYZE about DEAD
1693  * items, in the end. VACUUM won't remember LP_DEAD items, but only
1694  * because they're not supposed to be left behind when it is done.
1695  * (Cases where we bypass index vacuuming will violate this optimistic
1696  * assumption, but the overall impact of that should be negligible.)
1697  */
1698  switch (res)
1699  {
1700  case HEAPTUPLE_LIVE:
1701 
1702  /*
1703  * Count it as live. Not only is this natural, but it's also
1704  * what acquire_sample_rows() does.
1705  */
1706  live_tuples++;
1707 
1708  /*
1709  * Is the tuple definitely visible to all transactions?
1710  *
1711  * NB: Like with per-tuple hint bits, we can't set the
1712  * PD_ALL_VISIBLE flag if the inserter committed
1713  * asynchronously. See SetHintBits for more info. Check that
1714  * the tuple is hinted xmin-committed because of that.
1715  */
1716  if (prunestate->all_visible)
1717  {
1718  TransactionId xmin;
1719 
1721  {
1722  prunestate->all_visible = false;
1723  break;
1724  }
1725 
1726  /*
1727  * The inserter definitely committed. But is it old enough
1728  * that everyone sees it as committed?
1729  */
1730  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1731  if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
1732  {
1733  prunestate->all_visible = false;
1734  break;
1735  }
1736 
1737  /* Track newest xmin on page. */
1738  if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid))
1739  prunestate->visibility_cutoff_xid = xmin;
1740  }
1741  break;
1743 
1744  /*
1745  * If tuple is recently dead then we must not remove it from
1746  * the relation. (We only remove items that are LP_DEAD from
1747  * pruning.)
1748  */
1749  recently_dead_tuples++;
1750  prunestate->all_visible = false;
1751  break;
1753 
1754  /*
1755  * We do not count these rows as live, because we expect the
1756  * inserting transaction to update the counters at commit, and
1757  * we assume that will happen only after we report our
1758  * results. This assumption is a bit shaky, but it is what
1759  * acquire_sample_rows() does, so be consistent.
1760  */
1761  prunestate->all_visible = false;
1762  break;
1764  /* This is an expected case during concurrent vacuum */
1765  prunestate->all_visible = false;
1766 
1767  /*
1768  * Count such rows as live. As above, we assume the deleting
1769  * transaction will commit and update the counters after we
1770  * report.
1771  */
1772  live_tuples++;
1773  break;
1774  default:
1775  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1776  break;
1777  }
1778 
1779  /*
1780  * Non-removable tuple (i.e. tuple with storage).
1781  *
1782  * Check tuple left behind after pruning to see if needs to be frozen
1783  * now.
1784  */
1785  prunestate->hastup = true; /* page makes rel truncation unsafe */
1787  vacrel->relfrozenxid,
1788  vacrel->relminmxid,
1789  vacrel->FreezeLimit,
1790  vacrel->MultiXactCutoff,
1791  &frozen[tuples_frozen],
1792  &tuple_totally_frozen,
1793  &NewRelfrozenXid, &NewRelminMxid))
1794  {
1795  /* Will execute freeze below */
1796  frozen[tuples_frozen++].offset = offnum;
1797  }
1798 
1799  /*
1800  * If tuple is not frozen (and not about to become frozen) then caller
1801  * had better not go on to set this page's VM bit
1802  */
1803  if (!tuple_totally_frozen)
1804  prunestate->all_frozen = false;
1805  }
1806 
1807  vacrel->offnum = InvalidOffsetNumber;
1808 
1809  /*
1810  * We have now divided every item on the page into either an LP_DEAD item
1811  * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
1812  * that remains and needs to be considered for freezing now (LP_UNUSED and
1813  * LP_REDIRECT items also remain, but are of no further interest to us).
1814  */
1815  vacrel->NewRelfrozenXid = NewRelfrozenXid;
1816  vacrel->NewRelminMxid = NewRelminMxid;
1817 
1818  /*
1819  * Consider the need to freeze any items with tuple storage from the page
1820  * first (arbitrary)
1821  */
1822  if (tuples_frozen > 0)
1823  {
1824  Assert(prunestate->hastup);
1825 
1826  vacrel->frozen_pages++;
1827 
1828  /*
1829  * At least one tuple with storage needs to be frozen -- execute that
1830  * now.
1831  *
1832  * If we need to freeze any tuples we'll mark the buffer dirty, and
1833  * write a WAL record recording the changes. We must log the changes
1834  * to be crash-safe against future truncation of CLOG.
1835  */
1837 
1839 
1840  /* execute collected freezes */
1841  for (int i = 0; i < tuples_frozen; i++)
1842  {
1843  HeapTupleHeader htup;
1844 
1845  itemid = PageGetItemId(page, frozen[i].offset);
1846  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1847 
1848  heap_execute_freeze_tuple(htup, &frozen[i]);
1849  }
1850 
1851  /* Now WAL-log freezing if necessary */
1852  if (RelationNeedsWAL(vacrel->rel))
1853  {
1854  XLogRecPtr recptr;
1855 
1856  recptr = log_heap_freeze(vacrel->rel, buf, vacrel->FreezeLimit,
1857  frozen, tuples_frozen);
1858  PageSetLSN(page, recptr);
1859  }
1860 
1861  END_CRIT_SECTION();
1862  }
1863 
1864  /*
1865  * The second pass over the heap can also set visibility map bits, using
1866  * the same approach. This is important when the table frequently has a
1867  * few old LP_DEAD items on each page by the time we get to it (typically
1868  * because past opportunistic pruning operations freed some non-HOT
1869  * tuples).
1870  *
1871  * VACUUM will call heap_page_is_all_visible() during the second pass over
1872  * the heap to determine all_visible and all_frozen for the page -- this
1873  * is a specialized version of the logic from this function. Now that
1874  * we've finished pruning and freezing, make sure that we're in total
1875  * agreement with heap_page_is_all_visible() using an assertion.
1876  */
1877 #ifdef USE_ASSERT_CHECKING
1878  /* Note that all_frozen value does not matter when !all_visible */
1879  if (prunestate->all_visible)
1880  {
1881  TransactionId cutoff;
1882  bool all_frozen;
1883 
1884  if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
1885  Assert(false);
1886 
1887  Assert(lpdead_items == 0);
1888  Assert(prunestate->all_frozen == all_frozen);
1889 
1890  /*
1891  * It's possible that we froze tuples and made the page's XID cutoff
1892  * (for recovery conflict purposes) FrozenTransactionId. This is okay
1893  * because visibility_cutoff_xid will be logged by our caller in a
1894  * moment.
1895  */
1896  Assert(cutoff == FrozenTransactionId ||
1897  cutoff == prunestate->visibility_cutoff_xid);
1898  }
1899 #endif
1900 
1901  /*
1902  * Now save details of the LP_DEAD items from the page in vacrel
1903  */
1904  if (lpdead_items > 0)
1905  {
1906  VacDeadItems *dead_items = vacrel->dead_items;
1907  ItemPointerData tmp;
1908 
1909  Assert(!prunestate->all_visible);
1910  Assert(prunestate->has_lpdead_items);
1911 
1912  vacrel->lpdead_item_pages++;
1913 
1914  ItemPointerSetBlockNumber(&tmp, blkno);
1915 
1916  for (int i = 0; i < lpdead_items; i++)
1917  {
1918  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
1919  dead_items->items[dead_items->num_items++] = tmp;
1920  }
1921 
1922  Assert(dead_items->num_items <= dead_items->max_items);
1924  dead_items->num_items);
1925  }
1926 
1927  /* Finally, add page-local counts to whole-VACUUM counts */
1928  vacrel->tuples_deleted += tuples_deleted;
1929  vacrel->tuples_frozen += tuples_frozen;
1930  vacrel->lpdead_items += lpdead_items;
1931  vacrel->live_tuples += live_tuples;
1932  vacrel->recently_dead_tuples += recently_dead_tuples;
1933 }
1934 
1935 /*
1936  * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1937  *
1938  * Caller need only hold a pin and share lock on the buffer, unlike
1939  * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1940  * performed here, it's quite possible that an earlier opportunistic pruning
1941  * operation left LP_DEAD items behind. We'll at least collect any such items
1942  * in the dead_items array for removal from indexes.
1943  *
1944  * For aggressive VACUUM callers, we may return false to indicate that a full
1945  * cleanup lock is required for processing by lazy_scan_prune. This is only
1946  * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1947  * one or more tuples on the page. We always return true for non-aggressive
1948  * callers.
1949  *
1950  * See lazy_scan_prune for an explanation of hastup return flag.
1951  * recordfreespace flag instructs caller on whether or not it should do
1952  * generic FSM processing for page.
1953  */
1954 static bool
1956  Buffer buf,
1957  BlockNumber blkno,
1958  Page page,
1959  bool *hastup,
1960  bool *recordfreespace)
1961 {
1962  OffsetNumber offnum,
1963  maxoff;
1964  int lpdead_items,
1965  live_tuples,
1966  recently_dead_tuples,
1967  missed_dead_tuples;
1968  HeapTupleHeader tupleheader;
1969  TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid;
1970  MultiXactId NewRelminMxid = vacrel->NewRelminMxid;
1971  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1972 
1973  Assert(BufferGetBlockNumber(buf) == blkno);
1974 
1975  *hastup = false; /* for now */
1976  *recordfreespace = false; /* for now */
1977 
1978  lpdead_items = 0;
1979  live_tuples = 0;
1980  recently_dead_tuples = 0;
1981  missed_dead_tuples = 0;
1982 
1983  maxoff = PageGetMaxOffsetNumber(page);
1984  for (offnum = FirstOffsetNumber;
1985  offnum <= maxoff;
1986  offnum = OffsetNumberNext(offnum))
1987  {
1988  ItemId itemid;
1989  HeapTupleData tuple;
1990 
1991  vacrel->offnum = offnum;
1992  itemid = PageGetItemId(page, offnum);
1993 
1994  if (!ItemIdIsUsed(itemid))
1995  continue;
1996 
1997  if (ItemIdIsRedirected(itemid))
1998  {
1999  *hastup = true;
2000  continue;
2001  }
2002 
2003  if (ItemIdIsDead(itemid))
2004  {
2005  /*
2006  * Deliberately don't set hastup=true here. See same point in
2007  * lazy_scan_prune for an explanation.
2008  */
2009  deadoffsets[lpdead_items++] = offnum;
2010  continue;
2011  }
2012 
2013  *hastup = true; /* page prevents rel truncation */
2014  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2015  if (heap_tuple_would_freeze(tupleheader,
2016  vacrel->FreezeLimit,
2017  vacrel->MultiXactCutoff,
2018  &NewRelfrozenXid, &NewRelminMxid))
2019  {
2020  /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2021  if (vacrel->aggressive)
2022  {
2023  /*
2024  * Aggressive VACUUMs must always be able to advance rel's
2025  * relfrozenxid to a value >= FreezeLimit (and be able to
2026  * advance rel's relminmxid to a value >= MultiXactCutoff).
2027  * The ongoing aggressive VACUUM won't be able to do that
2028  * unless it can freeze an XID (or MXID) from this tuple now.
2029  *
2030  * The only safe option is to have caller perform processing
2031  * of this page using lazy_scan_prune. Caller might have to
2032  * wait a while for a cleanup lock, but it can't be helped.
2033  */
2034  vacrel->offnum = InvalidOffsetNumber;
2035  return false;
2036  }
2037 
2038  /*
2039  * Non-aggressive VACUUMs are under no obligation to advance
2040  * relfrozenxid (even by one XID). We can be much laxer here.
2041  *
2042  * Currently we always just accept an older final relfrozenxid
2043  * and/or relminmxid value. We never make caller wait or work a
2044  * little harder, even when it likely makes sense to do so.
2045  */
2046  }
2047 
2048  ItemPointerSet(&(tuple.t_self), blkno, offnum);
2049  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2050  tuple.t_len = ItemIdGetLength(itemid);
2051  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2052 
2053  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf))
2054  {
2056  case HEAPTUPLE_LIVE:
2057 
2058  /*
2059  * Count both cases as live, just like lazy_scan_prune
2060  */
2061  live_tuples++;
2062 
2063  break;
2064  case HEAPTUPLE_DEAD:
2065 
2066  /*
2067  * There is some useful work for pruning to do, that won't be
2068  * done due to failure to get a cleanup lock.
2069  */
2070  missed_dead_tuples++;
2071  break;
2073 
2074  /*
2075  * Count in recently_dead_tuples, just like lazy_scan_prune
2076  */
2077  recently_dead_tuples++;
2078  break;
2080 
2081  /*
2082  * Do not count these rows as live, just like lazy_scan_prune
2083  */
2084  break;
2085  default:
2086  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2087  break;
2088  }
2089  }
2090 
2091  vacrel->offnum = InvalidOffsetNumber;
2092 
2093  /*
2094  * By here we know for sure that caller can put off freezing and pruning
2095  * this particular page until the next VACUUM. Remember its details now.
2096  * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2097  */
2098  vacrel->NewRelfrozenXid = NewRelfrozenXid;
2099  vacrel->NewRelminMxid = NewRelminMxid;
2100 
2101  /* Save any LP_DEAD items found on the page in dead_items array */
2102  if (vacrel->nindexes == 0)
2103  {
2104  /* Using one-pass strategy (since table has no indexes) */
2105  if (lpdead_items > 0)
2106  {
2107  /*
2108  * Perfunctory handling for the corner case where a single pass
2109  * strategy VACUUM cannot get a cleanup lock, and it turns out
2110  * that there is one or more LP_DEAD items: just count the LP_DEAD
2111  * items as missed_dead_tuples instead. (This is a bit dishonest,
2112  * but it beats having to maintain specialized heap vacuuming code
2113  * forever, for vanishingly little benefit.)
2114  */
2115  *hastup = true;
2116  missed_dead_tuples += lpdead_items;
2117  }
2118 
2119  *recordfreespace = true;
2120  }
2121  else if (lpdead_items == 0)
2122  {
2123  /*
2124  * Won't be vacuuming this page later, so record page's freespace in
2125  * the FSM now
2126  */
2127  *recordfreespace = true;
2128  }
2129  else
2130  {
2131  VacDeadItems *dead_items = vacrel->dead_items;
2132  ItemPointerData tmp;
2133 
2134  /*
2135  * Page has LP_DEAD items, and so any references/TIDs that remain in
2136  * indexes will be deleted during index vacuuming (and then marked
2137  * LP_UNUSED in the heap)
2138  */
2139  vacrel->lpdead_item_pages++;
2140 
2141  ItemPointerSetBlockNumber(&tmp, blkno);
2142 
2143  for (int i = 0; i < lpdead_items; i++)
2144  {
2145  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
2146  dead_items->items[dead_items->num_items++] = tmp;
2147  }
2148 
2149  Assert(dead_items->num_items <= dead_items->max_items);
2151  dead_items->num_items);
2152 
2153  vacrel->lpdead_items += lpdead_items;
2154 
2155  /*
2156  * Assume that we'll go on to vacuum this heap page during final pass
2157  * over the heap. Don't record free space until then.
2158  */
2159  *recordfreespace = false;
2160  }
2161 
2162  /*
2163  * Finally, add relevant page-local counts to whole-VACUUM counts
2164  */
2165  vacrel->live_tuples += live_tuples;
2166  vacrel->recently_dead_tuples += recently_dead_tuples;
2167  vacrel->missed_dead_tuples += missed_dead_tuples;
2168  if (missed_dead_tuples > 0)
2169  vacrel->missed_dead_pages++;
2170 
2171  /* Caller won't need to call lazy_scan_prune with same page */
2172  return true;
2173 }
2174 
2175 /*
2176  * Main entry point for index vacuuming and heap vacuuming.
2177  *
2178  * Removes items collected in dead_items from table's indexes, then marks the
2179  * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2180  * for full details.
2181  *
2182  * Also empties dead_items, freeing up space for later TIDs.
2183  *
2184  * We may choose to bypass index vacuuming at this point, though only when the
2185  * ongoing VACUUM operation will definitely only have one index scan/round of
2186  * index vacuuming.
2187  */
2188 static void
2190 {
2191  bool bypass;
2192 
2193  /* Should not end up here with no indexes */
2194  Assert(vacrel->nindexes > 0);
2195  Assert(vacrel->lpdead_item_pages > 0);
2196 
2197  if (!vacrel->do_index_vacuuming)
2198  {
2199  Assert(!vacrel->do_index_cleanup);
2200  vacrel->dead_items->num_items = 0;
2201  return;
2202  }
2203 
2204  /*
2205  * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2206  *
2207  * We currently only do this in cases where the number of LP_DEAD items
2208  * for the entire VACUUM operation is close to zero. This avoids sharp
2209  * discontinuities in the duration and overhead of successive VACUUM
2210  * operations that run against the same table with a fixed workload.
2211  * Ideally, successive VACUUM operations will behave as if there are
2212  * exactly zero LP_DEAD items in cases where there are close to zero.
2213  *
2214  * This is likely to be helpful with a table that is continually affected
2215  * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2216  * have small aberrations that lead to just a few heap pages retaining
2217  * only one or two LP_DEAD items. This is pretty common; even when the
2218  * DBA goes out of their way to make UPDATEs use HOT, it is practically
2219  * impossible to predict whether HOT will be applied in 100% of cases.
2220  * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2221  * HOT through careful tuning.
2222  */
2223  bypass = false;
2224  if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2225  {
2226  BlockNumber threshold;
2227 
2228  Assert(vacrel->num_index_scans == 0);
2229  Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
2230  Assert(vacrel->do_index_vacuuming);
2231  Assert(vacrel->do_index_cleanup);
2232 
2233  /*
2234  * This crossover point at which we'll start to do index vacuuming is
2235  * expressed as a percentage of the total number of heap pages in the
2236  * table that are known to have at least one LP_DEAD item. This is
2237  * much more important than the total number of LP_DEAD items, since
2238  * it's a proxy for the number of heap pages whose visibility map bits
2239  * cannot be set on account of bypassing index and heap vacuuming.
2240  *
2241  * We apply one further precautionary test: the space currently used
2242  * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2243  * not exceed 32MB. This limits the risk that we will bypass index
2244  * vacuuming again and again until eventually there is a VACUUM whose
2245  * dead_items space is not CPU cache resident.
2246  *
2247  * We don't take any special steps to remember the LP_DEAD items (such
2248  * as counting them in our final update to the stats system) when the
2249  * optimization is applied. Though the accounting used in analyze.c's
2250  * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2251  * rows in its own stats report, that's okay. The discrepancy should
2252  * be negligible. If this optimization is ever expanded to cover more
2253  * cases then this may need to be reconsidered.
2254  */
2255  threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2256  bypass = (vacrel->lpdead_item_pages < threshold &&
2257  vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
2258  }
2259 
2260  if (bypass)
2261  {
2262  /*
2263  * There are almost zero TIDs. Behave as if there were precisely
2264  * zero: bypass index vacuuming, but do index cleanup.
2265  *
2266  * We expect that the ongoing VACUUM operation will finish very
2267  * quickly, so there is no point in considering speeding up as a
2268  * failsafe against wraparound failure. (Index cleanup is expected to
2269  * finish very quickly in cases where there were no ambulkdelete()
2270  * calls.)
2271  */
2272  vacrel->do_index_vacuuming = false;
2273  }
2274  else if (lazy_vacuum_all_indexes(vacrel))
2275  {
2276  /*
2277  * We successfully completed a round of index vacuuming. Do related
2278  * heap vacuuming now.
2279  */
2280  lazy_vacuum_heap_rel(vacrel);
2281  }
2282  else
2283  {
2284  /*
2285  * Failsafe case.
2286  *
2287  * We attempted index vacuuming, but didn't finish a full round/full
2288  * index scan. This happens when relfrozenxid or relminmxid is too
2289  * far in the past.
2290  *
2291  * From this point on the VACUUM operation will do no further index
2292  * vacuuming or heap vacuuming. This VACUUM operation won't end up
2293  * back here again.
2294  */
2295  Assert(vacrel->failsafe_active);
2296  }
2297 
2298  /*
2299  * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2300  * vacuum)
2301  */
2302  vacrel->dead_items->num_items = 0;
2303 }
2304 
2305 /*
2306  * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2307  *
2308  * Returns true in the common case when all indexes were successfully
2309  * vacuumed. Returns false in rare cases where we determined that the ongoing
2310  * VACUUM operation is at risk of taking too long to finish, leading to
2311  * wraparound failure.
2312  */
2313 static bool
2315 {
2316  bool allindexes = true;
2317 
2318  Assert(vacrel->nindexes > 0);
2319  Assert(vacrel->do_index_vacuuming);
2320  Assert(vacrel->do_index_cleanup);
2321 
2322  /* Precheck for XID wraparound emergencies */
2323  if (lazy_check_wraparound_failsafe(vacrel))
2324  {
2325  /* Wraparound emergency -- don't even start an index scan */
2326  return false;
2327  }
2328 
2329  /* Report that we are now vacuuming indexes */
2332 
2333  if (!ParallelVacuumIsActive(vacrel))
2334  {
2335  for (int idx = 0; idx < vacrel->nindexes; idx++)
2336  {
2337  Relation indrel = vacrel->indrels[idx];
2338  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2339 
2340  vacrel->indstats[idx] =
2341  lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples,
2342  vacrel);
2343 
2344  if (lazy_check_wraparound_failsafe(vacrel))
2345  {
2346  /* Wraparound emergency -- end current index scan */
2347  allindexes = false;
2348  break;
2349  }
2350  }
2351  }
2352  else
2353  {
2354  /* Outsource everything to parallel variant */
2356  vacrel->num_index_scans);
2357 
2358  /*
2359  * Do a postcheck to consider applying wraparound failsafe now. Note
2360  * that parallel VACUUM only gets the precheck and this postcheck.
2361  */
2362  if (lazy_check_wraparound_failsafe(vacrel))
2363  allindexes = false;
2364  }
2365 
2366  /*
2367  * We delete all LP_DEAD items from the first heap pass in all indexes on
2368  * each call here (except calls where we choose to do the failsafe). This
2369  * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2370  * of the failsafe triggering, which prevents the next call from taking
2371  * place).
2372  */
2373  Assert(vacrel->num_index_scans > 0 ||
2374  vacrel->dead_items->num_items == vacrel->lpdead_items);
2375  Assert(allindexes || vacrel->failsafe_active);
2376 
2377  /*
2378  * Increase and report the number of index scans.
2379  *
2380  * We deliberately include the case where we started a round of bulk
2381  * deletes that we weren't able to finish due to the failsafe triggering.
2382  */
2383  vacrel->num_index_scans++;
2385  vacrel->num_index_scans);
2386 
2387  return allindexes;
2388 }
2389 
2390 /*
2391  * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2392  *
2393  * This routine marks LP_DEAD items in vacrel->dead_items array as LP_UNUSED.
2394  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
2395  * at all.
2396  *
2397  * We may also be able to truncate the line pointer array of the heap pages we
2398  * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2399  * array, it can be reclaimed as free space. These LP_UNUSED items usually
2400  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2401  * each page to LP_UNUSED, and then consider if it's possible to truncate the
2402  * page's line pointer array).
2403  *
2404  * Note: the reason for doing this as a second pass is we cannot remove the
2405  * tuples until we've removed their index entries, and we want to process
2406  * index entry removal in batches as large as possible.
2407  */
2408 static void
2410 {
2411  int index;
2412  BlockNumber vacuumed_pages;
2413  Buffer vmbuffer = InvalidBuffer;
2414  LVSavedErrInfo saved_err_info;
2415 
2416  Assert(vacrel->do_index_vacuuming);
2417  Assert(vacrel->do_index_cleanup);
2418  Assert(vacrel->num_index_scans > 0);
2419 
2420  /* Report that we are now vacuuming the heap */
2423 
2424  /* Update error traceback information */
2425  update_vacuum_error_info(vacrel, &saved_err_info,
2428 
2429  vacuumed_pages = 0;
2430 
2431  index = 0;
2432  while (index < vacrel->dead_items->num_items)
2433  {
2434  BlockNumber tblk;
2435  Buffer buf;
2436  Page page;
2437  Size freespace;
2438 
2440 
2441  tblk = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
2442  vacrel->blkno = tblk;
2443  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, tblk, RBM_NORMAL,
2444  vacrel->bstrategy);
2446  index = lazy_vacuum_heap_page(vacrel, tblk, buf, index, &vmbuffer);
2447 
2448  /* Now that we've vacuumed the page, record its available space */
2449  page = BufferGetPage(buf);
2450  freespace = PageGetHeapFreeSpace(page);
2451 
2453  RecordPageWithFreeSpace(vacrel->rel, tblk, freespace);
2454  vacuumed_pages++;
2455  }
2456 
2457  /* Clear the block number information */
2458  vacrel->blkno = InvalidBlockNumber;
2459 
2460  if (BufferIsValid(vmbuffer))
2461  {
2462  ReleaseBuffer(vmbuffer);
2463  vmbuffer = InvalidBuffer;
2464  }
2465 
2466  /*
2467  * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2468  * the second heap pass. No more, no less.
2469  */
2470  Assert(index > 0);
2471  Assert(vacrel->num_index_scans > 1 ||
2472  (index == vacrel->lpdead_items &&
2473  vacuumed_pages == vacrel->lpdead_item_pages));
2474 
2475  ereport(DEBUG2,
2476  (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2477  vacrel->relname, (long long) index, vacuumed_pages)));
2478 
2479  /* Revert to the previous phase information for error traceback */
2480  restore_vacuum_error_info(vacrel, &saved_err_info);
2481 }
2482 
2483 /*
2484  * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2485  * vacrel->dead_items array.
2486  *
2487  * Caller must have an exclusive buffer lock on the buffer (though a full
2488  * cleanup lock is also acceptable).
2489  *
2490  * index is an offset into the vacrel->dead_items array for the first listed
2491  * LP_DEAD item on the page. The return value is the first index immediately
2492  * after all LP_DEAD items for the same page in the array.
2493  */
2494 static int
2496  int index, Buffer *vmbuffer)
2497 {
2498  VacDeadItems *dead_items = vacrel->dead_items;
2499  Page page = BufferGetPage(buffer);
2501  int uncnt = 0;
2502  TransactionId visibility_cutoff_xid;
2503  bool all_frozen;
2504  LVSavedErrInfo saved_err_info;
2505 
2506  Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2507 
2509 
2510  /* Update error traceback information */
2511  update_vacuum_error_info(vacrel, &saved_err_info,
2514 
2516 
2517  for (; index < dead_items->num_items; index++)
2518  {
2519  BlockNumber tblk;
2520  OffsetNumber toff;
2521  ItemId itemid;
2522 
2523  tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
2524  if (tblk != blkno)
2525  break; /* past end of tuples for this block */
2526  toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
2527  itemid = PageGetItemId(page, toff);
2528 
2529  Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2530  ItemIdSetUnused(itemid);
2531  unused[uncnt++] = toff;
2532  }
2533 
2534  Assert(uncnt > 0);
2535 
2536  /* Attempt to truncate line pointer array now */
2538 
2539  /*
2540  * Mark buffer dirty before we write WAL.
2541  */
2542  MarkBufferDirty(buffer);
2543 
2544  /* XLOG stuff */
2545  if (RelationNeedsWAL(vacrel->rel))
2546  {
2547  xl_heap_vacuum xlrec;
2548  XLogRecPtr recptr;
2549 
2550  xlrec.nunused = uncnt;
2551 
2552  XLogBeginInsert();
2553  XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
2554 
2555  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2556  XLogRegisterBufData(0, (char *) unused, uncnt * sizeof(OffsetNumber));
2557 
2558  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
2559 
2560  PageSetLSN(page, recptr);
2561  }
2562 
2563  /*
2564  * End critical section, so we safely can do visibility tests (which
2565  * possibly need to perform IO and allocate memory!). If we crash now the
2566  * page (including the corresponding vm bit) might not be marked all
2567  * visible, but that's fine. A later vacuum will fix that.
2568  */
2569  END_CRIT_SECTION();
2570 
2571  /*
2572  * Now that we have removed the LD_DEAD items from the page, once again
2573  * check if the page has become all-visible. The page is already marked
2574  * dirty, exclusively locked, and, if needed, a full page image has been
2575  * emitted.
2576  */
2577  if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2578  &all_frozen))
2579  PageSetAllVisible(page);
2580 
2581  /*
2582  * All the changes to the heap page have been done. If the all-visible
2583  * flag is now set, also set the VM all-visible bit (and, if possible, the
2584  * all-frozen bit) unless this has already been done previously.
2585  */
2586  if (PageIsAllVisible(page))
2587  {
2588  uint8 flags = 0;
2589  uint8 vm_status = visibilitymap_get_status(vacrel->rel,
2590  blkno, vmbuffer);
2591 
2592  /* Set the VM all-frozen bit to flag, if needed */
2593  if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
2594  flags |= VISIBILITYMAP_ALL_VISIBLE;
2595  if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
2596  flags |= VISIBILITYMAP_ALL_FROZEN;
2597 
2598  Assert(BufferIsValid(*vmbuffer));
2599  if (flags != 0)
2600  visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2601  *vmbuffer, visibility_cutoff_xid, flags);
2602  }
2603 
2604  /* Revert to the previous phase information for error traceback */
2605  restore_vacuum_error_info(vacrel, &saved_err_info);
2606  return index;
2607 }
2608 
2609 /*
2610  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2611  * relfrozenxid and/or relminmxid that is dangerously far in the past.
2612  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2613  * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2614  *
2615  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2616  * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2617  * that it started out with.
2618  *
2619  * Returns true when failsafe has been triggered.
2620  */
2621 static bool
2623 {
2626 
2627  /* Don't warn more than once per VACUUM */
2628  if (vacrel->failsafe_active)
2629  return true;
2630 
2632  vacrel->relminmxid)))
2633  {
2634  vacrel->failsafe_active = true;
2635 
2636  /* Disable index vacuuming, index cleanup, and heap rel truncation */
2637  vacrel->do_index_vacuuming = false;
2638  vacrel->do_index_cleanup = false;
2639  vacrel->do_rel_truncate = false;
2640 
2641  ereport(WARNING,
2642  (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2644  vacrel->relnamespace,
2645  vacrel->relname,
2646  vacrel->num_index_scans),
2647  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2648  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2649  "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2650 
2651  /* Stop applying cost limits from this point on */
2652  VacuumCostActive = false;
2653  VacuumCostBalance = 0;
2654 
2655  return true;
2656  }
2657 
2658  return false;
2659 }
2660 
2661 /*
2662  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2663  */
2664 static void
2666 {
2667  double reltuples = vacrel->new_rel_tuples;
2668  bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2669 
2670  Assert(vacrel->do_index_cleanup);
2671  Assert(vacrel->nindexes > 0);
2672 
2673  /* Report that we are now cleaning up indexes */
2676 
2677  if (!ParallelVacuumIsActive(vacrel))
2678  {
2679  for (int idx = 0; idx < vacrel->nindexes; idx++)
2680  {
2681  Relation indrel = vacrel->indrels[idx];
2682  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2683 
2684  vacrel->indstats[idx] =
2685  lazy_cleanup_one_index(indrel, istat, reltuples,
2686  estimated_count, vacrel);
2687  }
2688  }
2689  else
2690  {
2691  /* Outsource everything to parallel variant */
2692  parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2693  vacrel->num_index_scans,
2694  estimated_count);
2695  }
2696 }
2697 
2698 /*
2699  * lazy_vacuum_one_index() -- vacuum index relation.
2700  *
2701  * Delete all the index tuples containing a TID collected in
2702  * vacrel->dead_items array. Also update running statistics.
2703  * Exact details depend on index AM's ambulkdelete routine.
2704  *
2705  * reltuples is the number of heap tuples to be passed to the
2706  * bulkdelete callback. It's always assumed to be estimated.
2707  * See indexam.sgml for more info.
2708  *
2709  * Returns bulk delete stats derived from input stats
2710  */
2711 static IndexBulkDeleteResult *
2713  double reltuples, LVRelState *vacrel)
2714 {
2715  IndexVacuumInfo ivinfo;
2716  LVSavedErrInfo saved_err_info;
2717 
2718  ivinfo.index = indrel;
2719  ivinfo.analyze_only = false;
2720  ivinfo.report_progress = false;
2721  ivinfo.estimated_count = true;
2722  ivinfo.message_level = DEBUG2;
2723  ivinfo.num_heap_tuples = reltuples;
2724  ivinfo.strategy = vacrel->bstrategy;
2725 
2726  /*
2727  * Update error traceback information.
2728  *
2729  * The index name is saved during this phase and restored immediately
2730  * after this phase. See vacuum_error_callback.
2731  */
2732  Assert(vacrel->indname == NULL);
2733  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2734  update_vacuum_error_info(vacrel, &saved_err_info,
2737 
2738  /* Do bulk deletion */
2739  istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items);
2740 
2741  /* Revert to the previous phase information for error traceback */
2742  restore_vacuum_error_info(vacrel, &saved_err_info);
2743  pfree(vacrel->indname);
2744  vacrel->indname = NULL;
2745 
2746  return istat;
2747 }
2748 
2749 /*
2750  * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2751  *
2752  * Calls index AM's amvacuumcleanup routine. reltuples is the number
2753  * of heap tuples and estimated_count is true if reltuples is an
2754  * estimated value. See indexam.sgml for more info.
2755  *
2756  * Returns bulk delete stats derived from input stats
2757  */
2758 static IndexBulkDeleteResult *
2760  double reltuples, bool estimated_count,
2761  LVRelState *vacrel)
2762 {
2763  IndexVacuumInfo ivinfo;
2764  LVSavedErrInfo saved_err_info;
2765 
2766  ivinfo.index = indrel;
2767  ivinfo.analyze_only = false;
2768  ivinfo.report_progress = false;
2769  ivinfo.estimated_count = estimated_count;
2770  ivinfo.message_level = DEBUG2;
2771 
2772  ivinfo.num_heap_tuples = reltuples;
2773  ivinfo.strategy = vacrel->bstrategy;
2774 
2775  /*
2776  * Update error traceback information.
2777  *
2778  * The index name is saved during this phase and restored immediately
2779  * after this phase. See vacuum_error_callback.
2780  */
2781  Assert(vacrel->indname == NULL);
2782  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2783  update_vacuum_error_info(vacrel, &saved_err_info,
2786 
2787  istat = vac_cleanup_one_index(&ivinfo, istat);
2788 
2789  /* Revert to the previous phase information for error traceback */
2790  restore_vacuum_error_info(vacrel, &saved_err_info);
2791  pfree(vacrel->indname);
2792  vacrel->indname = NULL;
2793 
2794  return istat;
2795 }
2796 
2797 /*
2798  * should_attempt_truncation - should we attempt to truncate the heap?
2799  *
2800  * Don't even think about it unless we have a shot at releasing a goodly
2801  * number of pages. Otherwise, the time taken isn't worth it, mainly because
2802  * an AccessExclusive lock must be replayed on any hot standby, where it can
2803  * be particularly disruptive.
2804  *
2805  * Also don't attempt it if wraparound failsafe is in effect. The entire
2806  * system might be refusing to allocate new XIDs at this point. The system
2807  * definitely won't return to normal unless and until VACUUM actually advances
2808  * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2809  * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2810  * truncate the table under these circumstances, an XID exhaustion error might
2811  * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2812  * There is very little chance of truncation working out when the failsafe is
2813  * in effect in any case. lazy_scan_prune makes the optimistic assumption
2814  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2815  * we're called.
2816  *
2817  * Also don't attempt it if we are doing early pruning/vacuuming, because a
2818  * scan which cannot find a truncated heap page cannot determine that the
2819  * snapshot is too old to read that page.
2820  */
2821 static bool
2823 {
2824  BlockNumber possibly_freeable;
2825 
2826  if (!vacrel->do_rel_truncate || vacrel->failsafe_active ||
2828  return false;
2829 
2830  possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2831  if (possibly_freeable > 0 &&
2832  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2833  possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2834  return true;
2835 
2836  return false;
2837 }
2838 
2839 /*
2840  * lazy_truncate_heap - try to truncate off any empty pages at the end
2841  */
2842 static void
2844 {
2845  BlockNumber orig_rel_pages = vacrel->rel_pages;
2846  BlockNumber new_rel_pages;
2847  bool lock_waiter_detected;
2848  int lock_retry;
2849 
2850  /* Report that we are now truncating */
2853 
2854  /* Update error traceback information one last time */
2857 
2858  /*
2859  * Loop until no more truncating can be done.
2860  */
2861  do
2862  {
2863  /*
2864  * We need full exclusive lock on the relation in order to do
2865  * truncation. If we can't get it, give up rather than waiting --- we
2866  * don't want to block other backends, and we don't want to deadlock
2867  * (which is quite possible considering we already hold a lower-grade
2868  * lock).
2869  */
2870  lock_waiter_detected = false;
2871  lock_retry = 0;
2872  while (true)
2873  {
2875  break;
2876 
2877  /*
2878  * Check for interrupts while trying to (re-)acquire the exclusive
2879  * lock.
2880  */
2882 
2883  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2885  {
2886  /*
2887  * We failed to establish the lock in the specified number of
2888  * retries. This means we give up truncating.
2889  */
2890  ereport(vacrel->verbose ? INFO : DEBUG2,
2891  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2892  vacrel->relname)));
2893  return;
2894  }
2895 
2896  (void) WaitLatch(MyLatch,
2901  }
2902 
2903  /*
2904  * Now that we have exclusive lock, look to see if the rel has grown
2905  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2906  * the newly added pages presumably contain non-deletable tuples.
2907  */
2908  new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2909  if (new_rel_pages != orig_rel_pages)
2910  {
2911  /*
2912  * Note: we intentionally don't update vacrel->rel_pages with the
2913  * new rel size here. If we did, it would amount to assuming that
2914  * the new pages are empty, which is unlikely. Leaving the numbers
2915  * alone amounts to assuming that the new pages have the same
2916  * tuple density as existing ones, which is less unlikely.
2917  */
2919  return;
2920  }
2921 
2922  /*
2923  * Scan backwards from the end to verify that the end pages actually
2924  * contain no tuples. This is *necessary*, not optional, because
2925  * other backends could have added tuples to these pages whilst we
2926  * were vacuuming.
2927  */
2928  new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2929  vacrel->blkno = new_rel_pages;
2930 
2931  if (new_rel_pages >= orig_rel_pages)
2932  {
2933  /* can't do anything after all */
2935  return;
2936  }
2937 
2938  /*
2939  * Okay to truncate.
2940  */
2941  RelationTruncate(vacrel->rel, new_rel_pages);
2942 
2943  /*
2944  * We can release the exclusive lock as soon as we have truncated.
2945  * Other backends can't safely access the relation until they have
2946  * processed the smgr invalidation that smgrtruncate sent out ... but
2947  * that should happen as part of standard invalidation processing once
2948  * they acquire lock on the relation.
2949  */
2951 
2952  /*
2953  * Update statistics. Here, it *is* correct to adjust rel_pages
2954  * without also touching reltuples, since the tuple count wasn't
2955  * changed by the truncation.
2956  */
2957  vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2958  vacrel->rel_pages = new_rel_pages;
2959 
2960  ereport(vacrel->verbose ? INFO : DEBUG2,
2961  (errmsg("table \"%s\": truncated %u to %u pages",
2962  vacrel->relname,
2963  orig_rel_pages, new_rel_pages)));
2964  orig_rel_pages = new_rel_pages;
2965  } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2966 }
2967 
2968 /*
2969  * Rescan end pages to verify that they are (still) empty of tuples.
2970  *
2971  * Returns number of nondeletable pages (last nonempty page + 1).
2972  */
2973 static BlockNumber
2974 count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2975 {
2976  BlockNumber blkno;
2977  BlockNumber prefetchedUntil;
2978  instr_time starttime;
2979 
2980  /* Initialize the starttime if we check for conflicting lock requests */
2981  INSTR_TIME_SET_CURRENT(starttime);
2982 
2983  /*
2984  * Start checking blocks at what we believe relation end to be and move
2985  * backwards. (Strange coding of loop control is needed because blkno is
2986  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2987  * in forward direction, so that OS-level readahead can kick in.
2988  */
2989  blkno = vacrel->rel_pages;
2991  "prefetch size must be power of 2");
2992  prefetchedUntil = InvalidBlockNumber;
2993  while (blkno > vacrel->nonempty_pages)
2994  {
2995  Buffer buf;
2996  Page page;
2997  OffsetNumber offnum,
2998  maxoff;
2999  bool hastup;
3000 
3001  /*
3002  * Check if another process requests a lock on our relation. We are
3003  * holding an AccessExclusiveLock here, so they will be waiting. We
3004  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3005  * only check if that interval has elapsed once every 32 blocks to
3006  * keep the number of system calls and actual shared lock table
3007  * lookups to a minimum.
3008  */
3009  if ((blkno % 32) == 0)
3010  {
3011  instr_time currenttime;
3012  instr_time elapsed;
3013 
3014  INSTR_TIME_SET_CURRENT(currenttime);
3015  elapsed = currenttime;
3016  INSTR_TIME_SUBTRACT(elapsed, starttime);
3017  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3019  {
3021  {
3022  ereport(vacrel->verbose ? INFO : DEBUG2,
3023  (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3024  vacrel->relname)));
3025 
3026  *lock_waiter_detected = true;
3027  return blkno;
3028  }
3029  starttime = currenttime;
3030  }
3031  }
3032 
3033  /*
3034  * We don't insert a vacuum delay point here, because we have an
3035  * exclusive lock on the table which we want to hold for as short a
3036  * time as possible. We still need to check for interrupts however.
3037  */
3039 
3040  blkno--;
3041 
3042  /* If we haven't prefetched this lot yet, do so now. */
3043  if (prefetchedUntil > blkno)
3044  {
3045  BlockNumber prefetchStart;
3046  BlockNumber pblkno;
3047 
3048  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3049  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3050  {
3051  PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3053  }
3054  prefetchedUntil = prefetchStart;
3055  }
3056 
3057  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3058  vacrel->bstrategy);
3059 
3060  /* In this phase we only need shared access to the buffer */
3062 
3063  page = BufferGetPage(buf);
3064 
3065  if (PageIsNew(page) || PageIsEmpty(page))
3066  {
3068  continue;
3069  }
3070 
3071  hastup = false;
3072  maxoff = PageGetMaxOffsetNumber(page);
3073  for (offnum = FirstOffsetNumber;
3074  offnum <= maxoff;
3075  offnum = OffsetNumberNext(offnum))
3076  {
3077  ItemId itemid;
3078 
3079  itemid = PageGetItemId(page, offnum);
3080 
3081  /*
3082  * Note: any non-unused item should be taken as a reason to keep
3083  * this page. Even an LP_DEAD item makes truncation unsafe, since
3084  * we must not have cleaned out its index entries.
3085  */
3086  if (ItemIdIsUsed(itemid))
3087  {
3088  hastup = true;
3089  break; /* can stop scanning */
3090  }
3091  } /* scan along page */
3092 
3094 
3095  /* Done scanning if we found a tuple here */
3096  if (hastup)
3097  return blkno + 1;
3098  }
3099 
3100  /*
3101  * If we fall out of the loop, all the previously-thought-to-be-empty
3102  * pages still are; we need not bother to look at the last known-nonempty
3103  * page.
3104  */
3105  return vacrel->nonempty_pages;
3106 }
3107 
3108 /*
3109  * Returns the number of dead TIDs that VACUUM should allocate space to
3110  * store, given a heap rel of size vacrel->rel_pages, and given current
3111  * maintenance_work_mem setting (or current autovacuum_work_mem setting,
3112  * when applicable).
3113  *
3114  * See the comments at the head of this file for rationale.
3115  */
3116 static int
3118 {
3119  int64 max_items;
3120  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
3121  autovacuum_work_mem != -1 ?
3123 
3124  if (vacrel->nindexes > 0)
3125  {
3126  BlockNumber rel_pages = vacrel->rel_pages;
3127 
3128  max_items = MAXDEADITEMS(vac_work_mem * 1024L);
3129  max_items = Min(max_items, INT_MAX);
3130  max_items = Min(max_items, MAXDEADITEMS(MaxAllocSize));
3131 
3132  /* curious coding here to ensure the multiplication can't overflow */
3133  if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
3134  max_items = rel_pages * MaxHeapTuplesPerPage;
3135 
3136  /* stay sane if small maintenance_work_mem */
3137  max_items = Max(max_items, MaxHeapTuplesPerPage);
3138  }
3139  else
3140  {
3141  /* One-pass case only stores a single heap page's TIDs at a time */
3142  max_items = MaxHeapTuplesPerPage;
3143  }
3144 
3145  return (int) max_items;
3146 }
3147 
3148 /*
3149  * Allocate dead_items (either using palloc, or in dynamic shared memory).
3150  * Sets dead_items in vacrel for caller.
3151  *
3152  * Also handles parallel initialization as part of allocating dead_items in
3153  * DSM when required.
3154  */
3155 static void
3156 dead_items_alloc(LVRelState *vacrel, int nworkers)
3157 {
3158  VacDeadItems *dead_items;
3159  int max_items;
3160 
3161  max_items = dead_items_max_items(vacrel);
3162  Assert(max_items >= MaxHeapTuplesPerPage);
3163 
3164  /*
3165  * Initialize state for a parallel vacuum. As of now, only one worker can
3166  * be used for an index, so we invoke parallelism only if there are at
3167  * least two indexes on a table.
3168  */
3169  if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3170  {
3171  /*
3172  * Since parallel workers cannot access data in temporary tables, we
3173  * can't perform parallel vacuum on them.
3174  */
3175  if (RelationUsesLocalBuffers(vacrel->rel))
3176  {
3177  /*
3178  * Give warning only if the user explicitly tries to perform a
3179  * parallel vacuum on the temporary table.
3180  */
3181  if (nworkers > 0)
3182  ereport(WARNING,
3183  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3184  vacrel->relname)));
3185  }
3186  else
3187  vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3188  vacrel->nindexes, nworkers,
3189  max_items,
3190  vacrel->verbose ? INFO : DEBUG2,
3191  vacrel->bstrategy);
3192 
3193  /* If parallel mode started, dead_items space is allocated in DSM */
3194  if (ParallelVacuumIsActive(vacrel))
3195  {
3196  vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs);
3197  return;
3198  }
3199  }
3200 
3201  /* Serial VACUUM case */
3202  dead_items = (VacDeadItems *) palloc(vac_max_items_to_alloc_size(max_items));
3203  dead_items->max_items = max_items;
3204  dead_items->num_items = 0;
3205 
3206  vacrel->dead_items = dead_items;
3207 }
3208 
3209 /*
3210  * Perform cleanup for resources allocated in dead_items_alloc
3211  */
3212 static void
3214 {
3215  if (!ParallelVacuumIsActive(vacrel))
3216  {
3217  /* Don't bother with pfree here */
3218  return;
3219  }
3220 
3221  /* End parallel mode */
3222  parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3223  vacrel->pvs = NULL;
3224 }
3225 
3226 /*
3227  * Check if every tuple in the given page is visible to all current and future
3228  * transactions. Also return the visibility_cutoff_xid which is the highest
3229  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3230  * on this page is frozen.
3231  *
3232  * This is a stripped down version of lazy_scan_prune(). If you change
3233  * anything here, make sure that everything stays in sync. Note that an
3234  * assertion calls us to verify that everybody still agrees. Be sure to avoid
3235  * introducing new side-effects here.
3236  */
3237 static bool
3239  TransactionId *visibility_cutoff_xid,
3240  bool *all_frozen)
3241 {
3242  Page page = BufferGetPage(buf);
3244  OffsetNumber offnum,
3245  maxoff;
3246  bool all_visible = true;
3247 
3248  *visibility_cutoff_xid = InvalidTransactionId;
3249  *all_frozen = true;
3250 
3251  maxoff = PageGetMaxOffsetNumber(page);
3252  for (offnum = FirstOffsetNumber;
3253  offnum <= maxoff && all_visible;
3254  offnum = OffsetNumberNext(offnum))
3255  {
3256  ItemId itemid;
3257  HeapTupleData tuple;
3258 
3259  /*
3260  * Set the offset number so that we can display it along with any
3261  * error that occurred while processing this tuple.
3262  */
3263  vacrel->offnum = offnum;
3264  itemid = PageGetItemId(page, offnum);
3265 
3266  /* Unused or redirect line pointers are of no interest */
3267  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3268  continue;
3269 
3270  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3271 
3272  /*
3273  * Dead line pointers can have index pointers pointing to them. So
3274  * they can't be treated as visible
3275  */
3276  if (ItemIdIsDead(itemid))
3277  {
3278  all_visible = false;
3279  *all_frozen = false;
3280  break;
3281  }
3282 
3283  Assert(ItemIdIsNormal(itemid));
3284 
3285  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3286  tuple.t_len = ItemIdGetLength(itemid);
3287  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3288 
3289  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf))
3290  {
3291  case HEAPTUPLE_LIVE:
3292  {
3293  TransactionId xmin;
3294 
3295  /* Check comments in lazy_scan_prune. */
3297  {
3298  all_visible = false;
3299  *all_frozen = false;
3300  break;
3301  }
3302 
3303  /*
3304  * The inserter definitely committed. But is it old enough
3305  * that everyone sees it as committed?
3306  */
3307  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3308  if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
3309  {
3310  all_visible = false;
3311  *all_frozen = false;
3312  break;
3313  }
3314 
3315  /* Track newest xmin on page. */
3316  if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
3317  *visibility_cutoff_xid = xmin;
3318 
3319  /* Check whether this tuple is already frozen or not */
3320  if (all_visible && *all_frozen &&
3322  *all_frozen = false;
3323  }
3324  break;
3325 
3326  case HEAPTUPLE_DEAD:
3330  {
3331  all_visible = false;
3332  *all_frozen = false;
3333  break;
3334  }
3335  default:
3336  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3337  break;
3338  }
3339  } /* scan along page */
3340 
3341  /* Clear the offset information once we have processed the given page. */
3342  vacrel->offnum = InvalidOffsetNumber;
3343 
3344  return all_visible;
3345 }
3346 
3347 /*
3348  * Update index statistics in pg_class if the statistics are accurate.
3349  */
3350 static void
3352 {
3353  Relation *indrels = vacrel->indrels;
3354  int nindexes = vacrel->nindexes;
3355  IndexBulkDeleteResult **indstats = vacrel->indstats;
3356 
3357  Assert(vacrel->do_index_cleanup);
3358 
3359  for (int idx = 0; idx < nindexes; idx++)
3360  {
3361  Relation indrel = indrels[idx];
3362  IndexBulkDeleteResult *istat = indstats[idx];
3363 
3364  if (istat == NULL || istat->estimated_count)
3365  continue;
3366 
3367  /* Update index statistics */
3368  vac_update_relstats(indrel,
3369  istat->num_pages,
3370  istat->num_index_tuples,
3371  0,
3372  false,
3375  NULL, NULL, false);
3376  }
3377 }
3378 
3379 /*
3380  * Error context callback for errors occurring during vacuum. The error
3381  * context messages for index phases should match the messages set in parallel
3382  * vacuum. If you change this function for those phases, change
3383  * parallel_vacuum_error_callback() as well.
3384  */
3385 static void
3387 {
3388  LVRelState *errinfo = arg;
3389 
3390  switch (errinfo->phase)
3391  {
3393  if (BlockNumberIsValid(errinfo->blkno))
3394  {
3395  if (OffsetNumberIsValid(errinfo->offnum))
3396  errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3397  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3398  else
3399  errcontext("while scanning block %u of relation \"%s.%s\"",
3400  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3401  }
3402  else
3403  errcontext("while scanning relation \"%s.%s\"",
3404  errinfo->relnamespace, errinfo->relname);
3405  break;
3406 
3408  if (BlockNumberIsValid(errinfo->blkno))
3409  {
3410  if (OffsetNumberIsValid(errinfo->offnum))
3411  errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3412  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3413  else
3414  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3415  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3416  }
3417  else
3418  errcontext("while vacuuming relation \"%s.%s\"",
3419  errinfo->relnamespace, errinfo->relname);
3420  break;
3421 
3423  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3424  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3425  break;
3426 
3428  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3429  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3430  break;
3431 
3433  if (BlockNumberIsValid(errinfo->blkno))
3434  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3435  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3436  break;
3437 
3439  default:
3440  return; /* do nothing; the errinfo may not be
3441  * initialized */
3442  }
3443 }
3444 
3445 /*
3446  * Updates the information required for vacuum error callback. This also saves
3447  * the current information which can be later restored via restore_vacuum_error_info.
3448  */
3449 static void
3451  int phase, BlockNumber blkno, OffsetNumber offnum)
3452 {
3453  if (saved_vacrel)
3454  {
3455  saved_vacrel->offnum = vacrel->offnum;
3456  saved_vacrel->blkno = vacrel->blkno;
3457  saved_vacrel->phase = vacrel->phase;
3458  }
3459 
3460  vacrel->blkno = blkno;
3461  vacrel->offnum = offnum;
3462  vacrel->phase = phase;
3463 }
3464 
3465 /*
3466  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3467  */
3468 static void
3470  const LVSavedErrInfo *saved_vacrel)
3471 {
3472  vacrel->blkno = saved_vacrel->blkno;
3473  vacrel->offnum = saved_vacrel->offnum;
3474  vacrel->phase = saved_vacrel->phase;
3475 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int autovacuum_work_mem
Definition: autovacuum.c:118
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3314
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1650
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1705
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1574
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
bool track_io_timing
Definition: bufmgr.c:137
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2766
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:592
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3934
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3957
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1583
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4232
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4175
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:759
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4409
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:147
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:271
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:98
@ RBM_NORMAL
Definition: bufmgr.h:39
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:219
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static void PageClearAllVisible(Page page)
Definition: bufpage.h:436
#define SizeOfPageHeaderData
Definition: bufpage.h:213
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define Min(x, y)
Definition: c.h:976
signed int int32
Definition: c.h:430
#define Max(x, y)
Definition: c.h:970
TransactionId MultiXactId
Definition: c.h:598
#define unlikely(x)
Definition: c.h:295
unsigned char uint8
Definition: c.h:440
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:908
uint32 TransactionId
Definition: c.h:588
size_t Size
Definition: c.h:541
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2981
int errmsg_internal(const char *fmt,...)
Definition: elog.c:993
int errdetail(const char *fmt,...)
Definition: elog.c:1039
ErrorContextCallback * error_context_stack
Definition: elog.c:94
int errhint(const char *fmt,...)
Definition: elog.c:1153
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:27
#define errcontext
Definition: elog.h:192
#define WARNING
Definition: elog.h:32
#define DEBUG2
Definition: elog.h:25
#define ERROR
Definition: elog.h:35
#define INFO
Definition: elog.h:30
#define ereport(elevel,...)
Definition: elog.h:145
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:232
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:182
int64 VacuumPageHit
Definition: globals.c:148
int64 VacuumPageMiss
Definition: globals.c:149
bool VacuumCostActive
Definition: globals.c:153
int64 VacuumPageDirty
Definition: globals.c:150
int VacuumCostBalance
Definition: globals.c:152
int maintenance_work_mem
Definition: globals.c:127
struct Latch * MyLatch
Definition: globals.c:58
Oid MyDatabaseId
Definition: globals.c:89
void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
Definition: heapam.c:6768
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7185
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen, TransactionId *relfrozenxid_out, MultiXactId *relminmxid_out)
Definition: heapam.c:6471
XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples)
Definition: heapam.c:8159
bool heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, TransactionId *relfrozenxid_out, MultiXactId *relminmxid_out)
Definition: heapam.c:7238
HTSV_Result
Definition: heapam.h:94
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:97
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:98
@ HEAPTUPLE_LIVE
Definition: heapam.h:96
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:99
@ HEAPTUPLE_DEAD
Definition: heapam.h:95
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
#define XLOG_HEAP2_VACUUM
Definition: heapam_xlog.h:55
#define SizeOfHeapVacuum
Definition: heapam_xlog.h:265
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:308
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:319
#define MaxHeapTuplesPerPage
Definition: htup_details.h:568
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:89
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:103
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:138
struct timespec instr_time
Definition: instr_time.h:83
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:280
int i
Definition: isn.c:73
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static void ItemPointerSetBlockNumber(ItemPointerData *pointer, BlockNumber blockNumber)
Definition: itemptr.h:147
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
void ResetLatch(Latch *latch)
Definition: latch.c:683
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:476
#define WL_TIMEOUT
Definition: latch.h:128
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:130
#define WL_LATCH_SET
Definition: latch.h:125
Assert(fmt[strlen(fmt) - 1] !='\n')
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:311
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:276
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:374
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3327
char * pstrdup(const char *in)
Definition: mcxt.c:1392
void pfree(void *pointer)
Definition: mcxt.c:1252
void * palloc0(Size size)
Definition: mcxt.c:1176
void * palloc(Size size)
Definition: mcxt.c:1145
#define MaxAllocSize
Definition: memutils.h:40
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3172
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
#define InvalidMultiXactId
Definition: multixact.h:24
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
void * arg
static int verbose
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
static char * buf
Definition: pg_test_fsync.c:67
int64 PgStat_Counter
Definition: pgstat.h:88
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4047
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:35
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:30
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_NUM_DEAD_TUPLES
Definition: progress.h:27
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:32
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:33
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:31
#define PROGRESS_VACUUM_MAX_DEAD_TUPLES
Definition: progress.h:26
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:34
int heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, int *nnewlpdead, OffsetNumber *off_loc)
Definition: pruneheap.c:266
#define RelationGetRelid(relation)
Definition: rel.h:501
#define RelationGetRelationName(relation)
Definition: rel.h:535
#define RelationNeedsWAL(relation)
Definition: rel.h:627
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:636
#define RelationGetNamespace(relation)
Definition: rel.h:542
@ MAIN_FORKNUM
Definition: relpath.h:50
int old_snapshot_threshold
Definition: snapmgr.c:79
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:287
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ErrorContextCallback * previous
Definition: elog.h:234
void(* callback)(void *arg)
Definition: elog.h:235
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
bool estimated_count
Definition: genam.h:77
BlockNumber pages_deleted
Definition: genam.h:81
BlockNumber pages_newly_deleted
Definition: genam.h:80
BlockNumber pages_free
Definition: genam.h:82
BlockNumber num_pages
Definition: genam.h:76
double num_index_tuples
Definition: genam.h:78
Relation index
Definition: genam.h:46
double num_heap_tuples
Definition: genam.h:51
bool analyze_only
Definition: genam.h:47
BufferAccessStrategy strategy
Definition: genam.h:52
bool report_progress
Definition: genam.h:48
int message_level
Definition: genam.h:50
bool estimated_count
Definition: genam.h:49
TransactionId visibility_cutoff_xid
Definition: vacuumlazy.c:238
ParallelVacuumState * pvs
Definition: vacuumlazy.c:163
bool verbose
Definition: vacuumlazy.c:188
int nindexes
Definition: vacuumlazy.c:145
MultiXactId relminmxid
Definition: vacuumlazy.c:167
TransactionId OldestXmin
Definition: vacuumlazy.c:171
OffsetNumber offnum
Definition: vacuumlazy.c:186
int64 tuples_deleted
Definition: vacuumlazy.c:215
MultiXactId MultiXactCutoff
Definition: vacuumlazy.c:175
BlockNumber nonempty_pages
Definition: vacuumlazy.c:204
double old_live_tuples
Definition: vacuumlazy.c:168
bool do_rel_truncate
Definition: vacuumlazy.c:159
BlockNumber scanned_pages
Definition: vacuumlazy.c:199
bool aggressive
Definition: vacuumlazy.c:148
bool failsafe_active
Definition: vacuumlazy.c:152
GlobalVisState * vistest
Definition: vacuumlazy.c:172
BlockNumber removed_pages
Definition: vacuumlazy.c:200
int num_index_scans
Definition: vacuumlazy.c:213
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:210
double new_live_tuples
Definition: vacuumlazy.c:208
double new_rel_tuples
Definition: vacuumlazy.c:207
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:177
Relation rel
Definition: vacuumlazy.c:143
bool consider_bypass_optimization
Definition: vacuumlazy.c:154
BlockNumber rel_pages
Definition: vacuumlazy.c:198
TransactionId FreezeLimit
Definition: vacuumlazy.c:174
int64 recently_dead_tuples
Definition: vacuumlazy.c:219
int64 tuples_frozen
Definition: vacuumlazy.c:216
BlockNumber frozen_pages
Definition: vacuumlazy.c:201
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:203
char * relnamespace
Definition: vacuumlazy.c:182
int64 live_tuples
Definition: vacuumlazy.c:218
int64 lpdead_items
Definition: vacuumlazy.c:217
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:162
bool skippedallvis
Definition: vacuumlazy.c:179
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:202
Relation * indrels
Definition: vacuumlazy.c:144
bool skipwithvm
Definition: vacuumlazy.c:150
bool do_index_cleanup
Definition: vacuumlazy.c:158
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:178
int64 missed_dead_tuples
Definition: vacuumlazy.c:220
BlockNumber blkno
Definition: vacuumlazy.c:185
TransactionId relfrozenxid
Definition: vacuumlazy.c:166
char * relname
Definition: vacuumlazy.c:183
VacDeadItems * dead_items
Definition: vacuumlazy.c:197
VacErrPhase phase
Definition: vacuumlazy.c:187
char * indname
Definition: vacuumlazy.c:184
bool do_index_vacuuming
Definition: vacuumlazy.c:157
BlockNumber blkno
Definition: vacuumlazy.c:244
VacErrPhase phase
Definition: vacuumlazy.c:246
OffsetNumber offnum
Definition: vacuumlazy.c:245
Form_pg_class rd_rel
Definition: rel.h:110
ItemPointerData items[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuum.h:247
int max_items
Definition: vacuum.h:243
int num_items
Definition: vacuum.h:244
int nworkers
Definition: vacuum.h:235
int freeze_table_age
Definition: vacuum.h:218
VacOptValue truncate
Definition: vacuum.h:228
bits32 options
Definition: vacuum.h:216
int freeze_min_age
Definition: vacuum.h:217
bool is_wraparound
Definition: vacuum.h:223
int multixact_freeze_min_age
Definition: vacuum.h:219
int multixact_freeze_table_age
Definition: vacuum.h:221
int log_min_duration
Definition: vacuum.h:224
VacOptValue index_cleanup
Definition: vacuum.h:227
uint64 wal_bytes
Definition: instrument.h:53
int64 wal_fpi
Definition: instrument.h:52
int64 wal_records
Definition: instrument.h:51
Definition: type.h:95
OffsetNumber offset
Definition: heapam_xlog.h:327
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:273
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:292
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:307
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
#define FrozenTransactionId
Definition: transam.h:33
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, VacDeadItems *dead_items)
Definition: vacuum.c:2297
Size vac_max_items_to_alloc_size(int max_items)
Definition: vacuum.c:2343
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2114
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1295
bool vacuum_xid_failsafe_check(TransactionId relfrozenxid, MultiXactId relminmxid)
Definition: vacuum.c:1139
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2157
void vacuum_delay_point(void)
Definition: vacuum.c:2178
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1199
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition: vacuum.c:2318
bool vacuum_set_xid_limits(Relation rel, int freeze_min_age, int multixact_freeze_min_age, int freeze_table_age, int multixact_freeze_table_age, TransactionId *oldestXmin, MultiXactId *oldestMxact, TransactionId *freezeLimit, MultiXactId *multiXactCutoff)
Definition: vacuum.c:957
#define VACOPT_VERBOSE
Definition: vacuum.h:185
#define MAXDEADITEMS(avail_mem)
Definition: vacuum.h:250
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:190
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3213
static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:3238
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3351
struct LVPagePruneState LVPagePruneState
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:87
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3386
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2843
static void lazy_vacuum(LVRelState *vacrel)
Definition: vacuumlazy.c:2189
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2665
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:76
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2822
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
Definition: vacuumlazy.c:1425
VacErrPhase
Definition: vacuumlazy.c:131
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition: vacuumlazy.c:133
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition: vacuumlazy.c:134
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition: vacuumlazy.c:137
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition: vacuumlazy.c:136
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition: vacuumlazy.c:135
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:132
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:854
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *hastup, bool *recordfreespace)
Definition: vacuumlazy.c:1955
#define ParallelVacuumIsActive(vacrel)
Definition: vacuumlazy.c:127
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
Definition: vacuumlazy.c:3469
static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, LVPagePruneState *prunestate)
Definition: vacuumlazy.c:1548
void heap_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:311
static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int index, Buffer *vmbuffer)
Definition: vacuumlazy.c:2495
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
Definition: vacuumlazy.c:2712
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:77
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2622
static int dead_items_max_items(LVRelState *vacrel)
Definition: vacuumlazy.c:3117
struct LVSavedErrInfo LVSavedErrInfo
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
Definition: vacuumlazy.c:2759
#define PREFETCH_SIZE
Definition: vacuumlazy.c:121
struct LVRelState LVRelState
static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block, bool *next_unskippable_allvis, bool *skipping_current_range)
Definition: vacuumlazy.c:1313
#define BYPASS_THRESHOLD_PAGES
Definition: vacuumlazy.c:94
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:3156
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:88
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2314
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
Definition: vacuumlazy.c:3450
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
Definition: vacuumlazy.c:2974
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:115
#define FAILSAFE_EVERY_PAGES
Definition: vacuumlazy.c:99
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:86
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
Definition: vacuumlazy.c:2409
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:108
VacDeadItems * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans)
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int max_items, int elevel, BufferAccessStrategy bstrategy)
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
@ WAIT_EVENT_VACUUM_TRUNCATE
Definition: wait_event.h:149
bool IsInParallelMode(void)
Definition: xact.c:1065
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:451
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:389
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1191
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34