PostgreSQL Source Code  git master
vacuumlazy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  * The major space usage for vacuuming is storage for the array of dead TIDs
7  * that are to be removed from indexes. We want to ensure we can vacuum even
8  * the very largest relations with finite memory space usage. To do that, we
9  * set upper bounds on the number of TIDs we can keep track of at once.
10  *
11  * We are willing to use at most maintenance_work_mem (or perhaps
12  * autovacuum_work_mem) memory space to keep track of dead TIDs. We initially
13  * allocate an array of TIDs of that size, with an upper limit that depends on
14  * table size (this limit ensures we don't allocate a huge area uselessly for
15  * vacuuming small tables). If the array threatens to overflow, we must call
16  * lazy_vacuum to vacuum indexes (and to vacuum the pages that we've pruned).
17  * This frees up the memory space dedicated to storing dead TIDs.
18  *
19  * In practice VACUUM will often complete its initial pass over the target
20  * heap relation without ever running out of space to store TIDs. This means
21  * that there only needs to be one call to lazy_vacuum, after the initial pass
22  * completes.
23  *
24  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  * src/backend/access/heap/vacuumlazy.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include <math.h>
36 
37 #include "access/amapi.h"
38 #include "access/genam.h"
39 #include "access/heapam.h"
40 #include "access/heapam_xlog.h"
41 #include "access/htup_details.h"
42 #include "access/multixact.h"
43 #include "access/transam.h"
44 #include "access/visibilitymap.h"
45 #include "access/xact.h"
46 #include "access/xlog.h"
47 #include "access/xloginsert.h"
48 #include "catalog/index.h"
49 #include "catalog/storage.h"
50 #include "commands/dbcommands.h"
51 #include "commands/progress.h"
52 #include "commands/vacuum.h"
53 #include "executor/instrument.h"
54 #include "miscadmin.h"
55 #include "optimizer/paths.h"
56 #include "pgstat.h"
57 #include "portability/instr_time.h"
58 #include "postmaster/autovacuum.h"
59 #include "storage/bufmgr.h"
60 #include "storage/freespace.h"
61 #include "storage/lmgr.h"
62 #include "tcop/tcopprot.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_rusage.h"
66 #include "utils/timestamp.h"
67 
68 
69 /*
70  * Space/time tradeoff parameters: do these need to be user-tunable?
71  *
72  * To consider truncating the relation, we want there to be at least
73  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
74  * is less) potentially-freeable pages.
75  */
76 #define REL_TRUNCATE_MINIMUM 1000
77 #define REL_TRUNCATE_FRACTION 16
78 
79 /*
80  * Timing parameters for truncate locking heuristics.
81  *
82  * These were not exposed as user tunable GUC values because it didn't seem
83  * that the potential for improvement was great enough to merit the cost of
84  * supporting them.
85  */
86 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
87 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
88 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
89 
90 /*
91  * Threshold that controls whether we bypass index vacuuming and heap
92  * vacuuming as an optimization
93  */
94 #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
95 
96 /*
97  * Perform a failsafe check each time we scan another 4GB of pages.
98  * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
99  */
100 #define FAILSAFE_EVERY_PAGES \
101  ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
102 
103 /*
104  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
105  * (it won't be exact because we only vacuum FSM after processing a heap page
106  * that has some removable tuples). When there are indexes, this is ignored,
107  * and we vacuum FSM after each index/heap cleaning pass.
108  */
109 #define VACUUM_FSM_EVERY_PAGES \
110  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
111 
112 /*
113  * Before we consider skipping a page that's marked as clean in the
114  * visibility map, we must've seen at least this many clean pages.
115  */
116 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
117 
118 /*
119  * Size of the prefetch window for lazy vacuum backwards truncation scan.
120  * Needs to be a power of 2.
121  */
122 #define PREFETCH_SIZE ((BlockNumber) 32)
123 
124 /*
125  * Macro to check if we are in a parallel vacuum. If true, we are in the
126  * parallel mode and the DSM segment is initialized.
127  */
128 #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
129 
130 /* Phases of vacuum during which we report error context. */
131 typedef enum
132 {
140 
141 typedef struct LVRelState
142 {
143  /* Target heap relation and its indexes */
146  int nindexes;
147 
148  /* Buffer access strategy and parallel vacuum state */
151 
152  /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
154  /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
156  /* Consider index vacuuming bypass optimization? */
158 
159  /* Doing index vacuuming, index cleanup, rel truncation? */
163 
164  /* VACUUM operation's cutoffs for freezing and pruning */
165  struct VacuumCutoffs cutoffs;
167  /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
171 
172  /* Error reporting state */
173  char *dbname;
175  char *relname;
176  char *indname; /* Current index name */
177  BlockNumber blkno; /* used only for heap operations */
178  OffsetNumber offnum; /* used only for heap operations */
180  bool verbose; /* VACUUM VERBOSE? */
181 
182  /*
183  * dead_items stores TIDs whose index tuples are deleted by index
184  * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
185  * that has been processed by lazy_scan_prune. Also needed by
186  * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
187  * LP_UNUSED during second heap pass.
188  */
189  VacDeadItems *dead_items; /* TIDs whose index tuples we'll delete */
190  BlockNumber rel_pages; /* total number of pages */
191  BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
192  BlockNumber removed_pages; /* # pages removed by relation truncation */
193  BlockNumber frozen_pages; /* # pages with newly frozen tuples */
194  BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
195  BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
196  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
197 
198  /* Statistics output by us, for table */
199  double new_rel_tuples; /* new estimated total # of tuples */
200  double new_live_tuples; /* new estimated total # of live tuples */
201  /* Statistics output by index AMs */
203 
204  /* Instrumentation counters */
206  /* Counters that follow are only for scanned_pages */
207  int64 tuples_deleted; /* # deleted from table */
208  int64 tuples_frozen; /* # newly frozen */
209  int64 lpdead_items; /* # deleted from indexes */
210  int64 live_tuples; /* # live tuples remaining */
211  int64 recently_dead_tuples; /* # dead, but not yet removable */
212  int64 missed_dead_tuples; /* # removable, but not removed */
214 
215 /*
216  * State returned by lazy_scan_prune()
217  */
218 typedef struct LVPagePruneState
219 {
220  bool hastup; /* Page prevents rel truncation? */
221  bool has_lpdead_items; /* includes existing LP_DEAD items */
222 
223  /*
224  * State describes the proper VM bit states to set for the page following
225  * pruning and freezing. all_visible implies !has_lpdead_items, but don't
226  * trust all_frozen result unless all_visible is also set to true.
227  */
228  bool all_visible; /* Every item visible to all? */
229  bool all_frozen; /* provided all_visible is also true */
230  TransactionId visibility_cutoff_xid; /* For recovery conflicts */
232 
233 /* Struct for saving and restoring vacuum error information. */
234 typedef struct LVSavedErrInfo
235 {
240 
241 
242 /* non-export function prototypes */
243 static void lazy_scan_heap(LVRelState *vacrel);
244 static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
245  BlockNumber next_block,
246  bool *next_unskippable_allvis,
247  bool *skipping_current_range);
248 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
249  BlockNumber blkno, Page page,
250  bool sharelock, Buffer vmbuffer);
251 static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
252  BlockNumber blkno, Page page,
253  LVPagePruneState *prunestate);
254 static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
255  BlockNumber blkno, Page page,
256  bool *hastup, bool *recordfreespace);
257 static void lazy_vacuum(LVRelState *vacrel);
258 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
259 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
260 static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
261  Buffer buffer, int index, Buffer vmbuffer);
262 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
263 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
265  IndexBulkDeleteResult *istat,
266  double reltuples,
267  LVRelState *vacrel);
269  IndexBulkDeleteResult *istat,
270  double reltuples,
271  bool estimated_count,
272  LVRelState *vacrel);
273 static bool should_attempt_truncation(LVRelState *vacrel);
274 static void lazy_truncate_heap(LVRelState *vacrel);
276  bool *lock_waiter_detected);
277 static void dead_items_alloc(LVRelState *vacrel, int nworkers);
278 static void dead_items_cleanup(LVRelState *vacrel);
279 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
280  TransactionId *visibility_cutoff_xid, bool *all_frozen);
281 static void update_relstats_all_indexes(LVRelState *vacrel);
282 static void vacuum_error_callback(void *arg);
283 static void update_vacuum_error_info(LVRelState *vacrel,
284  LVSavedErrInfo *saved_vacrel,
285  int phase, BlockNumber blkno,
286  OffsetNumber offnum);
287 static void restore_vacuum_error_info(LVRelState *vacrel,
288  const LVSavedErrInfo *saved_vacrel);
289 
290 
291 /*
292  * heap_vacuum_rel() -- perform VACUUM for one heap relation
293  *
294  * This routine sets things up for and then calls lazy_scan_heap, where
295  * almost all work actually takes place. Finalizes everything after call
296  * returns by managing relation truncation and updating rel's pg_class
297  * entry. (Also updates pg_class entries for any indexes that need it.)
298  *
299  * At entry, we have already established a transaction and opened
300  * and locked the relation.
301  */
302 void
304  BufferAccessStrategy bstrategy)
305 {
306  LVRelState *vacrel;
307  bool verbose,
308  instrument,
309  skipwithvm,
310  frozenxid_updated,
311  minmulti_updated;
312  BlockNumber orig_rel_pages,
313  new_rel_pages,
314  new_rel_allvisible;
315  PGRUsage ru0;
316  TimestampTz starttime = 0;
317  PgStat_Counter startreadtime = 0,
318  startwritetime = 0;
319  WalUsage startwalusage = pgWalUsage;
320  int64 StartPageHit = VacuumPageHit,
321  StartPageMiss = VacuumPageMiss,
322  StartPageDirty = VacuumPageDirty;
323  ErrorContextCallback errcallback;
324  char **indnames = NULL;
325 
326  verbose = (params->options & VACOPT_VERBOSE) != 0;
327  instrument = (verbose || (IsAutoVacuumWorkerProcess() &&
328  params->log_min_duration >= 0));
329  if (instrument)
330  {
331  pg_rusage_init(&ru0);
332  starttime = GetCurrentTimestamp();
333  if (track_io_timing)
334  {
335  startreadtime = pgStatBlockReadTime;
336  startwritetime = pgStatBlockWriteTime;
337  }
338  }
339 
341  RelationGetRelid(rel));
342 
343  /*
344  * Setup error traceback support for ereport() first. The idea is to set
345  * up an error context callback to display additional information on any
346  * error during a vacuum. During different phases of vacuum, we update
347  * the state so that the error context callback always displays current
348  * information.
349  *
350  * Copy the names of heap rel into local memory for error reporting
351  * purposes, too. It isn't always safe to assume that we can get the name
352  * of each rel. It's convenient for code in lazy_scan_heap to always use
353  * these temp copies.
354  */
355  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
358  vacrel->relname = pstrdup(RelationGetRelationName(rel));
359  vacrel->indname = NULL;
361  vacrel->verbose = verbose;
362  errcallback.callback = vacuum_error_callback;
363  errcallback.arg = vacrel;
364  errcallback.previous = error_context_stack;
365  error_context_stack = &errcallback;
366 
367  /* Set up high level stuff about rel and its indexes */
368  vacrel->rel = rel;
369  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
370  &vacrel->indrels);
371  vacrel->bstrategy = bstrategy;
372  if (instrument && vacrel->nindexes > 0)
373  {
374  /* Copy index names used by instrumentation (not error reporting) */
375  indnames = palloc(sizeof(char *) * vacrel->nindexes);
376  for (int i = 0; i < vacrel->nindexes; i++)
377  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
378  }
379 
380  /*
381  * The index_cleanup param either disables index vacuuming and cleanup or
382  * forces it to go ahead when we would otherwise apply the index bypass
383  * optimization. The default is 'auto', which leaves the final decision
384  * up to lazy_vacuum().
385  *
386  * The truncate param allows user to avoid attempting relation truncation,
387  * though it can't force truncation to happen.
388  */
391  params->truncate != VACOPTVALUE_AUTO);
392 
393  /*
394  * While VacuumFailsafeActive is reset to false before calling this, we
395  * still need to reset it here due to recursive calls.
396  */
397  VacuumFailsafeActive = false;
398  vacrel->consider_bypass_optimization = true;
399  vacrel->do_index_vacuuming = true;
400  vacrel->do_index_cleanup = true;
401  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
402  if (params->index_cleanup == VACOPTVALUE_DISABLED)
403  {
404  /* Force disable index vacuuming up-front */
405  vacrel->do_index_vacuuming = false;
406  vacrel->do_index_cleanup = false;
407  }
408  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
409  {
410  /* Force index vacuuming. Note that failsafe can still bypass. */
411  vacrel->consider_bypass_optimization = false;
412  }
413  else
414  {
415  /* Default/auto, make all decisions dynamically */
417  }
418 
419  /* Initialize page counters explicitly (be tidy) */
420  vacrel->scanned_pages = 0;
421  vacrel->removed_pages = 0;
422  vacrel->frozen_pages = 0;
423  vacrel->lpdead_item_pages = 0;
424  vacrel->missed_dead_pages = 0;
425  vacrel->nonempty_pages = 0;
426  /* dead_items_alloc allocates vacrel->dead_items later on */
427 
428  /* Allocate/initialize output statistics state */
429  vacrel->new_rel_tuples = 0;
430  vacrel->new_live_tuples = 0;
431  vacrel->indstats = (IndexBulkDeleteResult **)
432  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
433 
434  /* Initialize remaining counters (be tidy) */
435  vacrel->num_index_scans = 0;
436  vacrel->tuples_deleted = 0;
437  vacrel->tuples_frozen = 0;
438  vacrel->lpdead_items = 0;
439  vacrel->live_tuples = 0;
440  vacrel->recently_dead_tuples = 0;
441  vacrel->missed_dead_tuples = 0;
442 
443  /*
444  * Get cutoffs that determine which deleted tuples are considered DEAD,
445  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
446  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
447  * happen in this order to ensure that the OldestXmin cutoff field works
448  * as an upper bound on the XIDs stored in the pages we'll actually scan
449  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
450  *
451  * Next acquire vistest, a related cutoff that's used in heap_page_prune.
452  * We expect vistest will always make heap_page_prune remove any deleted
453  * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become
454  * confused about whether a tuple should be frozen or removed. (In the
455  * future we might want to teach lazy_scan_prune to recompute vistest from
456  * time to time, to increase the number of dead tuples it can prune away.)
457  */
458  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
459  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
460  vacrel->vistest = GlobalVisTestFor(rel);
461  /* Initialize state used to track oldest extant XID/MXID */
462  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
463  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
464  vacrel->skippedallvis = false;
465  skipwithvm = true;
467  {
468  /*
469  * Force aggressive mode, and disable skipping blocks using the
470  * visibility map (even those set all-frozen)
471  */
472  vacrel->aggressive = true;
473  skipwithvm = false;
474  }
475 
476  vacrel->skipwithvm = skipwithvm;
477 
478  if (verbose)
479  {
480  if (vacrel->aggressive)
481  ereport(INFO,
482  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
483  vacrel->dbname, vacrel->relnamespace,
484  vacrel->relname)));
485  else
486  ereport(INFO,
487  (errmsg("vacuuming \"%s.%s.%s\"",
488  vacrel->dbname, vacrel->relnamespace,
489  vacrel->relname)));
490  }
491 
492  /*
493  * Allocate dead_items array memory using dead_items_alloc. This handles
494  * parallel VACUUM initialization as part of allocating shared memory
495  * space used for dead_items. (But do a failsafe precheck first, to
496  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
497  * is already dangerously old.)
498  */
500  dead_items_alloc(vacrel, params->nworkers);
501 
502  /*
503  * Call lazy_scan_heap to perform all required heap pruning, index
504  * vacuuming, and heap vacuuming (plus related processing)
505  */
506  lazy_scan_heap(vacrel);
507 
508  /*
509  * Free resources managed by dead_items_alloc. This ends parallel mode in
510  * passing when necessary.
511  */
512  dead_items_cleanup(vacrel);
514 
515  /*
516  * Update pg_class entries for each of rel's indexes where appropriate.
517  *
518  * Unlike the later update to rel's pg_class entry, this is not critical.
519  * Maintains relpages/reltuples statistics used by the planner only.
520  */
521  if (vacrel->do_index_cleanup)
523 
524  /* Done with rel's indexes */
525  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
526 
527  /* Optionally truncate rel */
528  if (should_attempt_truncation(vacrel))
529  lazy_truncate_heap(vacrel);
530 
531  /* Pop the error context stack */
532  error_context_stack = errcallback.previous;
533 
534  /* Report that we are now doing final cleanup */
537 
538  /*
539  * Prepare to update rel's pg_class entry.
540  *
541  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
542  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
543  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
544  */
545  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
547  vacrel->cutoffs.relfrozenxid,
548  vacrel->NewRelfrozenXid));
549  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
551  vacrel->cutoffs.relminmxid,
552  vacrel->NewRelminMxid));
553  if (vacrel->skippedallvis)
554  {
555  /*
556  * Must keep original relfrozenxid in a non-aggressive VACUUM that
557  * chose to skip an all-visible page range. The state that tracks new
558  * values will have missed unfrozen XIDs from the pages we skipped.
559  */
560  Assert(!vacrel->aggressive);
563  }
564 
565  /*
566  * For safety, clamp relallvisible to be not more than what we're setting
567  * pg_class.relpages to
568  */
569  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
570  visibilitymap_count(rel, &new_rel_allvisible, NULL);
571  if (new_rel_allvisible > new_rel_pages)
572  new_rel_allvisible = new_rel_pages;
573 
574  /*
575  * Now actually update rel's pg_class entry.
576  *
577  * In principle new_live_tuples could be -1 indicating that we (still)
578  * don't know the tuple count. In practice that can't happen, since we
579  * scan every page that isn't skipped using the visibility map.
580  */
581  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
582  new_rel_allvisible, vacrel->nindexes > 0,
583  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
584  &frozenxid_updated, &minmulti_updated, false);
585 
586  /*
587  * Report results to the cumulative stats system, too.
588  *
589  * Deliberately avoid telling the stats system about LP_DEAD items that
590  * remain in the table due to VACUUM bypassing index and heap vacuuming.
591  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
592  * It seems like a good idea to err on the side of not vacuuming again too
593  * soon in cases where the failsafe prevented significant amounts of heap
594  * vacuuming.
595  */
597  rel->rd_rel->relisshared,
598  Max(vacrel->new_live_tuples, 0),
599  vacrel->recently_dead_tuples +
600  vacrel->missed_dead_tuples);
602 
603  if (instrument)
604  {
605  TimestampTz endtime = GetCurrentTimestamp();
606 
607  if (verbose || params->log_min_duration == 0 ||
608  TimestampDifferenceExceeds(starttime, endtime,
609  params->log_min_duration))
610  {
611  long secs_dur;
612  int usecs_dur;
613  WalUsage walusage;
615  char *msgfmt;
616  int32 diff;
617  int64 PageHitOp = VacuumPageHit - StartPageHit,
618  PageMissOp = VacuumPageMiss - StartPageMiss,
619  PageDirtyOp = VacuumPageDirty - StartPageDirty;
620  double read_rate = 0,
621  write_rate = 0;
622 
623  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
624  memset(&walusage, 0, sizeof(WalUsage));
625  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
626 
628  if (verbose)
629  {
630  /*
631  * Aggressiveness already reported earlier, in dedicated
632  * VACUUM VERBOSE ereport
633  */
634  Assert(!params->is_wraparound);
635  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
636  }
637  else if (params->is_wraparound)
638  {
639  /*
640  * While it's possible for a VACUUM to be both is_wraparound
641  * and !aggressive, that's just a corner case -- is_wraparound
642  * normally implies aggressive. Produce distinct output for the
643  * corner case all the same, just in case.
644  */
645  if (vacrel->aggressive)
646  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
647  else
648  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
649  }
650  else
651  {
652  if (vacrel->aggressive)
653  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
654  else
655  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
656  }
657  appendStringInfo(&buf, msgfmt,
658  vacrel->dbname,
659  vacrel->relnamespace,
660  vacrel->relname,
661  vacrel->num_index_scans);
662  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
663  vacrel->removed_pages,
664  new_rel_pages,
665  vacrel->scanned_pages,
666  orig_rel_pages == 0 ? 100.0 :
667  100.0 * vacrel->scanned_pages / orig_rel_pages);
669  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
670  (long long) vacrel->tuples_deleted,
671  (long long) vacrel->new_rel_tuples,
672  (long long) vacrel->recently_dead_tuples);
673  if (vacrel->missed_dead_tuples > 0)
675  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
676  (long long) vacrel->missed_dead_tuples,
677  vacrel->missed_dead_pages);
678  diff = (int32) (ReadNextTransactionId() -
679  vacrel->cutoffs.OldestXmin);
681  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
682  vacrel->cutoffs.OldestXmin, diff);
683  if (frozenxid_updated)
684  {
685  diff = (int32) (vacrel->NewRelfrozenXid -
686  vacrel->cutoffs.relfrozenxid);
688  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
689  vacrel->NewRelfrozenXid, diff);
690  }
691  if (minmulti_updated)
692  {
693  diff = (int32) (vacrel->NewRelminMxid -
694  vacrel->cutoffs.relminmxid);
696  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
697  vacrel->NewRelminMxid, diff);
698  }
699  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
700  vacrel->frozen_pages,
701  orig_rel_pages == 0 ? 100.0 :
702  100.0 * vacrel->frozen_pages / orig_rel_pages,
703  (long long) vacrel->tuples_frozen);
704  if (vacrel->do_index_vacuuming)
705  {
706  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
707  appendStringInfoString(&buf, _("index scan not needed: "));
708  else
709  appendStringInfoString(&buf, _("index scan needed: "));
710 
711  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
712  }
713  else
714  {
716  appendStringInfoString(&buf, _("index scan bypassed: "));
717  else
718  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
719 
720  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
721  }
722  appendStringInfo(&buf, msgfmt,
723  vacrel->lpdead_item_pages,
724  orig_rel_pages == 0 ? 100.0 :
725  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
726  (long long) vacrel->lpdead_items);
727  for (int i = 0; i < vacrel->nindexes; i++)
728  {
729  IndexBulkDeleteResult *istat = vacrel->indstats[i];
730 
731  if (!istat)
732  continue;
733 
735  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
736  indnames[i],
737  istat->num_pages,
738  istat->pages_newly_deleted,
739  istat->pages_deleted,
740  istat->pages_free);
741  }
742  if (track_io_timing)
743  {
744  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
745  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
746 
747  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
748  read_ms, write_ms);
749  }
750  if (secs_dur > 0 || usecs_dur > 0)
751  {
752  read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
753  (secs_dur + usecs_dur / 1000000.0);
754  write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
755  (secs_dur + usecs_dur / 1000000.0);
756  }
757  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
758  read_rate, write_rate);
760  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
761  (long long) PageHitOp,
762  (long long) PageMissOp,
763  (long long) PageDirtyOp);
765  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
766  (long long) walusage.wal_records,
767  (long long) walusage.wal_fpi,
768  (unsigned long long) walusage.wal_bytes);
769  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
770 
771  ereport(verbose ? INFO : LOG,
772  (errmsg_internal("%s", buf.data)));
773  pfree(buf.data);
774  }
775  }
776 
777  /* Cleanup index statistics and index names */
778  for (int i = 0; i < vacrel->nindexes; i++)
779  {
780  if (vacrel->indstats[i])
781  pfree(vacrel->indstats[i]);
782 
783  if (instrument)
784  pfree(indnames[i]);
785  }
786 }
787 
788 /*
789  * lazy_scan_heap() -- workhorse function for VACUUM
790  *
791  * This routine prunes each page in the heap, and considers the need to
792  * freeze remaining tuples with storage (not including pages that can be
793  * skipped using the visibility map). Also performs related maintenance
794  * of the FSM and visibility map. These steps all take place during an
795  * initial pass over the target heap relation.
796  *
797  * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
798  * consists of deleting index tuples that point to LP_DEAD items left in
799  * heap pages following pruning. Earlier initial pass over the heap will
800  * have collected the TIDs whose index tuples need to be removed.
801  *
802  * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
803  * largely consists of marking LP_DEAD items (from collected TID array)
804  * as LP_UNUSED. This has to happen in a second, final pass over the
805  * heap, to preserve a basic invariant that all index AMs rely on: no
806  * extant index tuple can ever be allowed to contain a TID that points to
807  * an LP_UNUSED line pointer in the heap. We must disallow premature
808  * recycling of line pointers to avoid index scans that get confused
809  * about which TID points to which tuple immediately after recycling.
810  * (Actually, this isn't a concern when target heap relation happens to
811  * have no indexes, which allows us to safely apply the one-pass strategy
812  * as an optimization).
813  *
814  * In practice we often have enough space to fit all TIDs, and so won't
815  * need to call lazy_vacuum more than once, after our initial pass over
816  * the heap has totally finished. Otherwise things are slightly more
817  * complicated: our "initial pass" over the heap applies only to those
818  * pages that were pruned before we needed to call lazy_vacuum, and our
819  * "final pass" over the heap only vacuums these same heap pages.
820  * However, we process indexes in full every time lazy_vacuum is called,
821  * which makes index processing very inefficient when memory is in short
822  * supply.
823  */
824 static void
826 {
827  BlockNumber rel_pages = vacrel->rel_pages,
828  blkno,
829  next_unskippable_block,
830  next_fsm_block_to_vacuum = 0;
831  VacDeadItems *dead_items = vacrel->dead_items;
832  Buffer vmbuffer = InvalidBuffer;
833  bool next_unskippable_allvis,
834  skipping_current_range;
835  const int initprog_index[] = {
839  };
840  int64 initprog_val[3];
841 
842  /* Report that we're scanning the heap, advertising total # of blocks */
843  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
844  initprog_val[1] = rel_pages;
845  initprog_val[2] = dead_items->max_items;
846  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
847 
848  /* Set up an initial range of skippable blocks using the visibility map */
849  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
850  &next_unskippable_allvis,
851  &skipping_current_range);
852  for (blkno = 0; blkno < rel_pages; blkno++)
853  {
854  Buffer buf;
855  Page page;
856  bool all_visible_according_to_vm;
857  LVPagePruneState prunestate;
858 
859  if (blkno == next_unskippable_block)
860  {
861  /*
862  * Can't skip this page safely. Must scan the page. But
863  * determine the next skippable range after the page first.
864  */
865  all_visible_according_to_vm = next_unskippable_allvis;
866  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
867  blkno + 1,
868  &next_unskippable_allvis,
869  &skipping_current_range);
870 
871  Assert(next_unskippable_block >= blkno + 1);
872  }
873  else
874  {
875  /* Last page always scanned (may need to set nonempty_pages) */
876  Assert(blkno < rel_pages - 1);
877 
878  if (skipping_current_range)
879  continue;
880 
881  /* Current range is too small to skip -- just scan the page */
882  all_visible_according_to_vm = true;
883  }
884 
885  vacrel->scanned_pages++;
886 
887  /* Report as block scanned, update error traceback information */
890  blkno, InvalidOffsetNumber);
891 
893 
894  /*
895  * Regularly check if wraparound failsafe should trigger.
896  *
897  * There is a similar check inside lazy_vacuum_all_indexes(), but
898  * relfrozenxid might start to look dangerously old before we reach
899  * that point. This check also provides failsafe coverage for the
900  * one-pass strategy, and the two-pass strategy with the index_cleanup
901  * param set to 'off'.
902  */
903  if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
905 
906  /*
907  * Consider if we definitely have enough space to process TIDs on page
908  * already. If we are close to overrunning the available space for
909  * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
910  * this page.
911  */
912  Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
913  if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
914  {
915  /*
916  * Before beginning index vacuuming, we release any pin we may
917  * hold on the visibility map page. This isn't necessary for
918  * correctness, but we do it anyway to avoid holding the pin
919  * across a lengthy, unrelated operation.
920  */
921  if (BufferIsValid(vmbuffer))
922  {
923  ReleaseBuffer(vmbuffer);
924  vmbuffer = InvalidBuffer;
925  }
926 
927  /* Perform a round of index and heap vacuuming */
928  vacrel->consider_bypass_optimization = false;
929  lazy_vacuum(vacrel);
930 
931  /*
932  * Vacuum the Free Space Map to make newly-freed space visible on
933  * upper-level FSM pages. Note we have not yet processed blkno.
934  */
935  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
936  blkno);
937  next_fsm_block_to_vacuum = blkno;
938 
939  /* Report that we are once again scanning the heap */
942  }
943 
944  /*
945  * Pin the visibility map page in case we need to mark the page
946  * all-visible. In most cases this will be very cheap, because we'll
947  * already have the correct page pinned anyway.
948  */
949  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
950 
951  /*
952  * We need a buffer cleanup lock to prune HOT chains and defragment
953  * the page in lazy_scan_prune. But when it's not possible to acquire
954  * a cleanup lock right away, we may be able to settle for reduced
955  * processing using lazy_scan_noprune.
956  */
957  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
958  vacrel->bstrategy);
959  page = BufferGetPage(buf);
961  {
962  bool hastup,
963  recordfreespace;
964 
966 
967  /* Check for new or empty pages before lazy_scan_noprune call */
968  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, true,
969  vmbuffer))
970  {
971  /* Processed as new/empty page (lock and pin released) */
972  continue;
973  }
974 
975  /* Collect LP_DEAD items in dead_items array, count tuples */
976  if (lazy_scan_noprune(vacrel, buf, blkno, page, &hastup,
977  &recordfreespace))
978  {
979  Size freespace = 0;
980 
981  /*
982  * Processed page successfully (without cleanup lock) -- just
983  * need to perform rel truncation and FSM steps, much like the
984  * lazy_scan_prune case. Don't bother trying to match its
985  * visibility map setting steps, though.
986  */
987  if (hastup)
988  vacrel->nonempty_pages = blkno + 1;
989  if (recordfreespace)
990  freespace = PageGetHeapFreeSpace(page);
992  if (recordfreespace)
993  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
994  continue;
995  }
996 
997  /*
998  * lazy_scan_noprune could not do all required processing. Wait
999  * for a cleanup lock, and call lazy_scan_prune in the usual way.
1000  */
1001  Assert(vacrel->aggressive);
1004  }
1005 
1006  /* Check for new or empty pages before lazy_scan_prune call */
1007  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, false, vmbuffer))
1008  {
1009  /* Processed as new/empty page (lock and pin released) */
1010  continue;
1011  }
1012 
1013  /*
1014  * Prune, freeze, and count tuples.
1015  *
1016  * Accumulates details of remaining LP_DEAD line pointers on page in
1017  * dead_items array. This includes LP_DEAD line pointers that we
1018  * pruned ourselves, as well as existing LP_DEAD line pointers that
1019  * were pruned some time earlier. Also considers freezing XIDs in the
1020  * tuple headers of remaining items with storage.
1021  */
1022  lazy_scan_prune(vacrel, buf, blkno, page, &prunestate);
1023 
1024  Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
1025 
1026  /* Remember the location of the last page with nonremovable tuples */
1027  if (prunestate.hastup)
1028  vacrel->nonempty_pages = blkno + 1;
1029 
1030  if (vacrel->nindexes == 0)
1031  {
1032  /*
1033  * Consider the need to do page-at-a-time heap vacuuming when
1034  * using the one-pass strategy now.
1035  *
1036  * The one-pass strategy will never call lazy_vacuum(). The steps
1037  * performed here can be thought of as the one-pass equivalent of
1038  * a call to lazy_vacuum().
1039  */
1040  if (prunestate.has_lpdead_items)
1041  {
1042  Size freespace;
1043 
1044  lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
1045 
1046  /* Forget the LP_DEAD items that we just vacuumed */
1047  dead_items->num_items = 0;
1048 
1049  /*
1050  * Periodically perform FSM vacuuming to make newly-freed
1051  * space visible on upper FSM pages. Note we have not yet
1052  * performed FSM processing for blkno.
1053  */
1054  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1055  {
1056  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1057  blkno);
1058  next_fsm_block_to_vacuum = blkno;
1059  }
1060 
1061  /*
1062  * Now perform FSM processing for blkno, and move on to next
1063  * page.
1064  *
1065  * Our call to lazy_vacuum_heap_page() will have considered if
1066  * it's possible to set all_visible/all_frozen independently
1067  * of lazy_scan_prune(). Note that prunestate was invalidated
1068  * by lazy_vacuum_heap_page() call.
1069  */
1070  freespace = PageGetHeapFreeSpace(page);
1071 
1073  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1074  continue;
1075  }
1076 
1077  /*
1078  * There was no call to lazy_vacuum_heap_page() because pruning
1079  * didn't encounter/create any LP_DEAD items that needed to be
1080  * vacuumed. Prune state has not been invalidated, so proceed
1081  * with prunestate-driven visibility map and FSM steps (just like
1082  * the two-pass strategy).
1083  */
1084  Assert(dead_items->num_items == 0);
1085  }
1086 
1087  /*
1088  * Handle setting visibility map bit based on information from the VM
1089  * (as of last lazy_scan_skip() call), and from prunestate
1090  */
1091  if (!all_visible_according_to_vm && prunestate.all_visible)
1092  {
1094 
1095  if (prunestate.all_frozen)
1096  {
1098  flags |= VISIBILITYMAP_ALL_FROZEN;
1099  }
1100 
1101  /*
1102  * It should never be the case that the visibility map page is set
1103  * while the page-level bit is clear, but the reverse is allowed
1104  * (if checksums are not enabled). Regardless, set both bits so
1105  * that we get back in sync.
1106  *
1107  * NB: If the heap page is all-visible but the VM bit is not set,
1108  * we don't need to dirty the heap page. However, if checksums
1109  * are enabled, we do need to make sure that the heap page is
1110  * dirtied before passing it to visibilitymap_set(), because it
1111  * may be logged. Given that this situation should only happen in
1112  * rare cases after a crash, it is not worth optimizing.
1113  */
1114  PageSetAllVisible(page);
1116  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1117  vmbuffer, prunestate.visibility_cutoff_xid,
1118  flags);
1119  }
1120 
1121  /*
1122  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1123  * the page-level bit is clear. However, it's possible that the bit
1124  * got cleared after lazy_scan_skip() was called, so we must recheck
1125  * with buffer lock before concluding that the VM is corrupt.
1126  */
1127  else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1128  visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1129  {
1130  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1131  vacrel->relname, blkno);
1132  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1134  }
1135 
1136  /*
1137  * It's possible for the value returned by
1138  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1139  * wrong for us to see tuples that appear to not be visible to
1140  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1141  * xmin value never moves backwards, but
1142  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1143  * returns a value that's unnecessarily small, so if we see that
1144  * contradiction it just means that the tuples that we think are not
1145  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1146  * is correct.
1147  *
1148  * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE
1149  * set, however.
1150  */
1151  else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
1152  {
1153  elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1154  vacrel->relname, blkno);
1155  PageClearAllVisible(page);
1157  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1159  }
1160 
1161  /*
1162  * If the all-visible page is all-frozen but not marked as such yet,
1163  * mark it as all-frozen. Note that all_frozen is only valid if
1164  * all_visible is true, so we must check both prunestate fields.
1165  */
1166  else if (all_visible_according_to_vm && prunestate.all_visible &&
1167  prunestate.all_frozen &&
1168  !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1169  {
1170  /*
1171  * Avoid relying on all_visible_according_to_vm as a proxy for the
1172  * page-level PD_ALL_VISIBLE bit being set, since it might have
1173  * become stale -- even when all_visible is set in prunestate
1174  */
1175  if (!PageIsAllVisible(page))
1176  {
1177  PageSetAllVisible(page);
1179  }
1180 
1181  /*
1182  * Set the page all-frozen (and all-visible) in the VM.
1183  *
1184  * We can pass InvalidTransactionId as our visibility_cutoff_xid,
1185  * since a snapshotConflictHorizon sufficient to make everything
1186  * safe for REDO was logged when the page's tuples were frozen.
1187  */
1189  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1190  vmbuffer, InvalidTransactionId,
1193  }
1194 
1195  /*
1196  * Final steps for block: drop cleanup lock, record free space in the
1197  * FSM
1198  */
1199  if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1200  {
1201  /*
1202  * Wait until lazy_vacuum_heap_rel() to save free space. This
1203  * doesn't just save us some cycles; it also allows us to record
1204  * any additional free space that lazy_vacuum_heap_page() will
1205  * make available in cases where it's possible to truncate the
1206  * page's line pointer array.
1207  *
1208  * Note: It's not in fact 100% certain that we really will call
1209  * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1210  * index vacuuming (and so must skip heap vacuuming). This is
1211  * deemed okay because it only happens in emergencies, or when
1212  * there is very little free space anyway. (Besides, we start
1213  * recording free space in the FSM once index vacuuming has been
1214  * abandoned.)
1215  *
1216  * Note: The one-pass (no indexes) case is only supposed to make
1217  * it this far when there were no LP_DEAD items during pruning.
1218  */
1219  Assert(vacrel->nindexes > 0);
1221  }
1222  else
1223  {
1224  Size freespace = PageGetHeapFreeSpace(page);
1225 
1227  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1228  }
1229  }
1230 
1231  vacrel->blkno = InvalidBlockNumber;
1232  if (BufferIsValid(vmbuffer))
1233  ReleaseBuffer(vmbuffer);
1234 
1235  /* report that everything is now scanned */
1237 
1238  /* now we can compute the new value for pg_class.reltuples */
1239  vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1240  vacrel->scanned_pages,
1241  vacrel->live_tuples);
1242 
1243  /*
1244  * Also compute the total number of surviving heap entries. In the
1245  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1246  */
1247  vacrel->new_rel_tuples =
1248  Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1249  vacrel->missed_dead_tuples;
1250 
1251  /*
1252  * Do index vacuuming (call each index's ambulkdelete routine), then do
1253  * related heap vacuuming
1254  */
1255  if (dead_items->num_items > 0)
1256  lazy_vacuum(vacrel);
1257 
1258  /*
1259  * Vacuum the remainder of the Free Space Map. We must do this whether or
1260  * not there were indexes, and whether or not we bypassed index vacuuming.
1261  */
1262  if (blkno > next_fsm_block_to_vacuum)
1263  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1264 
1265  /* report all blocks vacuumed */
1267 
1268  /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1269  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1270  lazy_cleanup_all_indexes(vacrel);
1271 }
1272 
1273 /*
1274  * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
1275  *
1276  * lazy_scan_heap() calls here every time it needs to set up a new range of
1277  * blocks to skip via the visibility map. Caller passes the next block in
1278  * line. We return a next_unskippable_block for this range. When there are
1279  * no skippable blocks we just return caller's next_block. The all-visible
1280  * status of the returned block is set in *next_unskippable_allvis for caller,
1281  * too. Block usually won't be all-visible (since it's unskippable), but it
1282  * can be during aggressive VACUUMs (as well as in certain edge cases).
1283  *
1284  * Sets *skipping_current_range to indicate if caller should skip this range.
1285  * Costs and benefits drive our decision. Very small ranges won't be skipped.
1286  *
1287  * Note: our opinion of which blocks can be skipped can go stale immediately.
1288  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1289  * was concurrently cleared, though. All that matters is that caller scan all
1290  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1291  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1292  * older XIDs/MXIDs. The vacrel->skippedallvis flag will be set here when the
1293  * choice to skip such a range is actually made, making everything safe.)
1294  */
/*
 * Interface summary, as established by the function body:
 *
 * vacrel -- supplies rel, rel_pages, skipwithvm and aggressive.  Its
 *		skippedallvis flag is set to true only when the range is going to be
 *		skipped and it contains at least one block that is all-visible but not
 *		all-frozen.
 * vmbuffer -- passed through unchanged to visibilitymap_get_status() for
 *		every block that is probed.
 * next_block -- first block examined.  When next_block >= rel_pages the loop
 *		body never runs: next_block is returned as-is,
 *		*next_unskippable_allvis is left true, and *skipping_current_range is
 *		set to false.
 * next_unskippable_allvis (out) -- starts out true; cleared when the block
 *		we stop on lacks the all-visible VM bit, or when vacrel->skipwithvm is
 *		false.
 * skipping_current_range (out) -- true only when at least
 *		SKIP_PAGES_THRESHOLD blocks were counted as skippable.
 *
 * Returns the block number on which the scan of the visibility map stopped.
 */
1295 static BlockNumber
1296 lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
1297  bool *next_unskippable_allvis, bool *skipping_current_range)
1298 {
1299  BlockNumber rel_pages = vacrel->rel_pages,
1300  next_unskippable_block = next_block,
1301  nskippable_blocks = 0;
1302  bool skipsallvis = false;
1303 
/* Assume all-visible; the relevant break paths below clear this again */
1304  *next_unskippable_allvis = true;
1305  while (next_unskippable_block < rel_pages)
1306  {
1307  uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1308  next_unskippable_block,
1309  vmbuffer);
1310 
/* A block that is not all-visible can never be skipped: stop here */
1311  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1312  {
1313  Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1314  *next_unskippable_allvis = false;
1315  break;
1316  }
1317 
1318  /*
1319  * Caller must scan the last page to determine whether it has tuples
1320  * (caller must have the opportunity to set vacrel->nonempty_pages).
1321  * This rule avoids having lazy_truncate_heap() take access-exclusive
1322  * lock on rel to attempt a truncation that fails anyway, just because
1323  * there are tuples on the last page (it is likely that there will be
1324  * tuples on other nearby pages as well, but those can be skipped).
1325  *
1326  * Implement this by always treating the last block as unsafe to skip.
1327  */
1328  if (next_unskippable_block == rel_pages - 1)
1329  break;
1330 
1331  /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1332  if (!vacrel->skipwithvm)
1333  {
1334  /* Caller shouldn't rely on all_visible_according_to_vm */
1335  *next_unskippable_allvis = false;
1336  break;
1337  }
1338 
1339  /*
1340  * Aggressive VACUUM caller can't skip pages just because they are
1341  * all-visible. They may still skip all-frozen pages, which can't
1342  * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1343  */
1344  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1345  {
1346  if (vacrel->aggressive)
1347  break;
1348 
1349  /*
1350  * All-visible block is safe to skip in non-aggressive case. But
1351  * remember that the final range contains such a block for later.
1352  */
1353  skipsallvis = true;
1354  }
1355 
/*
 * NOTE(review): the embedded listing numbers jump from 1355 to 1357, so one
 * source line is absent from this extract at this point.  It is presumably a
 * cost-based delay call (vacuum_delay_point()) -- confirm against the
 * upstream file before relying on this loop body being complete.
 */
/* This block is skippable: count it and move on to the next candidate */
1357  next_unskippable_block++;
1358  nskippable_blocks++;
1359  }
1360 
1361  /*
1362  * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
1363  * pages. Since we're reading sequentially, the OS should be doing
1364  * readahead for us, so there's no gain in skipping a page now and then.
1365  * Skipping such a range might even discourage sequential detection.
1366  *
1367  * This test also enables more frequent relfrozenxid advancement during
1368  * non-aggressive VACUUMs. If the range has any all-visible pages then
1369  * skipping makes updating relfrozenxid unsafe, which is a real downside.
1370  */
1371  if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
1372  *skipping_current_range = false;
1373  else
1374  {
1375  *skipping_current_range = true;
/* Only record skippedallvis once the skip is actually going to happen */
1376  if (skipsallvis)
1377  vacrel->skippedallvis = true;
1378  }
1379 
1380  return next_unskippable_block;
1381 }
1382 
1383 /*
1384  * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1385  *
1386  * Must call here to handle both new and empty pages before calling
1387  * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1388  * with new or empty pages.
1389  *
1390  * It's necessary to consider new pages as a special case, since the rules for
1391  * maintaining the visibility map and FSM with empty pages are a little
1392  * different (though new pages can be truncated away during rel truncation).
1393  *
1394  * Empty pages are not really a special case -- they're just heap pages that
1395  * have no allocated tuples (including even LP_UNUSED items). You might
1396  * wonder why we need to handle them here all the same. It's only necessary
1397  * because of a corner-case involving a hard crash during heap relation
1398  * extension. If we ever make relation-extension crash safe, then it should
1399  * no longer be necessary to deal with empty pages here (or new pages, for
1400  * that matter).
1401  *
1402  * Caller must hold at least a shared lock. We might need to escalate the
1403  * lock in that case, so the type of lock caller holds needs to be specified
1404  * using 'sharelock' argument.
1405  *
1406  * Returns false in common case where caller should go on to call
1407  * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1408  * that lazy_scan_heap is done processing the page, releasing lock on caller's
1409  * behalf.
1410  */
/*
 * Return paths, as established by the function body:
 *	- PageIsNew(page): records free space for the page if none is recorded
 *	  yet, then returns true.
 *	- PageIsEmpty(page): sets the page all-visible (page flag and visibility
 *	  map) if it is not already, records its free space, then returns true.
 *	  When 'sharelock' is true the emptiness test is repeated first, and
 *	  false is returned if the page is no longer empty.
 *	- anything else: returns false and does nothing.
 *
 * NOTE(review): listing line 1412 is absent from this extract.  It carries
 * the function name and the leading parameters; the body uses vacrel, buf and
 * blkno, and the call sites in lazy_scan_heap() pass
 * (vacrel, buf, blkno, page, <bool>, vmbuffer), so it presumably declares
 * those three parameters -- confirm against the upstream file.
 */
1411 static bool
1413  Page page, bool sharelock, Buffer vmbuffer)
1414 {
1415  Size freespace;
1416 
1417  if (PageIsNew(page))
1418  {
1419  /*
1420  * All-zeroes pages can be left over if either a backend extends the
1421  * relation by a single page, but crashes before the newly initialized
1422  * page has been written out, or when bulk-extending the relation
1423  * (which creates a number of empty pages at the tail end of the
1424  * relation), and then enters them into the FSM.
1425  *
1426  * Note we do not enter the page into the visibilitymap. That has the
1427  * downside that we repeatedly visit this page in subsequent vacuums,
1428  * but otherwise we'll never discover the space on a promoted standby.
1429  * The harm of repeated checking ought to normally not be too bad. The
1430  * space usually should be used at some point, otherwise there
1431  * wouldn't be any regular vacuums.
1432  *
1433  * Make sure these pages are in the FSM, to ensure they can be reused.
1434  * Do that by testing if there's any space recorded for the page. If
1435  * not, enter it. We do so after releasing the lock on the heap page;
1436  * the FSM is approximate, after all.
1437  */
/*
 * NOTE(review): listing line 1438 is absent from this extract.  Going by the
 * comment above, it presumably releases the buffer lock (and pin) before the
 * FSM is consulted -- confirm against the upstream file.
 */
1439 
1440  if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1441  {
/* A new page is recorded as a whole block minus the page header */
1442  freespace = BLCKSZ - SizeOfPageHeaderData;
1443 
1444  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1445  }
1446 
1447  return true;
1448  }
1449 
1450  if (PageIsEmpty(page))
1451  {
1452  /*
1453  * It seems likely that caller will always be able to get a cleanup
1454  * lock on an empty page. But don't take any chances -- escalate to
1455  * an exclusive lock (still don't need a cleanup lock, though).
1456  */
1457  if (sharelock)
1458  {
/*
 * NOTE(review): listing lines 1459-1460 are absent from this extract.  They
 * presumably perform the lock escalation described above (drop the share
 * lock, take an exclusive lock), which is why emptiness is re-tested just
 * below -- confirm against the upstream file.
 */
1461 
1462  if (!PageIsEmpty(page))
1463  {
1464  /* page isn't new or empty -- keep lock and pin for now */
1465  return false;
1466  }
1467  }
1468  else
1469  {
1470  /* Already have a full cleanup lock (which is more than enough) */
1471  }
1472 
1473  /*
1474  * Unlike new pages, empty pages are always set all-visible and
1475  * all-frozen.
1476  */
1477  if (!PageIsAllVisible(page))
1478  {
/*
 * NOTE(review): listing line 1479 is absent from this extract.  Since an
 * END_CRIT_SECTION() closes this branch, it is presumably the matching
 * START_CRIT_SECTION() -- confirm against the upstream file.
 */
1480 
1481  /* mark buffer dirty before writing a WAL record */
/*
 * NOTE(review): listing line 1482 is absent from this extract; presumably
 * the call that marks 'buf' dirty, per the comment above -- confirm.
 */
1483 
1484  /*
1485  * It's possible that another backend has extended the heap,
1486  * initialized the page, and then failed to WAL-log the page due
1487  * to an ERROR. Since heap extension is not WAL-logged, recovery
1488  * might try to replay our record setting the page all-visible and
1489  * find that the page isn't initialized, which will cause a PANIC.
1490  * To prevent that, check whether the page has been previously
1491  * WAL-logged, and if not, do that now.
1492  */
1493  if (RelationNeedsWAL(vacrel->rel) &&
1494  PageGetLSN(page) == InvalidXLogRecPtr)
1495  log_newpage_buffer(buf, true);
1496 
1497  PageSetAllVisible(page);
1498  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1499  vmbuffer, InvalidTransactionId,
/*
 * NOTE(review): listing line 1500 is absent from this extract.  It holds the
 * final argument of the visibilitymap_set() call and its closing ");" --
 * presumably the all-visible and all-frozen flag bits, matching the comment
 * at the top of this branch.  Confirm against the upstream file.
 */
1501  END_CRIT_SECTION();
1502  }
1503 
1504  freespace = PageGetHeapFreeSpace(page);
/*
 * NOTE(review): listing line 1505 is absent from this extract.  The header
 * comment says the lock is released on the caller's behalf when true is
 * returned, so this is presumably where the buffer lock and pin are dropped
 * -- confirm against the upstream file.
 */
1506  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1507  return true;
1508  }
1509 
1510  /* page isn't new or empty -- keep lock and pin */
1511  return false;
1512 }
1513 
1514 /*
1515  * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1516  *
1517  * Caller must hold pin and buffer cleanup lock on the buffer.
1518  *
1519  * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
1520  * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
1521  * whether or not a tuple should be considered DEAD. This happened when an
1522  * inserting transaction concurrently aborted (after our heap_page_prune()
1523  * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
1524  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
1525  * but nevertheless were left with storage after pruning.
1526  *
1527  * The approach we take now is to restart pruning when the race condition is
1528  * detected. This allows heap_page_prune() to prune the tuples inserted by
1529  * the now-aborted transaction. This is a little crude, but it guarantees
1530  * that any items that make it into the dead_items array are simple LP_DEAD
1531  * line pointers, and that every remaining item with tuple storage is
1532  * considered as a candidate for freezing.
1533  */
1534 static void
1536  Buffer buf,
1537  BlockNumber blkno,
1538  Page page,
1539  LVPagePruneState *prunestate)
1540 {
1541  Relation rel = vacrel->rel;
1542  OffsetNumber offnum,
1543  maxoff;
1544  ItemId itemid;
1545  HeapTupleData tuple;
1546  HTSV_Result res;
1547  int tuples_deleted,
1548  tuples_frozen,
1549  lpdead_items,
1550  live_tuples,
1551  recently_dead_tuples;
1552  int nnewlpdead;
1553  HeapPageFreeze pagefrz;
1554  int64 fpi_before = pgWalUsage.wal_fpi;
1555  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1557 
1558  Assert(BufferGetBlockNumber(buf) == blkno);
1559 
1560  /*
1561  * maxoff might be reduced following line pointer array truncation in
1562  * heap_page_prune. That's safe for us to ignore, since the reclaimed
1563  * space will continue to look like LP_UNUSED items below.
1564  */
1565  maxoff = PageGetMaxOffsetNumber(page);
1566 
1567 retry:
1568 
1569  /* Initialize (or reset) page-level state */
1570  pagefrz.freeze_required = false;
1571  pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1572  pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
1573  pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1574  pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1575  tuples_deleted = 0;
1576  tuples_frozen = 0;
1577  lpdead_items = 0;
1578  live_tuples = 0;
1579  recently_dead_tuples = 0;
1580 
1581  /*
1582  * Prune all HOT-update chains in this page.
1583  *
1584  * We count tuples removed by the pruning step as tuples_deleted. Its
1585  * final value can be thought of as the number of tuples that have been
1586  * deleted from the table. It should not be confused with lpdead_items;
1587  * lpdead_items's final value can be thought of as the number of tuples
1588  * that were deleted from indexes.
1589  */
1590  tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
1591  InvalidTransactionId, 0, &nnewlpdead,
1592  &vacrel->offnum);
1593 
1594  /*
1595  * Now scan the page to collect LP_DEAD items and check for tuples
1596  * requiring freezing among remaining tuples with storage
1597  */
1598  prunestate->hastup = false;
1599  prunestate->has_lpdead_items = false;
1600  prunestate->all_visible = true;
1601  prunestate->all_frozen = true;
1603 
1604  for (offnum = FirstOffsetNumber;
1605  offnum <= maxoff;
1606  offnum = OffsetNumberNext(offnum))
1607  {
1608  bool totally_frozen;
1609 
1610  /*
1611  * Set the offset number so that we can display it along with any
1612  * error that occurred while processing this tuple.
1613  */
1614  vacrel->offnum = offnum;
1615  itemid = PageGetItemId(page, offnum);
1616 
1617  if (!ItemIdIsUsed(itemid))
1618  continue;
1619 
1620  /* Redirect items mustn't be touched */
1621  if (ItemIdIsRedirected(itemid))
1622  {
1623  /* page makes rel truncation unsafe */
1624  prunestate->hastup = true;
1625  continue;
1626  }
1627 
1628  if (ItemIdIsDead(itemid))
1629  {
1630  /*
1631  * Deliberately don't set hastup for LP_DEAD items. We make the
1632  * soft assumption that any LP_DEAD items encountered here will
1633  * become LP_UNUSED later on, before count_nondeletable_pages is
1634  * reached. If we don't make this assumption then rel truncation
1635  * will only happen every other VACUUM, at most. Besides, VACUUM
1636  * must treat hastup/nonempty_pages as provisional no matter how
1637  * LP_DEAD items are handled (handled here, or handled later on).
1638  *
1639  * Also deliberately delay unsetting all_visible until just before
1640  * we return to lazy_scan_heap caller, as explained in full below.
1641  * (This is another case where it's useful to anticipate that any
1642  * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
1643  */
1644  deadoffsets[lpdead_items++] = offnum;
1645  continue;
1646  }
1647 
1648  Assert(ItemIdIsNormal(itemid));
1649 
1650  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1651  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1652  tuple.t_len = ItemIdGetLength(itemid);
1653  tuple.t_tableOid = RelationGetRelid(rel);
1654 
1655  /*
1656  * DEAD tuples are almost always pruned into LP_DEAD line pointers by
1657  * heap_page_prune(), but it's possible that the tuple state changed
1658  * since heap_page_prune() looked. Handle that here by restarting.
1659  * (See comments at the top of function for a full explanation.)
1660  */
1661  res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1662  buf);
1663 
1664  if (unlikely(res == HEAPTUPLE_DEAD))
1665  goto retry;
1666 
1667  /*
1668  * The criteria for counting a tuple as live in this block need to
1669  * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
1670  * and ANALYZE may produce wildly different reltuples values, e.g.
1671  * when there are many recently-dead tuples.
1672  *
1673  * The logic here is a bit simpler than acquire_sample_rows(), as
1674  * VACUUM can't run inside a transaction block, which makes some cases
1675  * impossible (e.g. in-progress insert from the same transaction).
1676  *
1677  * We treat LP_DEAD items (which are the closest thing to DEAD tuples
1678  * that might be seen here) differently, too: we assume that they'll
1679  * become LP_UNUSED before VACUUM finishes. This difference is only
1680  * superficial. VACUUM effectively agrees with ANALYZE about DEAD
1681  * items, in the end. VACUUM won't remember LP_DEAD items, but only
1682  * because they're not supposed to be left behind when it is done.
1683  * (Cases where we bypass index vacuuming will violate this optimistic
1684  * assumption, but the overall impact of that should be negligible.)
1685  */
1686  switch (res)
1687  {
1688  case HEAPTUPLE_LIVE:
1689 
1690  /*
1691  * Count it as live. Not only is this natural, but it's also
1692  * what acquire_sample_rows() does.
1693  */
1694  live_tuples++;
1695 
1696  /*
1697  * Is the tuple definitely visible to all transactions?
1698  *
1699  * NB: Like with per-tuple hint bits, we can't set the
1700  * PD_ALL_VISIBLE flag if the inserter committed
1701  * asynchronously. See SetHintBits for more info. Check that
1702  * the tuple is hinted xmin-committed because of that.
1703  */
1704  if (prunestate->all_visible)
1705  {
1706  TransactionId xmin;
1707 
1709  {
1710  prunestate->all_visible = false;
1711  break;
1712  }
1713 
1714  /*
1715  * The inserter definitely committed. But is it old enough
1716  * that everyone sees it as committed?
1717  */
1718  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1719  if (!TransactionIdPrecedes(xmin,
1720  vacrel->cutoffs.OldestXmin))
1721  {
1722  prunestate->all_visible = false;
1723  break;
1724  }
1725 
1726  /* Track newest xmin on page. */
1727  if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid) &&
1728  TransactionIdIsNormal(xmin))
1729  prunestate->visibility_cutoff_xid = xmin;
1730  }
1731  break;
1733 
1734  /*
1735  * If tuple is recently dead then we must not remove it from
1736  * the relation. (We only remove items that are LP_DEAD from
1737  * pruning.)
1738  */
1739  recently_dead_tuples++;
1740  prunestate->all_visible = false;
1741  break;
1743 
1744  /*
1745  * We do not count these rows as live, because we expect the
1746  * inserting transaction to update the counters at commit, and
1747  * we assume that will happen only after we report our
1748  * results. This assumption is a bit shaky, but it is what
1749  * acquire_sample_rows() does, so be consistent.
1750  */
1751  prunestate->all_visible = false;
1752  break;
1754  /* This is an expected case during concurrent vacuum */
1755  prunestate->all_visible = false;
1756 
1757  /*
1758  * Count such rows as live. As above, we assume the deleting
1759  * transaction will commit and update the counters after we
1760  * report.
1761  */
1762  live_tuples++;
1763  break;
1764  default:
1765  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1766  break;
1767  }
1768 
1769  prunestate->hastup = true; /* page makes rel truncation unsafe */
1770 
1771  /* Tuple with storage -- consider need to freeze */
1772  if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
1773  &frozen[tuples_frozen], &totally_frozen))
1774  {
1775  /* Save prepared freeze plan for later */
1776  frozen[tuples_frozen++].offset = offnum;
1777  }
1778 
1779  /*
1780  * If any tuple isn't either totally frozen already or eligible to
1781  * become totally frozen (according to its freeze plan), then the page
1782  * definitely cannot be set all-frozen in the visibility map later on
1783  */
1784  if (!totally_frozen)
1785  prunestate->all_frozen = false;
1786  }
1787 
1788  /*
1789  * We have now divided every item on the page into either an LP_DEAD item
1790  * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
1791  * that remains and needs to be considered for freezing now (LP_UNUSED and
1792  * LP_REDIRECT items also remain, but are of no further interest to us).
1793  */
1794  vacrel->offnum = InvalidOffsetNumber;
1795 
1796  /*
1797  * Freeze the page when heap_prepare_freeze_tuple indicates that at least
1798  * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also
1799  * freeze when pruning generated an FPI, if doing so means that we set the
1800  * page all-frozen afterwards (might not happen until final heap pass).
1801  */
1802  if (pagefrz.freeze_required || tuples_frozen == 0 ||
1803  (prunestate->all_visible && prunestate->all_frozen &&
1804  fpi_before != pgWalUsage.wal_fpi))
1805  {
1806  /*
1807  * We're freezing the page. Our final NewRelfrozenXid doesn't need to
1808  * be affected by the XIDs that are just about to be frozen anyway.
1809  */
1810  vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
1811  vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
1812 
1813  if (tuples_frozen == 0)
1814  {
1815  /*
1816  * We have no freeze plans to execute, so there's no added cost
1817  * from following the freeze path. That's why it was chosen. This
1818  * is important in the case where the page only contains totally
1819  * frozen tuples at this point (perhaps only following pruning).
1820  * Such pages can be marked all-frozen in the VM by our caller,
1821  * even though none of its tuples were newly frozen here (note
1822  * that the "no freeze" path never sets pages all-frozen).
1823  *
1824  * We never increment the frozen_pages instrumentation counter
1825  * here, since it only counts pages with newly frozen tuples
1826  * (don't confuse that with pages newly set all-frozen in VM).
1827  */
1828  }
1829  else
1830  {
1831  TransactionId snapshotConflictHorizon;
1832 
1833  vacrel->frozen_pages++;
1834 
1835  /*
1836  * We can use visibility_cutoff_xid as our cutoff for conflicts
1837  * when the whole page is eligible to become all-frozen in the VM
1838  * once we're done with it. Otherwise we generate a conservative
1839  * cutoff by stepping back from OldestXmin.
1840  */
1841  if (prunestate->all_visible && prunestate->all_frozen)
1842  {
1843  /* Using same cutoff when setting VM is now unnecessary */
1844  snapshotConflictHorizon = prunestate->visibility_cutoff_xid;
1846  }
1847  else
1848  {
1849  /* Avoids false conflicts when hot_standby_feedback in use */
1850  snapshotConflictHorizon = vacrel->cutoffs.OldestXmin;
1851  TransactionIdRetreat(snapshotConflictHorizon);
1852  }
1853 
1854  /* Execute all freeze plans for page as a single atomic action */
1856  snapshotConflictHorizon,
1857  frozen, tuples_frozen);
1858  }
1859  }
1860  else
1861  {
1862  /*
1863  * Page requires "no freeze" processing. It might be set all-visible
1864  * in the visibility map, but it can never be set all-frozen.
1865  */
1866  vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
1867  vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
1868  prunestate->all_frozen = false;
1869  tuples_frozen = 0; /* avoid miscounts in instrumentation */
1870  }
1871 
1872  /*
1873  * VACUUM will call heap_page_is_all_visible() during the second pass over
1874  * the heap to determine all_visible and all_frozen for the page -- this
1875  * is a specialized version of the logic from this function. Now that
1876  * we've finished pruning and freezing, make sure that we're in total
1877  * agreement with heap_page_is_all_visible() using an assertion.
1878  */
1879 #ifdef USE_ASSERT_CHECKING
1880  /* Note that all_frozen value does not matter when !all_visible */
1881  if (prunestate->all_visible && lpdead_items == 0)
1882  {
1883  TransactionId cutoff;
1884  bool all_frozen;
1885 
1886  if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
1887  Assert(false);
1888 
1889  Assert(!TransactionIdIsValid(cutoff) ||
1890  cutoff == prunestate->visibility_cutoff_xid);
1891  }
1892 #endif
1893 
1894  /*
1895  * Now save details of the LP_DEAD items from the page in vacrel
1896  */
1897  if (lpdead_items > 0)
1898  {
1899  VacDeadItems *dead_items = vacrel->dead_items;
1900  ItemPointerData tmp;
1901 
1902  vacrel->lpdead_item_pages++;
1903  prunestate->has_lpdead_items = true;
1904 
1905  ItemPointerSetBlockNumber(&tmp, blkno);
1906 
1907  for (int i = 0; i < lpdead_items; i++)
1908  {
1909  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
1910  dead_items->items[dead_items->num_items++] = tmp;
1911  }
1912 
1913  Assert(dead_items->num_items <= dead_items->max_items);
1915  dead_items->num_items);
1916 
1917  /*
1918  * It was convenient to ignore LP_DEAD items in all_visible earlier on
1919  * to make the choice of whether or not to freeze the page unaffected
1920  * by the short-term presence of LP_DEAD items. These LP_DEAD items
1921  * were effectively assumed to be LP_UNUSED items in the making. It
1922  * doesn't matter which heap pass (initial pass or final pass) ends up
1923  * setting the page all-frozen, as long as the ongoing VACUUM does it.
1924  *
1925  * Now that freezing has been finalized, unset all_visible. It needs
1926  * to reflect the present state of things, as expected by our caller.
1927  */
1928  prunestate->all_visible = false;
1929  }
1930 
1931  /* Finally, add page-local counts to whole-VACUUM counts */
1932  vacrel->tuples_deleted += tuples_deleted;
1933  vacrel->tuples_frozen += tuples_frozen;
1934  vacrel->lpdead_items += lpdead_items;
1935  vacrel->live_tuples += live_tuples;
1936  vacrel->recently_dead_tuples += recently_dead_tuples;
1937 }
1938 
1939 /*
1940  * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1941  *
1942  * Caller need only hold a pin and share lock on the buffer, unlike
1943  * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1944  * performed here, it's quite possible that an earlier opportunistic pruning
1945  * operation left LP_DEAD items behind. We'll at least collect any such items
1946  * in the dead_items array for removal from indexes.
1947  *
1948  * For aggressive VACUUM callers, we may return false to indicate that a full
1949  * cleanup lock is required for processing by lazy_scan_prune. This is only
1950  * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1951  * one or more tuples on the page. We always return true for non-aggressive
1952  * callers.
1953  *
1954  * See lazy_scan_prune for an explanation of hastup return flag.
1955  * recordfreespace flag instructs caller on whether or not it should do
1956  * generic FSM processing for page.
1957  */
1958 static bool
1960  Buffer buf,
1961  BlockNumber blkno,
1962  Page page,
1963  bool *hastup,
1964  bool *recordfreespace)
1965 {
1966  OffsetNumber offnum,
1967  maxoff;
1968  int lpdead_items,
1969  live_tuples,
1970  recently_dead_tuples,
1971  missed_dead_tuples;
1972  HeapTupleHeader tupleheader;
1973  TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1974  MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1975  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1976 
1977  Assert(BufferGetBlockNumber(buf) == blkno);
1978 
1979  *hastup = false; /* for now */
1980  *recordfreespace = false; /* for now */
1981 
1982  lpdead_items = 0;
1983  live_tuples = 0;
1984  recently_dead_tuples = 0;
1985  missed_dead_tuples = 0;
1986 
1987  maxoff = PageGetMaxOffsetNumber(page);
1988  for (offnum = FirstOffsetNumber;
1989  offnum <= maxoff;
1990  offnum = OffsetNumberNext(offnum))
1991  {
1992  ItemId itemid;
1993  HeapTupleData tuple;
1994 
1995  vacrel->offnum = offnum;
1996  itemid = PageGetItemId(page, offnum);
1997 
1998  if (!ItemIdIsUsed(itemid))
1999  continue;
2000 
2001  if (ItemIdIsRedirected(itemid))
2002  {
2003  *hastup = true;
2004  continue;
2005  }
2006 
2007  if (ItemIdIsDead(itemid))
2008  {
2009  /*
2010  * Deliberately don't set hastup=true here. See same point in
2011  * lazy_scan_prune for an explanation.
2012  */
2013  deadoffsets[lpdead_items++] = offnum;
2014  continue;
2015  }
2016 
2017  *hastup = true; /* page prevents rel truncation */
2018  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2019  if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2020  &NoFreezePageRelfrozenXid,
2021  &NoFreezePageRelminMxid))
2022  {
2023  /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2024  if (vacrel->aggressive)
2025  {
2026  /*
2027  * Aggressive VACUUMs must always be able to advance rel's
2028  * relfrozenxid to a value >= FreezeLimit (and be able to
2029  * advance rel's relminmxid to a value >= MultiXactCutoff).
2030  * The ongoing aggressive VACUUM won't be able to do that
2031  * unless it can freeze an XID (or MXID) from this tuple now.
2032  *
2033  * The only safe option is to have caller perform processing
2034  * of this page using lazy_scan_prune. Caller might have to
2035  * wait a while for a cleanup lock, but it can't be helped.
2036  */
2037  vacrel->offnum = InvalidOffsetNumber;
2038  return false;
2039  }
2040 
2041  /*
2042  * Non-aggressive VACUUMs are under no obligation to advance
2043  * relfrozenxid (even by one XID). We can be much laxer here.
2044  *
2045  * Currently we always just accept an older final relfrozenxid
2046  * and/or relminmxid value. We never make caller wait or work a
2047  * little harder, even when it likely makes sense to do so.
2048  */
2049  }
2050 
2051  ItemPointerSet(&(tuple.t_self), blkno, offnum);
2052  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2053  tuple.t_len = ItemIdGetLength(itemid);
2054  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2055 
2056  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2057  buf))
2058  {
2060  case HEAPTUPLE_LIVE:
2061 
2062  /*
2063  * Count both cases as live, just like lazy_scan_prune
2064  */
2065  live_tuples++;
2066 
2067  break;
2068  case HEAPTUPLE_DEAD:
2069 
2070  /*
2071  * There is some useful work for pruning to do, that won't be
2072  * done due to failure to get a cleanup lock.
2073  */
2074  missed_dead_tuples++;
2075  break;
2077 
2078  /*
2079  * Count in recently_dead_tuples, just like lazy_scan_prune
2080  */
2081  recently_dead_tuples++;
2082  break;
2084 
2085  /*
2086  * Do not count these rows as live, just like lazy_scan_prune
2087  */
2088  break;
2089  default:
2090  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2091  break;
2092  }
2093  }
2094 
2095  vacrel->offnum = InvalidOffsetNumber;
2096 
2097  /*
2098  * By here we know for sure that caller can put off freezing and pruning
2099  * this particular page until the next VACUUM. Remember its details now.
2100  * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2101  */
2102  vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2103  vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2104 
2105  /* Save any LP_DEAD items found on the page in dead_items array */
2106  if (vacrel->nindexes == 0)
2107  {
2108  /* Using one-pass strategy (since table has no indexes) */
2109  if (lpdead_items > 0)
2110  {
2111  /*
2112  * Perfunctory handling for the corner case where a single pass
2113  * strategy VACUUM cannot get a cleanup lock, and it turns out
2114  * that there is one or more LP_DEAD items: just count the LP_DEAD
2115  * items as missed_dead_tuples instead. (This is a bit dishonest,
2116  * but it beats having to maintain specialized heap vacuuming code
2117  * forever, for vanishingly little benefit.)
2118  */
2119  *hastup = true;
2120  missed_dead_tuples += lpdead_items;
2121  }
2122 
2123  *recordfreespace = true;
2124  }
2125  else if (lpdead_items == 0)
2126  {
2127  /*
2128  * Won't be vacuuming this page later, so record page's freespace in
2129  * the FSM now
2130  */
2131  *recordfreespace = true;
2132  }
2133  else
2134  {
2135  VacDeadItems *dead_items = vacrel->dead_items;
2136  ItemPointerData tmp;
2137 
2138  /*
2139  * Page has LP_DEAD items, and so any references/TIDs that remain in
2140  * indexes will be deleted during index vacuuming (and then marked
2141  * LP_UNUSED in the heap)
2142  */
2143  vacrel->lpdead_item_pages++;
2144 
2145  ItemPointerSetBlockNumber(&tmp, blkno);
2146 
2147  for (int i = 0; i < lpdead_items; i++)
2148  {
2149  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
2150  dead_items->items[dead_items->num_items++] = tmp;
2151  }
2152 
2153  Assert(dead_items->num_items <= dead_items->max_items);
2155  dead_items->num_items);
2156 
2157  vacrel->lpdead_items += lpdead_items;
2158 
2159  /*
2160  * Assume that we'll go on to vacuum this heap page during final pass
2161  * over the heap. Don't record free space until then.
2162  */
2163  *recordfreespace = false;
2164  }
2165 
2166  /*
2167  * Finally, add relevant page-local counts to whole-VACUUM counts
2168  */
2169  vacrel->live_tuples += live_tuples;
2170  vacrel->recently_dead_tuples += recently_dead_tuples;
2171  vacrel->missed_dead_tuples += missed_dead_tuples;
2172  if (missed_dead_tuples > 0)
2173  vacrel->missed_dead_pages++;
2174 
2175  /* Caller won't need to call lazy_scan_prune with same page */
2176  return true;
2177 }
2178 
2179 /*
2180  * Main entry point for index vacuuming and heap vacuuming.
2181  *
2182  * Removes items collected in dead_items from table's indexes, then marks the
2183  * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2184  * for full details.
2185  *
2186  * Also empties dead_items, freeing up space for later TIDs.
2187  *
2188  * We may choose to bypass index vacuuming at this point, though only when the
2189  * ongoing VACUUM operation will definitely only have one index scan/round of
2190  * index vacuuming.
2191  */
2192 static void
2194 {
2195  bool bypass;
2196 
2197  /* Should not end up here with no indexes */
2198  Assert(vacrel->nindexes > 0);
2199  Assert(vacrel->lpdead_item_pages > 0);
2200 
2201  if (!vacrel->do_index_vacuuming)
2202  {
2203  Assert(!vacrel->do_index_cleanup);
2204  vacrel->dead_items->num_items = 0;
2205  return;
2206  }
2207 
2208  /*
2209  * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2210  *
2211  * We currently only do this in cases where the number of LP_DEAD items
2212  * for the entire VACUUM operation is close to zero. This avoids sharp
2213  * discontinuities in the duration and overhead of successive VACUUM
2214  * operations that run against the same table with a fixed workload.
2215  * Ideally, successive VACUUM operations will behave as if there are
2216  * exactly zero LP_DEAD items in cases where there are close to zero.
2217  *
2218  * This is likely to be helpful with a table that is continually affected
2219  * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2220  * have small aberrations that lead to just a few heap pages retaining
2221  * only one or two LP_DEAD items. This is pretty common; even when the
2222  * DBA goes out of their way to make UPDATEs use HOT, it is practically
2223  * impossible to predict whether HOT will be applied in 100% of cases.
2224  * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2225  * HOT through careful tuning.
2226  */
2227  bypass = false;
2228  if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2229  {
2230  BlockNumber threshold;
2231 
2232  Assert(vacrel->num_index_scans == 0);
2233  Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
2234  Assert(vacrel->do_index_vacuuming);
2235  Assert(vacrel->do_index_cleanup);
2236 
2237  /*
2238  * This crossover point at which we'll start to do index vacuuming is
2239  * expressed as a percentage of the total number of heap pages in the
2240  * table that are known to have at least one LP_DEAD item. This is
2241  * much more important than the total number of LP_DEAD items, since
2242  * it's a proxy for the number of heap pages whose visibility map bits
2243  * cannot be set on account of bypassing index and heap vacuuming.
2244  *
2245  * We apply one further precautionary test: the space currently used
2246  * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2247  * not exceed 32MB. This limits the risk that we will bypass index
2248  * vacuuming again and again until eventually there is a VACUUM whose
2249  * dead_items space is not CPU cache resident.
2250  *
2251  * We don't take any special steps to remember the LP_DEAD items (such
2252  * as counting them in our final update to the stats system) when the
2253  * optimization is applied. Though the accounting used in analyze.c's
2254  * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2255  * rows in its own stats report, that's okay. The discrepancy should
2256  * be negligible. If this optimization is ever expanded to cover more
2257  * cases then this may need to be reconsidered.
2258  */
2259  threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2260  bypass = (vacrel->lpdead_item_pages < threshold &&
2261  vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
2262  }
2263 
2264  if (bypass)
2265  {
2266  /*
2267  * There are almost zero TIDs. Behave as if there were precisely
2268  * zero: bypass index vacuuming, but do index cleanup.
2269  *
2270  * We expect that the ongoing VACUUM operation will finish very
2271  * quickly, so there is no point in considering speeding up as a
2272  * failsafe against wraparound failure. (Index cleanup is expected to
2273  * finish very quickly in cases where there were no ambulkdelete()
2274  * calls.)
2275  */
2276  vacrel->do_index_vacuuming = false;
2277  }
2278  else if (lazy_vacuum_all_indexes(vacrel))
2279  {
2280  /*
2281  * We successfully completed a round of index vacuuming. Do related
2282  * heap vacuuming now.
2283  */
2284  lazy_vacuum_heap_rel(vacrel);
2285  }
2286  else
2287  {
2288  /*
2289  * Failsafe case.
2290  *
2291  * We attempted index vacuuming, but didn't finish a full round/full
2292  * index scan. This happens when relfrozenxid or relminmxid is too
2293  * far in the past.
2294  *
2295  * From this point on the VACUUM operation will do no further index
2296  * vacuuming or heap vacuuming. This VACUUM operation won't end up
2297  * back here again.
2298  */
2300  }
2301 
2302  /*
2303  * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2304  * vacuum)
2305  */
2306  vacrel->dead_items->num_items = 0;
2307 }
2308 
2309 /*
2310  * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2311  *
2312  * Returns true in the common case when all indexes were successfully
2313  * vacuumed. Returns false in rare cases where we determined that the ongoing
2314  * VACUUM operation is at risk of taking too long to finish, leading to
2315  * wraparound failure.
2316  */
2317 static bool
2319 {
2320  bool allindexes = true;
2321  double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2322 
2323  Assert(vacrel->nindexes > 0);
2324  Assert(vacrel->do_index_vacuuming);
2325  Assert(vacrel->do_index_cleanup);
2326 
2327  /* Precheck for XID wraparound emergencies */
2328  if (lazy_check_wraparound_failsafe(vacrel))
2329  {
2330  /* Wraparound emergency -- don't even start an index scan */
2331  return false;
2332  }
2333 
2334  /* Report that we are now vacuuming indexes */
2337 
2338  if (!ParallelVacuumIsActive(vacrel))
2339  {
2340  for (int idx = 0; idx < vacrel->nindexes; idx++)
2341  {
2342  Relation indrel = vacrel->indrels[idx];
2343  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2344 
2345  vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2346  old_live_tuples,
2347  vacrel);
2348 
2349  if (lazy_check_wraparound_failsafe(vacrel))
2350  {
2351  /* Wraparound emergency -- end current index scan */
2352  allindexes = false;
2353  break;
2354  }
2355  }
2356  }
2357  else
2358  {
2359  /* Outsource everything to parallel variant */
2360  parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2361  vacrel->num_index_scans);
2362 
2363  /*
2364  * Do a postcheck to consider applying wraparound failsafe now. Note
2365  * that parallel VACUUM only gets the precheck and this postcheck.
2366  */
2367  if (lazy_check_wraparound_failsafe(vacrel))
2368  allindexes = false;
2369  }
2370 
2371  /*
2372  * We delete all LP_DEAD items from the first heap pass in all indexes on
2373  * each call here (except calls where we choose to do the failsafe). This
2374  * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2375  * of the failsafe triggering, which prevents the next call from taking
2376  * place).
2377  */
2378  Assert(vacrel->num_index_scans > 0 ||
2379  vacrel->dead_items->num_items == vacrel->lpdead_items);
2380  Assert(allindexes || VacuumFailsafeActive);
2381 
2382  /*
2383  * Increase and report the number of index scans.
2384  *
2385  * We deliberately include the case where we started a round of bulk
2386  * deletes that we weren't able to finish due to the failsafe triggering.
2387  */
2388  vacrel->num_index_scans++;
2390  vacrel->num_index_scans);
2391 
2392  return allindexes;
2393 }
2394 
2395 /*
2396  * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2397  *
2398  * This routine marks LP_DEAD items in vacrel->dead_items array as LP_UNUSED.
2399  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
2400  * at all.
2401  *
2402  * We may also be able to truncate the line pointer array of the heap pages we
2403  * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2404  * array, it can be reclaimed as free space. These LP_UNUSED items usually
2405  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2406  * each page to LP_UNUSED, and then consider if it's possible to truncate the
2407  * page's line pointer array).
2408  *
2409  * Note: the reason for doing this as a second pass is we cannot remove the
2410  * tuples until we've removed their index entries, and we want to process
2411  * index entry removal in batches as large as possible.
2412  */
2413 static void
2415 {
2416  int index = 0;
2417  BlockNumber vacuumed_pages = 0;
2418  Buffer vmbuffer = InvalidBuffer;
2419  LVSavedErrInfo saved_err_info;
2420 
2421  Assert(vacrel->do_index_vacuuming);
2422  Assert(vacrel->do_index_cleanup);
2423  Assert(vacrel->num_index_scans > 0);
2424 
2425  /* Report that we are now vacuuming the heap */
2428 
2429  /* Update error traceback information */
2430  update_vacuum_error_info(vacrel, &saved_err_info,
2433 
2434  while (index < vacrel->dead_items->num_items)
2435  {
2436  BlockNumber blkno;
2437  Buffer buf;
2438  Page page;
2439  Size freespace;
2440 
2442 
2443  blkno = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
2444  vacrel->blkno = blkno;
2445 
2446  /*
2447  * Pin the visibility map page in case we need to mark the page
2448  * all-visible. In most cases this will be very cheap, because we'll
2449  * already have the correct page pinned anyway.
2450  */
2451  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2452 
2453  /* We need a non-cleanup exclusive lock to mark dead_items unused */
2454  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2455  vacrel->bstrategy);
2457  index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer);
2458 
2459  /* Now that we've vacuumed the page, record its available space */
2460  page = BufferGetPage(buf);
2461  freespace = PageGetHeapFreeSpace(page);
2462 
2464  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2465  vacuumed_pages++;
2466  }
2467 
2468  vacrel->blkno = InvalidBlockNumber;
2469  if (BufferIsValid(vmbuffer))
2470  ReleaseBuffer(vmbuffer);
2471 
2472  /*
2473  * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2474  * the second heap pass. No more, no less.
2475  */
2476  Assert(index > 0);
2477  Assert(vacrel->num_index_scans > 1 ||
2478  (index == vacrel->lpdead_items &&
2479  vacuumed_pages == vacrel->lpdead_item_pages));
2480 
2481  ereport(DEBUG2,
2482  (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2483  vacrel->relname, (long long) index, vacuumed_pages)));
2484 
2485  /* Revert to the previous phase information for error traceback */
2486  restore_vacuum_error_info(vacrel, &saved_err_info);
2487 }
2488 
2489 /*
2490  * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2491  * vacrel->dead_items array.
2492  *
2493  * Caller must have an exclusive buffer lock on the buffer (though a full
2494  * cleanup lock is also acceptable). vmbuffer must be valid and already have
2495  * a pin on blkno's visibility map page.
2496  *
2497  * index is an offset into the vacrel->dead_items array for the first listed
2498  * LP_DEAD item on the page. The return value is the first index immediately
2499  * after all LP_DEAD items for the same page in the array.
2500  */
2501 static int
2503  int index, Buffer vmbuffer)
2504 {
2505  VacDeadItems *dead_items = vacrel->dead_items;
2506  Page page = BufferGetPage(buffer);
2508  int nunused = 0;
2509  TransactionId visibility_cutoff_xid;
2510  bool all_frozen;
2511  LVSavedErrInfo saved_err_info;
2512 
2513  Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2514 
2516 
2517  /* Update error traceback information */
2518  update_vacuum_error_info(vacrel, &saved_err_info,
2521 
2523 
2524  for (; index < dead_items->num_items; index++)
2525  {
2526  BlockNumber tblk;
2527  OffsetNumber toff;
2528  ItemId itemid;
2529 
2530  tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
2531  if (tblk != blkno)
2532  break; /* past end of tuples for this block */
2533  toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
2534  itemid = PageGetItemId(page, toff);
2535 
2536  Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2537  ItemIdSetUnused(itemid);
2538  unused[nunused++] = toff;
2539  }
2540 
2541  Assert(nunused > 0);
2542 
2543  /* Attempt to truncate line pointer array now */
2545 
2546  /*
2547  * Mark buffer dirty before we write WAL.
2548  */
2549  MarkBufferDirty(buffer);
2550 
2551  /* XLOG stuff */
2552  if (RelationNeedsWAL(vacrel->rel))
2553  {
2554  xl_heap_vacuum xlrec;
2555  XLogRecPtr recptr;
2556 
2557  xlrec.nunused = nunused;
2558 
2559  XLogBeginInsert();
2560  XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
2561 
2562  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2563  XLogRegisterBufData(0, (char *) unused, nunused * sizeof(OffsetNumber));
2564 
2565  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
2566 
2567  PageSetLSN(page, recptr);
2568  }
2569 
2570  /*
2571  * End critical section, so we safely can do visibility tests (which
2572  * possibly need to perform IO and allocate memory!). If we crash now the
2573  * page (including the corresponding vm bit) might not be marked all
2574  * visible, but that's fine. A later vacuum will fix that.
2575  */
2576  END_CRIT_SECTION();
2577 
2578  /*
2579  * Now that we have removed the LP_DEAD items from the page, once again
2580  * check if the page has become all-visible. The page is already marked
2581  * dirty, exclusively locked, and, if needed, a full page image has been
2582  * emitted.
2583  */
2584  Assert(!PageIsAllVisible(page));
2585  if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2586  &all_frozen))
2587  {
2589 
2590  if (all_frozen)
2591  {
2592  Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2593  flags |= VISIBILITYMAP_ALL_FROZEN;
2594  }
2595 
2596  PageSetAllVisible(page);
2597  visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2598  vmbuffer, visibility_cutoff_xid, flags);
2599  }
2600 
2601  /* Revert to the previous phase information for error traceback */
2602  restore_vacuum_error_info(vacrel, &saved_err_info);
2603  return index;
2604 }
2605 
2606 /*
2607  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2608  * relfrozenxid and/or relminmxid that is dangerously far in the past.
2609  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2610  * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2611  *
2612  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2613  * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2614  * that it started out with.
2615  *
2616  * Returns true when failsafe has been triggered.
2617  */
2618 static bool
2620 {
2621  /* Don't warn more than once per VACUUM */
2623  return true;
2624 
2626  {
2627  VacuumFailsafeActive = true;
2628 
2629  /*
2630  * Abandon use of a buffer access strategy to allow use of all of
2631  * shared buffers. We assume the caller who allocated the memory for
2632  * the BufferAccessStrategy will free it.
2633  */
2634  vacrel->bstrategy = NULL;
2635 
2636  /* Disable index vacuuming, index cleanup, and heap rel truncation */
2637  vacrel->do_index_vacuuming = false;
2638  vacrel->do_index_cleanup = false;
2639  vacrel->do_rel_truncate = false;
2640 
2641  ereport(WARNING,
2642  (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2643  vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2644  vacrel->num_index_scans),
2645  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2646  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2647  "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2648 
2649  /* Stop applying cost limits from this point on */
2650  VacuumCostActive = false;
2651  VacuumCostBalance = 0;
2652 
2653  return true;
2654  }
2655 
2656  return false;
2657 }
2658 
2659 /*
2660  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2661  */
2662 static void
2664 {
2665  double reltuples = vacrel->new_rel_tuples;
2666  bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2667 
2668  Assert(vacrel->do_index_cleanup);
2669  Assert(vacrel->nindexes > 0);
2670 
2671  /* Report that we are now cleaning up indexes */
2674 
2675  if (!ParallelVacuumIsActive(vacrel))
2676  {
2677  for (int idx = 0; idx < vacrel->nindexes; idx++)
2678  {
2679  Relation indrel = vacrel->indrels[idx];
2680  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2681 
2682  vacrel->indstats[idx] =
2683  lazy_cleanup_one_index(indrel, istat, reltuples,
2684  estimated_count, vacrel);
2685  }
2686  }
2687  else
2688  {
2689  /* Outsource everything to parallel variant */
2690  parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2691  vacrel->num_index_scans,
2692  estimated_count);
2693  }
2694 }
2695 
2696 /*
2697  * lazy_vacuum_one_index() -- vacuum index relation.
2698  *
2699  * Delete all the index tuples containing a TID collected in
2700  * vacrel->dead_items array. Also update running statistics.
2701  * Exact details depend on index AM's ambulkdelete routine.
2702  *
2703  * reltuples is the number of heap tuples to be passed to the
2704  * bulkdelete callback. It's always assumed to be estimated.
2705  * See indexam.sgml for more info.
2706  *
2707  * Returns bulk delete stats derived from input stats
2708  */
2709 static IndexBulkDeleteResult *
2711  double reltuples, LVRelState *vacrel)
2712 {
2713  IndexVacuumInfo ivinfo;
2714  LVSavedErrInfo saved_err_info;
2715 
      /*
       * Build the request passed to vac_bulkdel_one_index. estimated_count
       * is hard-wired to true here, in line with the header comment's
       * statement that reltuples is always assumed to be an estimate.
       */
2716  ivinfo.index = indrel;
2717  ivinfo.heaprel = vacrel->rel;
2718  ivinfo.analyze_only = false;
2719  ivinfo.report_progress = false;
2720  ivinfo.estimated_count = true;
2721  ivinfo.message_level = DEBUG2;
2722  ivinfo.num_heap_tuples = reltuples;
2723  ivinfo.strategy = vacrel->bstrategy;
2724 
2725  /*
2726  * Update error traceback information.
2727  *
2728  * The index name is saved during this phase and restored immediately
2729  * after this phase. See vacuum_error_callback.
2730  */
2731  Assert(vacrel->indname == NULL);
2732  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2733  update_vacuum_error_info(vacrel, &saved_err_info,
2736 
2737  /* Do bulk deletion */
2738  istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items);
2739 
2740  /* Revert to the previous phase information for error traceback */
2741  restore_vacuum_error_info(vacrel, &saved_err_info);
      /*
       * indname is the copy made by pstrdup above; free it and reset the
       * pointer so that the Assert(vacrel->indname == NULL) at the start of
       * the next index phase holds.
       */
2742  pfree(vacrel->indname);
2743  vacrel->indname = NULL;
2744 
2745  return istat;
2746 }
2747 
2748 /*
2749  * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2750  *
2751  * Calls index AM's amvacuumcleanup routine. reltuples is the number
2752  * of heap tuples and estimated_count is true if reltuples is an
2753  * estimated value. See indexam.sgml for more info.
2754  *
2755  * Returns bulk delete stats derived from input stats
2756  */
2757 static IndexBulkDeleteResult *
2759  double reltuples, bool estimated_count,
2760  LVRelState *vacrel)
2761 {
2762  IndexVacuumInfo ivinfo;
2763  LVSavedErrInfo saved_err_info;
2764 
      /*
       * Build the request passed to vac_cleanup_one_index. Unlike
       * lazy_vacuum_one_index, estimated_count is taken from the caller
       * rather than being forced to true.
       */
2765  ivinfo.index = indrel;
2766  ivinfo.heaprel = vacrel->rel;
2767  ivinfo.analyze_only = false;
2768  ivinfo.report_progress = false;
2769  ivinfo.estimated_count = estimated_count;
2770  ivinfo.message_level = DEBUG2;
2771 
2772  ivinfo.num_heap_tuples = reltuples;
2773  ivinfo.strategy = vacrel->bstrategy;
2774 
2775  /*
2776  * Update error traceback information.
2777  *
2778  * The index name is saved during this phase and restored immediately
2779  * after this phase. See vacuum_error_callback.
2780  */
2781  Assert(vacrel->indname == NULL);
2782  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2783  update_vacuum_error_info(vacrel, &saved_err_info,
2786 
      /* Hand off to the cleanup wrapper; its result replaces the input stats */
2787  istat = vac_cleanup_one_index(&ivinfo, istat);
2788 
2789  /* Revert to the previous phase information for error traceback */
2790  restore_vacuum_error_info(vacrel, &saved_err_info);
      /*
       * indname is the copy made by pstrdup above; free it and reset the
       * pointer so that the Assert(vacrel->indname == NULL) at the start of
       * the next index phase holds.
       */
2791  pfree(vacrel->indname);
2792  vacrel->indname = NULL;
2793 
2794  return istat;
2795 }
2796 
2797 /*
2798  * should_attempt_truncation - should we attempt to truncate the heap?
2799  *
2800  * Don't even think about it unless we have a shot at releasing a goodly
2801  * number of pages. Otherwise, the time taken isn't worth it, mainly because
2802  * an AccessExclusive lock must be replayed on any hot standby, where it can
2803  * be particularly disruptive.
2804  *
2805  * Also don't attempt it if wraparound failsafe is in effect. The entire
2806  * system might be refusing to allocate new XIDs at this point. The system
2807  * definitely won't return to normal unless and until VACUUM actually advances
2808  * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2809  * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2810  * truncate the table under these circumstances, an XID exhaustion error might
2811  * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2812  * There is very little chance of truncation working out when the failsafe is
2813  * in effect in any case. lazy_scan_prune makes the optimistic assumption
2814  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2815  * we're called.
2816  *
2817  * Also don't attempt it if we are doing early pruning/vacuuming, because a
2818  * scan which cannot find a truncated heap page cannot determine that the
2819  * snapshot is too old to read that page.
2820  */
2821 static bool
2823 {
2824  BlockNumber possibly_freeable;
2825 
      /* Hard vetoes first: truncation disabled, or failsafe in effect */
2826  if (!vacrel->do_rel_truncate || VacuumFailsafeActive ||
2828  return false;
2829 
      /*
       * Pages between nonempty_pages and rel_pages are the truncation
       * candidates. Proceed only if there is at least one, and the count
       * reaches either the absolute threshold REL_TRUNCATE_MINIMUM or the
       * relative threshold rel_pages / REL_TRUNCATE_FRACTION.
       */
2830  possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2831  if (possibly_freeable > 0 &&
2832  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2833  possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2834  return true;
2835 
2836  return false;
2837 }
2838 
2839 /*
2840  * lazy_truncate_heap - try to truncate off any empty pages at the end
2841  */
2842 static void
2844 {
2845  BlockNumber orig_rel_pages = vacrel->rel_pages;
2846  BlockNumber new_rel_pages;
2847  bool lock_waiter_detected;
2848  int lock_retry;
2849 
2850  /* Report that we are now truncating */
2853 
2854  /* Update error traceback information one last time */
2857 
2858  /*
2859  * Loop until no more truncating can be done.
2860  */
2861  do
2862  {
2863  /*
2864  * We need full exclusive lock on the relation in order to do
2865  * truncation. If we can't get it, give up rather than waiting --- we
2866  * don't want to block other backends, and we don't want to deadlock
2867  * (which is quite possible considering we already hold a lower-grade
2868  * lock).
2869  */
2870  lock_waiter_detected = false;
2871  lock_retry = 0;
2872  while (true)
2873  {
2875  break;
2876 
2877  /*
2878  * Check for interrupts while trying to (re-)acquire the exclusive
2879  * lock.
2880  */
2882 
2883  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2885  {
2886  /*
2887  * We failed to establish the lock in the specified number of
2888  * retries. This means we give up truncating.
2889  */
2890  ereport(vacrel->verbose ? INFO : DEBUG2,
2891  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2892  vacrel->relname)));
2893  return;
2894  }
2895 
2896  (void) WaitLatch(MyLatch,
2901  }
2902 
2903  /*
2904  * Now that we have exclusive lock, look to see if the rel has grown
2905  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2906  * the newly added pages presumably contain non-deletable tuples.
2907  */
2908  new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2909  if (new_rel_pages != orig_rel_pages)
2910  {
2911  /*
2912  * Note: we intentionally don't update vacrel->rel_pages with the
2913  * new rel size here. If we did, it would amount to assuming that
2914  * the new pages are empty, which is unlikely. Leaving the numbers
2915  * alone amounts to assuming that the new pages have the same
2916  * tuple density as existing ones, which is less unlikely.
2917  */
2919  return;
2920  }
2921 
2922  /*
2923  * Scan backwards from the end to verify that the end pages actually
2924  * contain no tuples. This is *necessary*, not optional, because
2925  * other backends could have added tuples to these pages whilst we
2926  * were vacuuming.
2927  */
2928  new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
      /*
       * Keep the intended size in vacrel->blkno: the "while truncating
       * relation ... to %u blocks" context message in vacuum_error_callback
       * prints blkno as the target block count.
       */
2929  vacrel->blkno = new_rel_pages;
2930 
2931  if (new_rel_pages >= orig_rel_pages)
2932  {
2933  /* can't do anything after all */
2935  return;
2936  }
2937 
2938  /*
2939  * Okay to truncate.
2940  */
2941  RelationTruncate(vacrel->rel, new_rel_pages);
2942 
2943  /*
2944  * We can release the exclusive lock as soon as we have truncated.
2945  * Other backends can't safely access the relation until they have
2946  * processed the smgr invalidation that smgrtruncate sent out ... but
2947  * that should happen as part of standard invalidation processing once
2948  * they acquire lock on the relation.
2949  */
2951 
2952  /*
2953  * Update statistics. Here, it *is* correct to adjust rel_pages
2954  * without also touching reltuples, since the tuple count wasn't
2955  * changed by the truncation.
2956  */
2957  vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2958  vacrel->rel_pages = new_rel_pages;
2959 
2960  ereport(vacrel->verbose ? INFO : DEBUG2,
2961  (errmsg("table \"%s\": truncated %u to %u pages",
2962  vacrel->relname,
2963  orig_rel_pages, new_rel_pages)));
      /* Any further pass measures from the size we just truncated to */
2964  orig_rel_pages = new_rel_pages;
      /*
       * Go around again only if count_nondeletable_pages stopped early
       * because it saw a lock waiter, and pages above nonempty_pages remain
       * that it has not yet verified.
       */
2965  } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2966 }
2967 
2968 /*
2969  * Rescan end pages to verify that they are (still) empty of tuples.
2970  *
2971  * Returns number of nondeletable pages (last nonempty page + 1).
2972  */
2973 static BlockNumber
2974 count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2975 {
2976  BlockNumber blkno;
2977  BlockNumber prefetchedUntil;
2978  instr_time starttime;
2979 
2980  /* Initialize the starttime if we check for conflicting lock requests */
2981  INSTR_TIME_SET_CURRENT(starttime);
2982 
2983  /*
2984  * Start checking blocks at what we believe relation end to be and move
2985  * backwards. (Strange coding of loop control is needed because blkno is
2986  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2987  * in forward direction, so that OS-level readahead can kick in.
2988  */
2989  blkno = vacrel->rel_pages;
2991  "prefetch size must be power of 2");
2992  prefetchedUntil = InvalidBlockNumber;
2993  while (blkno > vacrel->nonempty_pages)
2994  {
2995  Buffer buf;
2996  Page page;
2997  OffsetNumber offnum,
2998  maxoff;
2999  bool hastup;
3000 
3001  /*
3002  * Check if another process requests a lock on our relation. We are
3003  * holding an AccessExclusiveLock here, so they will be waiting. We
3004  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3005  * only check if that interval has elapsed once every 32 blocks to
3006  * keep the number of system calls and actual shared lock table
3007  * lookups to a minimum.
3008  */
3009  if ((blkno % 32) == 0)
3010  {
3011  instr_time currenttime;
3012  instr_time elapsed;
3013 
3014  INSTR_TIME_SET_CURRENT(currenttime);
3015  elapsed = currenttime;
3016  INSTR_TIME_SUBTRACT(elapsed, starttime);
3017  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3019  {
3021  {
3022  ereport(vacrel->verbose ? INFO : DEBUG2,
3023  (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3024  vacrel->relname)));
3025 
3026  *lock_waiter_detected = true;
      /*
       * blkno has not been decremented yet on this iteration, so
       * every page at blkno and above was already verified empty
       * by earlier iterations; report blkno as the page count.
       */
3027  return blkno;
3028  }
      /* No waiter: restart the interval from this check */
3029  starttime = currenttime;
3030  }
3031  }
3032 
3033  /*
3034  * We don't insert a vacuum delay point here, because we have an
3035  * exclusive lock on the table which we want to hold for as short a
3036  * time as possible. We still need to check for interrupts however.
3037  */
3039 
      /*
       * From here on blkno is the block number of the page being examined;
       * the loop test above used the one-past value.
       */
3040  blkno--;
3041 
3042  /* If we haven't prefetched this lot yet, do so now. */
3043  if (prefetchedUntil > blkno)
3044  {
3045  BlockNumber prefetchStart;
3046  BlockNumber pblkno;
3047 
      /*
       * Round blkno down to a multiple of PREFETCH_SIZE; the mask trick
       * relies on PREFETCH_SIZE being a power of 2, which is what the
       * static assertion message above refers to.
       */
3048  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3049  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3050  {
3051  PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3053  }
3054  prefetchedUntil = prefetchStart;
3055  }
3056 
3057  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3058  vacrel->bstrategy);
3059 
3060  /* In this phase we only need shared access to the buffer */
3062 
3063  page = BufferGetPage(buf);
3064 
      /* New or empty pages hold no line pointers to inspect; keep going */
3065  if (PageIsNew(page) || PageIsEmpty(page))
3066  {
3068  continue;
3069  }
3070 
3071  hastup = false;
3072  maxoff = PageGetMaxOffsetNumber(page);
3073  for (offnum = FirstOffsetNumber;
3074  offnum <= maxoff;
3075  offnum = OffsetNumberNext(offnum))
3076  {
3077  ItemId itemid;
3078 
3079  itemid = PageGetItemId(page, offnum);
3080 
3081  /*
3082  * Note: any non-unused item should be taken as a reason to keep
3083  * this page. Even an LP_DEAD item makes truncation unsafe, since
3084  * we must not have cleaned out its index entries.
3085  */
3086  if (ItemIdIsUsed(itemid))
3087  {
3088  hastup = true;
3089  break; /* can stop scanning */
3090  }
3091  } /* scan along page */
3092 
3094 
3095  /* Done scanning if we found a tuple here */
3096  if (hastup)
      /* blkno is the last nonempty page; +1 turns it into a page count */
3097  return blkno + 1;
3098  }
3099 
3100  /*
3101  * If we fall out of the loop, all the previously-thought-to-be-empty
3102  * pages still are; we need not bother to look at the last known-nonempty
3103  * page.
3104  */
3105  return vacrel->nonempty_pages;
3106 }
3107 
3108 /*
3109  * Returns the number of dead TIDs that VACUUM should allocate space to
3110  * store, given a heap rel of size vacrel->rel_pages, and given current
3111  * maintenance_work_mem setting (or current autovacuum_work_mem setting,
3112  * when applicable).
3113  *
3114  * See the comments at the head of this file for rationale.
3115  */
3116 static int
3118 {
3119  int64 max_items;
      /*
       * The autovacuum-specific setting is considered only in an autovacuum
       * worker, and only when autovacuum_work_mem is not -1.
       */
3120  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
3121  autovacuum_work_mem != -1 ?
3123 
3124  if (vacrel->nindexes > 0)
3125  {
3126  BlockNumber rel_pages = vacrel->rel_pages;
3127 
      /*
       * Start from the memory budget (vac_work_mem is multiplied by 1024
       * before being passed to MAXDEADITEMS), then clamp twice: to INT_MAX,
       * because the result is returned as int, and to what MAXDEADITEMS
       * yields for MaxAllocSize.
       */
3128  max_items = MAXDEADITEMS(vac_work_mem * 1024L);
3129  max_items = Min(max_items, INT_MAX);
3130  max_items = Min(max_items, MAXDEADITEMS(MaxAllocSize));
3131 
3132  /* curious coding here to ensure the multiplication can't overflow */
      /*
       * i.e. never reserve room for more than MaxHeapTuplesPerPage items on
       * each of the table's rel_pages pages.
       */
3133  if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
3134  max_items = rel_pages * MaxHeapTuplesPerPage;
3135 
3136  /* stay sane if small maintenance_work_mem */
3137  max_items = Max(max_items, MaxHeapTuplesPerPage);
3138  }
3139  else
3140  {
3141  /* One-pass case only stores a single heap page's TIDs at a time */
3142  max_items = MaxHeapTuplesPerPage;
3143  }
3144 
      /* Safe narrowing: both branches leave max_items no larger than INT_MAX */
3145  return (int) max_items;
3146 }
3147 
3148 /*
3149  * Allocate dead_items (either using palloc, or in dynamic shared memory).
3150  * Sets dead_items in vacrel for caller.
3151  *
3152  * Also handles parallel initialization as part of allocating dead_items in
3153  * DSM when required.
3154  */
3155 static void
3156 dead_items_alloc(LVRelState *vacrel, int nworkers)
3157 {
3158  VacDeadItems *dead_items;
3159  int max_items;
3160 
3161  max_items = dead_items_max_items(vacrel);
3162  Assert(max_items >= MaxHeapTuplesPerPage);
3163 
3164  /*
3165  * Initialize state for a parallel vacuum. As of now, only one worker can
3166  * be used for an index, so we invoke parallelism only if there are at
3167  * least two indexes on a table.
3168  */
3169  if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3170  {
3171  /*
3172  * Since parallel workers cannot access data in temporary tables, we
3173  * can't perform parallel vacuum on them.
3174  */
3175  if (RelationUsesLocalBuffers(vacrel->rel))
3176  {
3177  /*
3178  * Give warning only if the user explicitly tries to perform a
3179  * parallel vacuum on the temporary table.
3180  */
3181  if (nworkers > 0)
3182  ereport(WARNING,
3183  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3184  vacrel->relname)));
3185  }
3186  else
3187  vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3188  vacrel->nindexes, nworkers,
3189  max_items,
3190  vacrel->verbose ? INFO : DEBUG2,
3191  vacrel->bstrategy);
3192 
3193  /* If parallel mode started, dead_items space is allocated in DSM */
3194  if (ParallelVacuumIsActive(vacrel))
3195  {
3196  vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs);
3197  return;
3198  }
3199  }
3200 
3201  /* Serial VACUUM case */
3202  dead_items = (VacDeadItems *) palloc(vac_max_items_to_alloc_size(max_items));
3203  dead_items->max_items = max_items;
3204  dead_items->num_items = 0;
3205 
3206  vacrel->dead_items = dead_items;
3207 }
3208 
3209 /*
3210  * Perform cleanup for resources allocated in dead_items_alloc
3211  */
3212 static void
3214 {
      /*
       * Only the parallel path has anything to release explicitly; the
       * serial path returns at once without freeing its allocation.
       */
3215  if (!ParallelVacuumIsActive(vacrel))
3216  {
3217  /* Don't bother with pfree here */
3218  return;
3219  }
3220 
3221  /* End parallel mode */
3222  parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
      /* The parallel state has been ended; clear the pointer to it */
3223  vacrel->pvs = NULL;
3224 }
3225 
3226 /*
3227  * Check if every tuple in the given page is visible to all current and future
3228  * transactions. Also return the visibility_cutoff_xid which is the highest
3229  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3230  * on this page is frozen.
3231  *
3232  * This is a stripped down version of lazy_scan_prune(). If you change
3233  * anything here, make sure that everything stays in sync. Note that an
3234  * assertion calls us to verify that everybody still agrees. Be sure to avoid
3235  * introducing new side-effects here.
3236  */
3237 static bool
3239  TransactionId *visibility_cutoff_xid,
3240  bool *all_frozen)
3241 {
3242  Page page = BufferGetPage(buf);
3244  OffsetNumber offnum,
3245  maxoff;
3246  bool all_visible = true;
3247 
      /* Start optimistic; the scan below can only downgrade these outputs */
3248  *visibility_cutoff_xid = InvalidTransactionId;
3249  *all_frozen = true;
3250 
      /*
       * The all_visible test in the loop condition ends the scan at the first
       * item that disqualifies the page.
       */
3251  maxoff = PageGetMaxOffsetNumber(page);
3252  for (offnum = FirstOffsetNumber;
3253  offnum <= maxoff && all_visible;
3254  offnum = OffsetNumberNext(offnum))
3255  {
3256  ItemId itemid;
3257  HeapTupleData tuple;
3258 
3259  /*
3260  * Set the offset number so that we can display it along with any
3261  * error that occurred while processing this tuple.
3262  */
3263  vacrel->offnum = offnum;
3264  itemid = PageGetItemId(page, offnum);
3265 
3266  /* Unused or redirect line pointers are of no interest */
3267  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3268  continue;
3269 
3270  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3271 
3272  /*
3273  * Dead line pointers can have index pointers pointing to them. So
3274  * they can't be treated as visible
3275  */
3276  if (ItemIdIsDead(itemid))
3277  {
3278  all_visible = false;
3279  *all_frozen = false;
3280  break;
3281  }
3282 
3283  Assert(ItemIdIsNormal(itemid));
3284 
3285  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3286  tuple.t_len = ItemIdGetLength(itemid);
3287  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3288 
3289  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3290  buf))
3291  {
3292  case HEAPTUPLE_LIVE:
3293  {
3294  TransactionId xmin;
3295 
3296  /* Check comments in lazy_scan_prune. */
3298  {
3299  all_visible = false;
3300  *all_frozen = false;
      /*
       * This break (like the others inside the switch) only leaves
       * the switch; the scan itself stops through the all_visible
       * test in the for-loop condition.
       */
3301  break;
3302  }
3303 
3304  /*
3305  * The inserter definitely committed. But is it old enough
3306  * that everyone sees it as committed?
3307  */
3308  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3309  if (!TransactionIdPrecedes(xmin,
3310  vacrel->cutoffs.OldestXmin))
3311  {
3312  all_visible = false;
3313  *all_frozen = false;
3314  break;
3315  }
3316 
3317  /* Track newest xmin on page. */
3318  if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3319  TransactionIdIsNormal(xmin))
3320  *visibility_cutoff_xid = xmin;
3321 
3322  /* Check whether this tuple is already frozen or not */
3323  if (all_visible && *all_frozen &&
3325  *all_frozen = false;
3326  }
3327  break;
3328 
3329  case HEAPTUPLE_DEAD:
3333  {
3334  all_visible = false;
3335  *all_frozen = false;
3336  break;
3337  }
3338  default:
3339  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3340  break;
3341  }
3342  } /* scan along page */
3343 
3344  /* Clear the offset information once we have processed the given page. */
3345  vacrel->offnum = InvalidOffsetNumber;
3346 
3347  return all_visible;
3348 }
3349 
3350 /*
3351  * Update index statistics in pg_class if the statistics are accurate.
3352  */
3353 static void
3355 {
3356  Relation *indrels = vacrel->indrels;
3357  int nindexes = vacrel->nindexes;
3358  IndexBulkDeleteResult **indstats = vacrel->indstats;
3359 
3360  Assert(vacrel->do_index_cleanup);
3361 
3362  for (int idx = 0; idx < nindexes; idx++)
3363  {
3364  Relation indrel = indrels[idx];
3365  IndexBulkDeleteResult *istat = indstats[idx];
3366 
      /*
       * "Accurate" in the header comment means: stats exist for this index
       * and its tuple count is not flagged as an estimate. Anything else is
       * left untouched.
       */
3367  if (istat == NULL || istat->estimated_count)
3368  continue;
3369 
3370  /* Update index statistics */
3371  vac_update_relstats(indrel,
3372  istat->num_pages,
3373  istat->num_index_tuples,
3374  0,
3375  false,
3378  NULL, NULL, false);
3379  }
3380 }
3381 
3382 /*
3383  * Error context callback for errors occurring during vacuum. The error
3384  * context messages for index phases should match the messages set in parallel
3385  * vacuum. If you change this function for those phases, change
3386  * parallel_vacuum_error_callback() as well.
3387  */
3388 static void
3390 {
      /* arg is the LVRelState whose phase/blkno/offnum describe current work */
3391  LVRelState *errinfo = arg;
3392 
3393  switch (errinfo->phase)
3394  {
      /*
       * Heap scanning: report block and offset when both are valid, block
       * alone when only it is valid, otherwise just the relation.
       */
3396  if (BlockNumberIsValid(errinfo->blkno))
3397  {
3398  if (OffsetNumberIsValid(errinfo->offnum))
3399  errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3400  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3401  else
3402  errcontext("while scanning block %u of relation \"%s.%s\"",
3403  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3404  }
3405  else
3406  errcontext("while scanning relation \"%s.%s\"",
3407  errinfo->relnamespace, errinfo->relname);
3408  break;
3409 
      /* Heap vacuuming: same three levels of detail as the scanning case */
3411  if (BlockNumberIsValid(errinfo->blkno))
3412  {
3413  if (OffsetNumberIsValid(errinfo->offnum))
3414  errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3415  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3416  else
3417  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3418  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3419  }
3420  else
3421  errcontext("while vacuuming relation \"%s.%s\"",
3422  errinfo->relnamespace, errinfo->relname);
3423  break;
3424 
      /* Index phases identify the index through errinfo->indname */
3426  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3427  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3428  break;
3429 
3431  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3432  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3433  break;
3434 
      /*
       * Truncation: blkno is printed as the target size in blocks; no
       * context line is added at all when blkno is invalid.
       */
3436  if (BlockNumberIsValid(errinfo->blkno))
3437  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3438  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3439  break;
3440 
3442  default:
3443  return; /* do nothing; the errinfo may not be
3444  * initialized */
3445  }
3446 }
3447 
3448 /*
3449  * Updates the information required for vacuum error callback. This also saves
3450  * the current information which can be later restored via restore_vacuum_error_info.
3451  */
3452 static void
3454  int phase, BlockNumber blkno, OffsetNumber offnum)
3455 {
      /*
       * Saving is optional: when saved_vacrel is NULL the previous
       * phase/blkno/offnum are simply overwritten below.
       */
3456  if (saved_vacrel)
3457  {
3458  saved_vacrel->offnum = vacrel->offnum;
3459  saved_vacrel->blkno = vacrel->blkno;
3460  saved_vacrel->phase = vacrel->phase;
3461  }
3462 
      /* Install the new position; the error callback reads these fields */
3463  vacrel->blkno = blkno;
3464  vacrel->offnum = offnum;
3465  vacrel->phase = phase;
3466 }
3467 
3468 /*
3469  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3470  */
3471 static void
3473  const LVSavedErrInfo *saved_vacrel)
3474 {
      /*
       * saved_vacrel is dereferenced unconditionally, so unlike in
       * update_vacuum_error_info it must not be NULL here.
       */
3475  vacrel->blkno = saved_vacrel->blkno;
3476  vacrel->offnum = saved_vacrel->offnum;
3477  vacrel->phase = saved_vacrel->phase;
3478 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int autovacuum_work_mem
Definition: autovacuum.c:118
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3385
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1659
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1719
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1583
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
bool track_io_timing
Definition: bufmgr.c:138
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3290
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:601
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4480
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4497
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2111
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4795
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4715
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:755
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4956
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:227
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:355
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:159
@ RBM_NORMAL
Definition: bufmgr.h:44
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:303
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static void PageClearAllVisible(Page page)
Definition: bufpage.h:436
#define SizeOfPageHeaderData
Definition: bufpage.h:213
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define Min(x, y)
Definition: c.h:988
signed int int32
Definition: c.h:478
#define Max(x, y)
Definition: c.h:982
TransactionId MultiXactId
Definition: c.h:646
#define unlikely(x)
Definition: c.h:295
unsigned char uint8
Definition: c.h:488
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:922
uint32 TransactionId
Definition: c.h:636
size_t Size
Definition: c.h:589
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3043
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
int errdetail(const char *fmt,...)
Definition: elog.c:1202
ErrorContextCallback * error_context_stack
Definition: elog.c:95
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define _(x)
Definition: elog.c:91
#define LOG
Definition: elog.h:31
#define errcontext
Definition: elog.h:196
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define INFO
Definition: elog.h:34
#define ereport(elevel,...)
Definition: elog.h:149
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:232
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:182
int64 VacuumPageHit
Definition: globals.c:151
int64 VacuumPageMiss
Definition: globals.c:152
bool VacuumCostActive
Definition: globals.c:156
int64 VacuumPageDirty
Definition: globals.c:153
int VacuumCostBalance
Definition: globals.c:155
int maintenance_work_mem
Definition: globals.c:127
struct Latch * MyLatch
Definition: globals.c:58
Oid MyDatabaseId
Definition: globals.c:89
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7329
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7384
void heap_freeze_execute_prepared(Relation rel, Buffer buffer, TransactionId snapshotConflictHorizon, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6672
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6369
HTSV_Result
Definition: heapam.h:95
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:98
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:99
@ HEAPTUPLE_LIVE
Definition: heapam.h:97
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:100
@ HEAPTUPLE_DEAD
Definition: heapam.h:96
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
#define XLOG_HEAP2_VACUUM
Definition: heapam_xlog.h:55
#define SizeOfHeapVacuum
Definition: heapam_xlog.h:267
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
int verbose
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:280
int i
Definition: isn.c:73
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static void ItemPointerSetBlockNumber(ItemPointerData *pointer, BlockNumber blockNumber)
Definition: itemptr.h:147
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
void ResetLatch(Latch *latch)
Definition: latch.c:699
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:492
#define WL_TIMEOUT
Definition: latch.h:128
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:130
#define WL_LATCH_SET
Definition: latch.h:125
Assert(fmt[strlen(fmt) - 1] !='\n')
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:311
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:276
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:374
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3324
char * pstrdup(const char *in)
Definition: mcxt.c:1644
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc0(Size size)
Definition: mcxt.c:1257
void * palloc(Size size)
Definition: mcxt.c:1226
#define MaxAllocSize
Definition: memutils.h:40
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3170
#define InvalidMultiXactId
Definition: multixact.h:24
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
void * arg
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
static char * buf
Definition: pg_test_fsync.c:67
int64 PgStat_Counter
Definition: pgstat.h:89
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4040
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:35
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:30
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_NUM_DEAD_TUPLES
Definition: progress.h:27
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:32
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:33
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:31
#define PROGRESS_VACUUM_MAX_DEAD_TUPLES
Definition: progress.h:26
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:34
int heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, int *nnewlpdead, OffsetNumber *off_loc)
Definition: pruneheap.c:265
#define RelationGetRelid(relation)
Definition: rel.h:504
#define RelationGetRelationName(relation)
Definition: rel.h:538
#define RelationNeedsWAL(relation)
Definition: rel.h:629
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:638
#define RelationGetNamespace(relation)
Definition: rel.h:545
@ MAIN_FORKNUM
Definition: relpath.h:50
int old_snapshot_threshold
Definition: snapmgr.c:79
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:287
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:190
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:178
bool freeze_required
Definition: heapam.h:152
MultiXactId FreezePageRelminMxid
Definition: heapam.h:179
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:189
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
OffsetNumber offset
Definition: heapam.h:122
bool estimated_count
Definition: genam.h:78
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_newly_deleted
Definition: genam.h:81
BlockNumber pages_free
Definition: genam.h:83
BlockNumber num_pages
Definition: genam.h:77
double num_index_tuples
Definition: genam.h:79
Relation index
Definition: genam.h:46
double num_heap_tuples
Definition: genam.h:52
bool analyze_only
Definition: genam.h:48
BufferAccessStrategy strategy
Definition: genam.h:53
Relation heaprel
Definition: genam.h:47
bool report_progress
Definition: genam.h:49
int message_level
Definition: genam.h:51
bool estimated_count
Definition: genam.h:50
TransactionId visibility_cutoff_xid
Definition: vacuumlazy.c:230
ParallelVacuumState * pvs
Definition: vacuumlazy.c:150
bool verbose
Definition: vacuumlazy.c:180
int nindexes
Definition: vacuumlazy.c:146
OffsetNumber offnum
Definition: vacuumlazy.c:178
int64 tuples_deleted
Definition: vacuumlazy.c:207
BlockNumber nonempty_pages
Definition: vacuumlazy.c:196
bool do_rel_truncate
Definition: vacuumlazy.c:162
BlockNumber scanned_pages
Definition: vacuumlazy.c:191
bool aggressive
Definition: vacuumlazy.c:153
GlobalVisState * vistest
Definition: vacuumlazy.c:166
BlockNumber removed_pages
Definition: vacuumlazy.c:192
int num_index_scans
Definition: vacuumlazy.c:205
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:202
double new_live_tuples
Definition: vacuumlazy.c:200
double new_rel_tuples
Definition: vacuumlazy.c:199
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:168
Relation rel
Definition: vacuumlazy.c:144
bool consider_bypass_optimization
Definition: vacuumlazy.c:157
BlockNumber rel_pages
Definition: vacuumlazy.c:190
int64 recently_dead_tuples
Definition: vacuumlazy.c:211
int64 tuples_frozen
Definition: vacuumlazy.c:208
BlockNumber frozen_pages
Definition: vacuumlazy.c:193
char * dbname
Definition: vacuumlazy.c:173
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:195
char * relnamespace
Definition: vacuumlazy.c:174
int64 live_tuples
Definition: vacuumlazy.c:210
int64 lpdead_items
Definition: vacuumlazy.c:209
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:149
bool skippedallvis
Definition: vacuumlazy.c:170
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:194
Relation * indrels
Definition: vacuumlazy.c:145
bool skipwithvm
Definition: vacuumlazy.c:155
bool do_index_cleanup
Definition: vacuumlazy.c:161
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:169
int64 missed_dead_tuples
Definition: vacuumlazy.c:212
BlockNumber blkno
Definition: vacuumlazy.c:177
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:165
char * relname
Definition: vacuumlazy.c:175
VacDeadItems * dead_items
Definition: vacuumlazy.c:189
VacErrPhase phase
Definition: vacuumlazy.c:179
char * indname
Definition: vacuumlazy.c:176
bool do_index_vacuuming
Definition: vacuumlazy.c:160
BlockNumber blkno
Definition: vacuumlazy.c:236
VacErrPhase phase
Definition: vacuumlazy.c:238
OffsetNumber offnum
Definition: vacuumlazy.c:237
Form_pg_class rd_rel
Definition: rel.h:111
ItemPointerData items[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuum.h:292
int max_items
Definition: vacuum.h:288
int num_items
Definition: vacuum.h:289
TransactionId FreezeLimit
Definition: vacuum.h:279
TransactionId OldestXmin
Definition: vacuum.h:269
TransactionId relfrozenxid
Definition: vacuum.h:253
MultiXactId relminmxid
Definition: vacuum.h:254
MultiXactId MultiXactCutoff
Definition: vacuum.h:280
MultiXactId OldestMxact
Definition: vacuum.h:270
int nworkers
Definition: vacuum.h:241
VacOptValue truncate
Definition: vacuum.h:234
bits32 options
Definition: vacuum.h:222
bool is_wraparound
Definition: vacuum.h:229
int log_min_duration
Definition: vacuum.h:230
VacOptValue index_cleanup
Definition: vacuum.h:233
uint64 wal_bytes
Definition: instrument.h:53
int64 wal_fpi
Definition: instrument.h:52
int64 wal_records
Definition: instrument.h:51
Definition: type.h:95
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, VacDeadItems *dead_items)
Definition: vacuum.c:2480
Size vac_max_items_to_alloc_size(int max_items)
Definition: vacuum.c:2526
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2262
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1423
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2305
void vacuum_delay_point(void)
Definition: vacuum.c:2326
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1078
bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1265
bool VacuumFailsafeActive
Definition: vacuum.c:99
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1327
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition: vacuum.c:2501
#define VACOPT_VERBOSE
Definition: vacuum.h:185
#define MAXDEADITEMS(avail_mem)
Definition: vacuum.h:295
@ VACOPTVALUE_AUTO
Definition: vacuum.h:206
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:208
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:205
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:207
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:191
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3213
static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:3238
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3354
struct LVPagePruneState LVPagePruneState
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:87
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3389
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2843
static void lazy_vacuum(LVRelState *vacrel)
Definition: vacuumlazy.c:2193
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2663
static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int index, Buffer vmbuffer)
Definition: vacuumlazy.c:2502
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:76
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2822
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
Definition: vacuumlazy.c:1412
VacErrPhase
Definition: vacuumlazy.c:132
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition: vacuumlazy.c:134
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition: vacuumlazy.c:135
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition: vacuumlazy.c:138
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition: vacuumlazy.c:137
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition: vacuumlazy.c:136
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:133
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:825
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *hastup, bool *recordfreespace)
Definition: vacuumlazy.c:1959
#define ParallelVacuumIsActive(vacrel)
Definition: vacuumlazy.c:128
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
Definition: vacuumlazy.c:3472
static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, LVPagePruneState *prunestate)
Definition: vacuumlazy.c:1535
void heap_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:303
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
Definition: vacuumlazy.c:2710
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:77
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2619
static int dead_items_max_items(LVRelState *vacrel)
Definition: vacuumlazy.c:3117
struct LVSavedErrInfo LVSavedErrInfo
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
Definition: vacuumlazy.c:2758
#define PREFETCH_SIZE
Definition: vacuumlazy.c:122
struct LVRelState LVRelState
static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block, bool *next_unskippable_allvis, bool *skipping_current_range)
Definition: vacuumlazy.c:1296
#define BYPASS_THRESHOLD_PAGES
Definition: vacuumlazy.c:94
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:3156
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:88
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2318
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
Definition: vacuumlazy.c:3453
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
Definition: vacuumlazy.c:2974
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:116
#define FAILSAFE_EVERY_PAGES
Definition: vacuumlazy.c:100
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:86
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
Definition: vacuumlazy.c:2414
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:109
VacDeadItems * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans)
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int max_items, int elevel, BufferAccessStrategy bstrategy)
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
@ WAIT_EVENT_VACUUM_TRUNCATE
Definition: wait_event.h:153
bool IsInParallelMode(void)
Definition: xact.c:1069
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:461
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:392
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1225
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34