/*
 * Doxygen extraction of PostgreSQL source code (git master):
 * vacuumlazy.c — see the original file for the authoritative text.
 */
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  * The major space usage for vacuuming is storage for the array of dead TIDs
7  * that are to be removed from indexes. We want to ensure we can vacuum even
8  * the very largest relations with finite memory space usage. To do that, we
9  * set upper bounds on the number of TIDs we can keep track of at once.
10  *
11  * We are willing to use at most maintenance_work_mem (or perhaps
12  * autovacuum_work_mem) memory space to keep track of dead TIDs. We initially
13  * allocate an array of TIDs of that size, with an upper limit that depends on
14  * table size (this limit ensures we don't allocate a huge area uselessly for
15  * vacuuming small tables). If the array threatens to overflow, we must call
16  * lazy_vacuum to vacuum indexes (and to vacuum the pages that we've pruned).
17  * This frees up the memory space dedicated to storing dead TIDs.
18  *
19  * In practice VACUUM will often complete its initial pass over the target
20  * heap relation without ever running out of space to store TIDs. This means
21  * that there only needs to be one call to lazy_vacuum, after the initial pass
22  * completes.
23  *
24  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  * src/backend/access/heap/vacuumlazy.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34 
35 #include <math.h>
36 
37 #include "access/amapi.h"
38 #include "access/genam.h"
39 #include "access/heapam.h"
40 #include "access/heapam_xlog.h"
41 #include "access/htup_details.h"
42 #include "access/multixact.h"
43 #include "access/transam.h"
44 #include "access/visibilitymap.h"
45 #include "access/xact.h"
46 #include "access/xlog.h"
47 #include "access/xloginsert.h"
48 #include "catalog/index.h"
49 #include "catalog/storage.h"
50 #include "commands/dbcommands.h"
51 #include "commands/progress.h"
52 #include "commands/vacuum.h"
53 #include "executor/instrument.h"
54 #include "miscadmin.h"
55 #include "optimizer/paths.h"
56 #include "pgstat.h"
57 #include "portability/instr_time.h"
58 #include "postmaster/autovacuum.h"
59 #include "storage/bufmgr.h"
60 #include "storage/freespace.h"
61 #include "storage/lmgr.h"
62 #include "tcop/tcopprot.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_rusage.h"
66 #include "utils/timestamp.h"
67 
68 
69 /*
70  * Space/time tradeoff parameters: do these need to be user-tunable?
71  *
72  * To consider truncating the relation, we want there to be at least
73  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
74  * is less) potentially-freeable pages.
75  */
76 #define REL_TRUNCATE_MINIMUM 1000
77 #define REL_TRUNCATE_FRACTION 16
78 
79 /*
80  * Timing parameters for truncate locking heuristics.
81  *
82  * These were not exposed as user tunable GUC values because it didn't seem
83  * that the potential for improvement was great enough to merit the cost of
84  * supporting them.
85  */
86 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
87 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
88 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
89 
90 /*
91  * Threshold that controls whether we bypass index vacuuming and heap
92  * vacuuming as an optimization
93  */
94 #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
95 
96 /*
97  * Perform a failsafe check each time we scan another 4GB of pages.
98  * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
99  */
100 #define FAILSAFE_EVERY_PAGES \
101  ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
102 
103 /*
104  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
105  * (it won't be exact because we only vacuum FSM after processing a heap page
106  * that has some removable tuples). When there are indexes, this is ignored,
107  * and we vacuum FSM after each index/heap cleaning pass.
108  */
109 #define VACUUM_FSM_EVERY_PAGES \
110  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
111 
112 /*
113  * Before we consider skipping a page that's marked as clean in
114  * visibility map, we must've seen at least this many clean pages.
115  */
116 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
117 
118 /*
119  * Size of the prefetch window for lazy vacuum backwards truncation scan.
120  * Needs to be a power of 2.
121  */
122 #define PREFETCH_SIZE ((BlockNumber) 32)
123 
124 /*
125  * Macro to check if we are in a parallel vacuum. If true, we are in the
126  * parallel mode and the DSM segment is initialized.
127  */
128 #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
129 
130 /* Phases of vacuum during which we report error context. */
131 typedef enum
132 {
140 
141 typedef struct LVRelState
142 {
143  /* Target heap relation and its indexes */
146  int nindexes;
147 
148  /* Buffer access strategy and parallel vacuum state */
151 
152  /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
154  /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
156  /* Wraparound failsafe has been triggered? */
158  /* Consider index vacuuming bypass optimization? */
160 
161  /* Doing index vacuuming, index cleanup, rel truncation? */
165 
166  /* VACUUM operation's cutoffs for freezing and pruning */
167  struct VacuumCutoffs cutoffs;
169  /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
173 
174  /* Error reporting state */
175  char *dbname;
177  char *relname;
178  char *indname; /* Current index name */
179  BlockNumber blkno; /* used only for heap operations */
180  OffsetNumber offnum; /* used only for heap operations */
182  bool verbose; /* VACUUM VERBOSE? */
183 
184  /*
185  * dead_items stores TIDs whose index tuples are deleted by index
186  * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
187  * that has been processed by lazy_scan_prune. Also needed by
188  * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
189  * LP_UNUSED during second heap pass.
190  */
191  VacDeadItems *dead_items; /* TIDs whose index tuples we'll delete */
192  BlockNumber rel_pages; /* total number of pages */
193  BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
194  BlockNumber removed_pages; /* # pages removed by relation truncation */
195  BlockNumber frozen_pages; /* # pages with newly frozen tuples */
196  BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
197  BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
198  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
199 
200  /* Statistics output by us, for table */
201  double new_rel_tuples; /* new estimated total # of tuples */
202  double new_live_tuples; /* new estimated total # of live tuples */
203  /* Statistics output by index AMs */
205 
206  /* Instrumentation counters */
208  /* Counters that follow are only for scanned_pages */
209  int64 tuples_deleted; /* # deleted from table */
210  int64 tuples_frozen; /* # newly frozen */
211  int64 lpdead_items; /* # deleted from indexes */
212  int64 live_tuples; /* # live tuples remaining */
213  int64 recently_dead_tuples; /* # dead, but not yet removable */
214  int64 missed_dead_tuples; /* # removable, but not removed */
216 
217 /*
218  * State returned by lazy_scan_prune()
219  */
220 typedef struct LVPagePruneState
221 {
222  bool hastup; /* Page prevents rel truncation? */
223  bool has_lpdead_items; /* includes existing LP_DEAD items */
224 
225  /*
226  * State describes the proper VM bit states to set for the page following
227  * pruning and freezing. all_visible implies !has_lpdead_items, but don't
228  * trust all_frozen result unless all_visible is also set to true.
229  */
230  bool all_visible; /* Every item visible to all? */
231  bool all_frozen; /* provided all_visible is also true */
232  TransactionId visibility_cutoff_xid; /* For recovery conflicts */
234 
235 /* Struct for saving and restoring vacuum error information. */
236 typedef struct LVSavedErrInfo
237 {
242 
243 
244 /* non-export function prototypes */
245 static void lazy_scan_heap(LVRelState *vacrel);
246 static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
247  BlockNumber next_block,
248  bool *next_unskippable_allvis,
249  bool *skipping_current_range);
250 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
251  BlockNumber blkno, Page page,
252  bool sharelock, Buffer vmbuffer);
253 static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
254  BlockNumber blkno, Page page,
255  LVPagePruneState *prunestate);
256 static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
257  BlockNumber blkno, Page page,
258  bool *hastup, bool *recordfreespace);
259 static void lazy_vacuum(LVRelState *vacrel);
260 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
261 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
262 static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
263  Buffer buffer, int index, Buffer vmbuffer);
264 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
265 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
267  IndexBulkDeleteResult *istat,
268  double reltuples,
269  LVRelState *vacrel);
271  IndexBulkDeleteResult *istat,
272  double reltuples,
273  bool estimated_count,
274  LVRelState *vacrel);
275 static bool should_attempt_truncation(LVRelState *vacrel);
276 static void lazy_truncate_heap(LVRelState *vacrel);
278  bool *lock_waiter_detected);
279 static void dead_items_alloc(LVRelState *vacrel, int nworkers);
280 static void dead_items_cleanup(LVRelState *vacrel);
281 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
282  TransactionId *visibility_cutoff_xid, bool *all_frozen);
283 static void update_relstats_all_indexes(LVRelState *vacrel);
284 static void vacuum_error_callback(void *arg);
285 static void update_vacuum_error_info(LVRelState *vacrel,
286  LVSavedErrInfo *saved_vacrel,
287  int phase, BlockNumber blkno,
288  OffsetNumber offnum);
289 static void restore_vacuum_error_info(LVRelState *vacrel,
290  const LVSavedErrInfo *saved_vacrel);
291 
292 
293 /*
294  * heap_vacuum_rel() -- perform VACUUM for one heap relation
295  *
296  * This routine sets things up for and then calls lazy_scan_heap, where
297  * almost all work actually takes place. Finalizes everything after call
298  * returns by managing relation truncation and updating rel's pg_class
299  * entry. (Also updates pg_class entries for any indexes that need it.)
300  *
301  * At entry, we have already established a transaction and opened
302  * and locked the relation.
303  */
304 void
306  BufferAccessStrategy bstrategy)
307 {
308  LVRelState *vacrel;
309  bool verbose,
310  instrument,
311  skipwithvm,
312  frozenxid_updated,
313  minmulti_updated;
314  BlockNumber orig_rel_pages,
315  new_rel_pages,
316  new_rel_allvisible;
317  PGRUsage ru0;
318  TimestampTz starttime = 0;
319  PgStat_Counter startreadtime = 0,
320  startwritetime = 0;
321  WalUsage startwalusage = pgWalUsage;
322  int64 StartPageHit = VacuumPageHit,
323  StartPageMiss = VacuumPageMiss,
324  StartPageDirty = VacuumPageDirty;
325  ErrorContextCallback errcallback;
326  char **indnames = NULL;
327 
328  verbose = (params->options & VACOPT_VERBOSE) != 0;
329  instrument = (verbose || (IsAutoVacuumWorkerProcess() &&
330  params->log_min_duration >= 0));
331  if (instrument)
332  {
333  pg_rusage_init(&ru0);
334  starttime = GetCurrentTimestamp();
335  if (track_io_timing)
336  {
337  startreadtime = pgStatBlockReadTime;
338  startwritetime = pgStatBlockWriteTime;
339  }
340  }
341 
343  RelationGetRelid(rel));
344 
345  /*
346  * Setup error traceback support for ereport() first. The idea is to set
347  * up an error context callback to display additional information on any
348  * error during a vacuum. During different phases of vacuum, we update
349  * the state so that the error context callback always display current
350  * information.
351  *
352  * Copy the names of heap rel into local memory for error reporting
353  * purposes, too. It isn't always safe to assume that we can get the name
354  * of each rel. It's convenient for code in lazy_scan_heap to always use
355  * these temp copies.
356  */
357  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
360  vacrel->relname = pstrdup(RelationGetRelationName(rel));
361  vacrel->indname = NULL;
363  vacrel->verbose = verbose;
364  errcallback.callback = vacuum_error_callback;
365  errcallback.arg = vacrel;
366  errcallback.previous = error_context_stack;
367  error_context_stack = &errcallback;
368 
369  /* Set up high level stuff about rel and its indexes */
370  vacrel->rel = rel;
371  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
372  &vacrel->indrels);
373  vacrel->bstrategy = bstrategy;
374  if (instrument && vacrel->nindexes > 0)
375  {
376  /* Copy index names used by instrumentation (not error reporting) */
377  indnames = palloc(sizeof(char *) * vacrel->nindexes);
378  for (int i = 0; i < vacrel->nindexes; i++)
379  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
380  }
381 
382  /*
383  * The index_cleanup param either disables index vacuuming and cleanup or
384  * forces it to go ahead when we would otherwise apply the index bypass
385  * optimization. The default is 'auto', which leaves the final decision
386  * up to lazy_vacuum().
387  *
388  * The truncate param allows user to avoid attempting relation truncation,
389  * though it can't force truncation to happen.
390  */
393  params->truncate != VACOPTVALUE_AUTO);
394  vacrel->failsafe_active = false;
395  vacrel->consider_bypass_optimization = true;
396  vacrel->do_index_vacuuming = true;
397  vacrel->do_index_cleanup = true;
398  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
399  if (params->index_cleanup == VACOPTVALUE_DISABLED)
400  {
401  /* Force disable index vacuuming up-front */
402  vacrel->do_index_vacuuming = false;
403  vacrel->do_index_cleanup = false;
404  }
405  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
406  {
407  /* Force index vacuuming. Note that failsafe can still bypass. */
408  vacrel->consider_bypass_optimization = false;
409  }
410  else
411  {
412  /* Default/auto, make all decisions dynamically */
414  }
415 
416  /* Initialize page counters explicitly (be tidy) */
417  vacrel->scanned_pages = 0;
418  vacrel->removed_pages = 0;
419  vacrel->frozen_pages = 0;
420  vacrel->lpdead_item_pages = 0;
421  vacrel->missed_dead_pages = 0;
422  vacrel->nonempty_pages = 0;
423  /* dead_items_alloc allocates vacrel->dead_items later on */
424 
425  /* Allocate/initialize output statistics state */
426  vacrel->new_rel_tuples = 0;
427  vacrel->new_live_tuples = 0;
428  vacrel->indstats = (IndexBulkDeleteResult **)
429  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
430 
431  /* Initialize remaining counters (be tidy) */
432  vacrel->num_index_scans = 0;
433  vacrel->tuples_deleted = 0;
434  vacrel->tuples_frozen = 0;
435  vacrel->lpdead_items = 0;
436  vacrel->live_tuples = 0;
437  vacrel->recently_dead_tuples = 0;
438  vacrel->missed_dead_tuples = 0;
439 
440  /*
441  * Get cutoffs that determine which deleted tuples are considered DEAD,
442  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
443  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
444  * happen in this order to ensure that the OldestXmin cutoff field works
445  * as an upper bound on the XIDs stored in the pages we'll actually scan
446  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
447  *
448  * Next acquire vistest, a related cutoff that's used in heap_page_prune.
449  * We expect vistest will always make heap_page_prune remove any deleted
450  * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become
451  * confused about whether a tuple should be frozen or removed. (In the
452  * future we might want to teach lazy_scan_prune to recompute vistest from
453  * time to time, to increase the number of dead tuples it can prune away.)
454  */
455  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
456  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
457  vacrel->vistest = GlobalVisTestFor(rel);
458  /* Initialize state used to track oldest extant XID/MXID */
459  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
460  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
461  vacrel->skippedallvis = false;
462  skipwithvm = true;
464  {
465  /*
466  * Force aggressive mode, and disable skipping blocks using the
467  * visibility map (even those set all-frozen)
468  */
469  vacrel->aggressive = true;
470  skipwithvm = false;
471  }
472 
473  vacrel->skipwithvm = skipwithvm;
474 
475  if (verbose)
476  {
477  if (vacrel->aggressive)
478  ereport(INFO,
479  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
480  vacrel->dbname, vacrel->relnamespace,
481  vacrel->relname)));
482  else
483  ereport(INFO,
484  (errmsg("vacuuming \"%s.%s.%s\"",
485  vacrel->dbname, vacrel->relnamespace,
486  vacrel->relname)));
487  }
488 
489  /*
490  * Allocate dead_items array memory using dead_items_alloc. This handles
491  * parallel VACUUM initialization as part of allocating shared memory
492  * space used for dead_items. (But do a failsafe precheck first, to
493  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
494  * is already dangerously old.)
495  */
497  dead_items_alloc(vacrel, params->nworkers);
498 
499  /*
500  * Call lazy_scan_heap to perform all required heap pruning, index
501  * vacuuming, and heap vacuuming (plus related processing)
502  */
503  lazy_scan_heap(vacrel);
504 
505  /*
506  * Free resources managed by dead_items_alloc. This ends parallel mode in
507  * passing when necessary.
508  */
509  dead_items_cleanup(vacrel);
511 
512  /*
513  * Update pg_class entries for each of rel's indexes where appropriate.
514  *
515  * Unlike the later update to rel's pg_class entry, this is not critical.
516  * Maintains relpages/reltuples statistics used by the planner only.
517  */
518  if (vacrel->do_index_cleanup)
520 
521  /* Done with rel's indexes */
522  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
523 
524  /* Optionally truncate rel */
525  if (should_attempt_truncation(vacrel))
526  lazy_truncate_heap(vacrel);
527 
528  /* Pop the error context stack */
529  error_context_stack = errcallback.previous;
530 
531  /* Report that we are now doing final cleanup */
534 
535  /*
536  * Prepare to update rel's pg_class entry.
537  *
538  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
539  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
540  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
541  */
542  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
544  vacrel->cutoffs.relfrozenxid,
545  vacrel->NewRelfrozenXid));
546  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
548  vacrel->cutoffs.relminmxid,
549  vacrel->NewRelminMxid));
550  if (vacrel->skippedallvis)
551  {
552  /*
553  * Must keep original relfrozenxid in a non-aggressive VACUUM that
554  * chose to skip an all-visible page range. The state that tracks new
555  * values will have missed unfrozen XIDs from the pages we skipped.
556  */
557  Assert(!vacrel->aggressive);
560  }
561 
562  /*
563  * For safety, clamp relallvisible to be not more than what we're setting
564  * pg_class.relpages to
565  */
566  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
567  visibilitymap_count(rel, &new_rel_allvisible, NULL);
568  if (new_rel_allvisible > new_rel_pages)
569  new_rel_allvisible = new_rel_pages;
570 
571  /*
572  * Now actually update rel's pg_class entry.
573  *
574  * In principle new_live_tuples could be -1 indicating that we (still)
575  * don't know the tuple count. In practice that can't happen, since we
576  * scan every page that isn't skipped using the visibility map.
577  */
578  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
579  new_rel_allvisible, vacrel->nindexes > 0,
580  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
581  &frozenxid_updated, &minmulti_updated, false);
582 
583  /*
584  * Report results to the cumulative stats system, too.
585  *
586  * Deliberately avoid telling the stats system about LP_DEAD items that
587  * remain in the table due to VACUUM bypassing index and heap vacuuming.
588  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
589  * It seems like a good idea to err on the side of not vacuuming again too
590  * soon in cases where the failsafe prevented significant amounts of heap
591  * vacuuming.
592  */
594  rel->rd_rel->relisshared,
595  Max(vacrel->new_live_tuples, 0),
596  vacrel->recently_dead_tuples +
597  vacrel->missed_dead_tuples);
599 
600  if (instrument)
601  {
602  TimestampTz endtime = GetCurrentTimestamp();
603 
604  if (verbose || params->log_min_duration == 0 ||
605  TimestampDifferenceExceeds(starttime, endtime,
606  params->log_min_duration))
607  {
608  long secs_dur;
609  int usecs_dur;
610  WalUsage walusage;
612  char *msgfmt;
613  int32 diff;
614  int64 PageHitOp = VacuumPageHit - StartPageHit,
615  PageMissOp = VacuumPageMiss - StartPageMiss,
616  PageDirtyOp = VacuumPageDirty - StartPageDirty;
617  double read_rate = 0,
618  write_rate = 0;
619 
620  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
621  memset(&walusage, 0, sizeof(WalUsage));
622  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
623 
625  if (verbose)
626  {
627  /*
628  * Aggressiveness already reported earlier, in dedicated
629  * VACUUM VERBOSE ereport
630  */
631  Assert(!params->is_wraparound);
632  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
633  }
634  else if (params->is_wraparound)
635  {
636  /*
637  * While it's possible for a VACUUM to be both is_wraparound
638  * and !aggressive, that's just a corner-case -- is_wraparound
639  * implies aggressive. Produce distinct output for the corner
640  * case all the same, just in case.
641  */
642  if (vacrel->aggressive)
643  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
644  else
645  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
646  }
647  else
648  {
649  if (vacrel->aggressive)
650  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
651  else
652  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
653  }
654  appendStringInfo(&buf, msgfmt,
655  vacrel->dbname,
656  vacrel->relnamespace,
657  vacrel->relname,
658  vacrel->num_index_scans);
659  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
660  vacrel->removed_pages,
661  new_rel_pages,
662  vacrel->scanned_pages,
663  orig_rel_pages == 0 ? 100.0 :
664  100.0 * vacrel->scanned_pages / orig_rel_pages);
666  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
667  (long long) vacrel->tuples_deleted,
668  (long long) vacrel->new_rel_tuples,
669  (long long) vacrel->recently_dead_tuples);
670  if (vacrel->missed_dead_tuples > 0)
672  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
673  (long long) vacrel->missed_dead_tuples,
674  vacrel->missed_dead_pages);
675  diff = (int32) (ReadNextTransactionId() -
676  vacrel->cutoffs.OldestXmin);
678  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
679  vacrel->cutoffs.OldestXmin, diff);
680  if (frozenxid_updated)
681  {
682  diff = (int32) (vacrel->NewRelfrozenXid -
683  vacrel->cutoffs.relfrozenxid);
685  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
686  vacrel->NewRelfrozenXid, diff);
687  }
688  if (minmulti_updated)
689  {
690  diff = (int32) (vacrel->NewRelminMxid -
691  vacrel->cutoffs.relminmxid);
693  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
694  vacrel->NewRelminMxid, diff);
695  }
696  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
697  vacrel->frozen_pages,
698  orig_rel_pages == 0 ? 100.0 :
699  100.0 * vacrel->frozen_pages / orig_rel_pages,
700  (long long) vacrel->tuples_frozen);
701  if (vacrel->do_index_vacuuming)
702  {
703  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
704  appendStringInfoString(&buf, _("index scan not needed: "));
705  else
706  appendStringInfoString(&buf, _("index scan needed: "));
707 
708  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
709  }
710  else
711  {
712  if (!vacrel->failsafe_active)
713  appendStringInfoString(&buf, _("index scan bypassed: "));
714  else
715  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
716 
717  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
718  }
719  appendStringInfo(&buf, msgfmt,
720  vacrel->lpdead_item_pages,
721  orig_rel_pages == 0 ? 100.0 :
722  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
723  (long long) vacrel->lpdead_items);
724  for (int i = 0; i < vacrel->nindexes; i++)
725  {
726  IndexBulkDeleteResult *istat = vacrel->indstats[i];
727 
728  if (!istat)
729  continue;
730 
732  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
733  indnames[i],
734  istat->num_pages,
735  istat->pages_newly_deleted,
736  istat->pages_deleted,
737  istat->pages_free);
738  }
739  if (track_io_timing)
740  {
741  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
742  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
743 
744  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
745  read_ms, write_ms);
746  }
747  if (secs_dur > 0 || usecs_dur > 0)
748  {
749  read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
750  (secs_dur + usecs_dur / 1000000.0);
751  write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
752  (secs_dur + usecs_dur / 1000000.0);
753  }
754  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
755  read_rate, write_rate);
757  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
758  (long long) PageHitOp,
759  (long long) PageMissOp,
760  (long long) PageDirtyOp);
762  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
763  (long long) walusage.wal_records,
764  (long long) walusage.wal_fpi,
765  (unsigned long long) walusage.wal_bytes);
766  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
767 
768  ereport(verbose ? INFO : LOG,
769  (errmsg_internal("%s", buf.data)));
770  pfree(buf.data);
771  }
772  }
773 
774  /* Cleanup index statistics and index names */
775  for (int i = 0; i < vacrel->nindexes; i++)
776  {
777  if (vacrel->indstats[i])
778  pfree(vacrel->indstats[i]);
779 
780  if (instrument)
781  pfree(indnames[i]);
782  }
783 }
784 
785 /*
786  * lazy_scan_heap() -- workhorse function for VACUUM
787  *
788  * This routine prunes each page in the heap, and considers the need to
789  * freeze remaining tuples with storage (not including pages that can be
790  * skipped using the visibility map). Also performs related maintenance
791  * of the FSM and visibility map. These steps all take place during an
792  * initial pass over the target heap relation.
793  *
794  * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
795  * consists of deleting index tuples that point to LP_DEAD items left in
796  * heap pages following pruning. Earlier initial pass over the heap will
797  * have collected the TIDs whose index tuples need to be removed.
798  *
799  * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
800  * largely consists of marking LP_DEAD items (from collected TID array)
801  * as LP_UNUSED. This has to happen in a second, final pass over the
802  * heap, to preserve a basic invariant that all index AMs rely on: no
803  * extant index tuple can ever be allowed to contain a TID that points to
804  * an LP_UNUSED line pointer in the heap. We must disallow premature
805  * recycling of line pointers to avoid index scans that get confused
806  * about which TID points to which tuple immediately after recycling.
807  * (Actually, this isn't a concern when target heap relation happens to
808  * have no indexes, which allows us to safely apply the one-pass strategy
809  * as an optimization).
810  *
811  * In practice we often have enough space to fit all TIDs, and so won't
812  * need to call lazy_vacuum more than once, after our initial pass over
813  * the heap has totally finished. Otherwise things are slightly more
814  * complicated: our "initial pass" over the heap applies only to those
815  * pages that were pruned before we needed to call lazy_vacuum, and our
816  * "final pass" over the heap only vacuums these same heap pages.
817  * However, we process indexes in full every time lazy_vacuum is called,
818  * which makes index processing very inefficient when memory is in short
819  * supply.
820  */
821 static void
823 {
824  BlockNumber rel_pages = vacrel->rel_pages,
825  blkno,
826  next_unskippable_block,
827  next_fsm_block_to_vacuum = 0;
828  VacDeadItems *dead_items = vacrel->dead_items;
829  Buffer vmbuffer = InvalidBuffer;
830  bool next_unskippable_allvis,
831  skipping_current_range;
832  const int initprog_index[] = {
836  };
837  int64 initprog_val[3];
838 
839  /* Report that we're scanning the heap, advertising total # of blocks */
840  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
841  initprog_val[1] = rel_pages;
842  initprog_val[2] = dead_items->max_items;
843  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
844 
845  /* Set up an initial range of skippable blocks using the visibility map */
846  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
847  &next_unskippable_allvis,
848  &skipping_current_range);
849  for (blkno = 0; blkno < rel_pages; blkno++)
850  {
851  Buffer buf;
852  Page page;
853  bool all_visible_according_to_vm;
854  LVPagePruneState prunestate;
855 
856  if (blkno == next_unskippable_block)
857  {
858  /*
859  * Can't skip this page safely. Must scan the page. But
860  * determine the next skippable range after the page first.
861  */
862  all_visible_according_to_vm = next_unskippable_allvis;
863  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
864  blkno + 1,
865  &next_unskippable_allvis,
866  &skipping_current_range);
867 
868  Assert(next_unskippable_block >= blkno + 1);
869  }
870  else
871  {
872  /* Last page always scanned (may need to set nonempty_pages) */
873  Assert(blkno < rel_pages - 1);
874 
875  if (skipping_current_range)
876  continue;
877 
878  /* Current range is too small to skip -- just scan the page */
879  all_visible_according_to_vm = true;
880  }
881 
882  vacrel->scanned_pages++;
883 
884  /* Report as block scanned, update error traceback information */
887  blkno, InvalidOffsetNumber);
888 
890 
891  /*
892  * Regularly check if wraparound failsafe should trigger.
893  *
894  * There is a similar check inside lazy_vacuum_all_indexes(), but
895  * relfrozenxid might start to look dangerously old before we reach
896  * that point. This check also provides failsafe coverage for the
897  * one-pass strategy, and the two-pass strategy with the index_cleanup
898  * param set to 'off'.
899  */
900  if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
902 
903  /*
904  * Consider if we definitely have enough space to process TIDs on page
905  * already. If we are close to overrunning the available space for
906  * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
907  * this page.
908  */
909  Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
910  if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
911  {
912  /*
913  * Before beginning index vacuuming, we release any pin we may
914  * hold on the visibility map page. This isn't necessary for
915  * correctness, but we do it anyway to avoid holding the pin
916  * across a lengthy, unrelated operation.
917  */
918  if (BufferIsValid(vmbuffer))
919  {
920  ReleaseBuffer(vmbuffer);
921  vmbuffer = InvalidBuffer;
922  }
923 
924  /* Perform a round of index and heap vacuuming */
925  vacrel->consider_bypass_optimization = false;
926  lazy_vacuum(vacrel);
927 
928  /*
929  * Vacuum the Free Space Map to make newly-freed space visible on
930  * upper-level FSM pages. Note we have not yet processed blkno.
931  */
932  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
933  blkno);
934  next_fsm_block_to_vacuum = blkno;
935 
936  /* Report that we are once again scanning the heap */
939  }
940 
941  /*
942  * Pin the visibility map page in case we need to mark the page
943  * all-visible. In most cases this will be very cheap, because we'll
944  * already have the correct page pinned anyway.
945  */
946  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
947 
948  /*
949  * We need a buffer cleanup lock to prune HOT chains and defragment
950  * the page in lazy_scan_prune. But when it's not possible to acquire
951  * a cleanup lock right away, we may be able to settle for reduced
952  * processing using lazy_scan_noprune.
953  */
954  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
955  vacrel->bstrategy);
956  page = BufferGetPage(buf);
958  {
959  bool hastup,
960  recordfreespace;
961 
963 
964  /* Check for new or empty pages before lazy_scan_noprune call */
965  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, true,
966  vmbuffer))
967  {
968  /* Processed as new/empty page (lock and pin released) */
969  continue;
970  }
971 
972  /* Collect LP_DEAD items in dead_items array, count tuples */
973  if (lazy_scan_noprune(vacrel, buf, blkno, page, &hastup,
974  &recordfreespace))
975  {
976  Size freespace = 0;
977 
978  /*
979  * Processed page successfully (without cleanup lock) -- just
980  * need to perform rel truncation and FSM steps, much like the
981  * lazy_scan_prune case. Don't bother trying to match its
982  * visibility map setting steps, though.
983  */
984  if (hastup)
985  vacrel->nonempty_pages = blkno + 1;
986  if (recordfreespace)
987  freespace = PageGetHeapFreeSpace(page);
989  if (recordfreespace)
990  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
991  continue;
992  }
993 
994  /*
995  * lazy_scan_noprune could not do all required processing. Wait
996  * for a cleanup lock, and call lazy_scan_prune in the usual way.
997  */
998  Assert(vacrel->aggressive);
1001  }
1002 
1003  /* Check for new or empty pages before lazy_scan_prune call */
1004  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, false, vmbuffer))
1005  {
1006  /* Processed as new/empty page (lock and pin released) */
1007  continue;
1008  }
1009 
1010  /*
1011  * Prune, freeze, and count tuples.
1012  *
1013  * Accumulates details of remaining LP_DEAD line pointers on page in
1014  * dead_items array. This includes LP_DEAD line pointers that we
1015  * pruned ourselves, as well as existing LP_DEAD line pointers that
1016  * were pruned some time earlier. Also considers freezing XIDs in the
1017  * tuple headers of remaining items with storage.
1018  */
1019  lazy_scan_prune(vacrel, buf, blkno, page, &prunestate);
1020 
1021  Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
1022 
1023  /* Remember the location of the last page with nonremovable tuples */
1024  if (prunestate.hastup)
1025  vacrel->nonempty_pages = blkno + 1;
1026 
1027  if (vacrel->nindexes == 0)
1028  {
1029  /*
1030  * Consider the need to do page-at-a-time heap vacuuming when
1031  * using the one-pass strategy now.
1032  *
1033  * The one-pass strategy will never call lazy_vacuum(). The steps
1034  * performed here can be thought of as the one-pass equivalent of
1035  * a call to lazy_vacuum().
1036  */
1037  if (prunestate.has_lpdead_items)
1038  {
1039  Size freespace;
1040 
1041  lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
1042 
1043  /* Forget the LP_DEAD items that we just vacuumed */
1044  dead_items->num_items = 0;
1045 
1046  /*
1047  * Periodically perform FSM vacuuming to make newly-freed
1048  * space visible on upper FSM pages. Note we have not yet
1049  * performed FSM processing for blkno.
1050  */
1051  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1052  {
1053  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1054  blkno);
1055  next_fsm_block_to_vacuum = blkno;
1056  }
1057 
1058  /*
1059  * Now perform FSM processing for blkno, and move on to next
1060  * page.
1061  *
1062  * Our call to lazy_vacuum_heap_page() will have considered if
1063  * it's possible to set all_visible/all_frozen independently
1064  * of lazy_scan_prune(). Note that prunestate was invalidated
1065  * by lazy_vacuum_heap_page() call.
1066  */
1067  freespace = PageGetHeapFreeSpace(page);
1068 
1070  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1071  continue;
1072  }
1073 
1074  /*
1075  * There was no call to lazy_vacuum_heap_page() because pruning
1076  * didn't encounter/create any LP_DEAD items that needed to be
1077  * vacuumed. Prune state has not been invalidated, so proceed
1078  * with prunestate-driven visibility map and FSM steps (just like
1079  * the two-pass strategy).
1080  */
1081  Assert(dead_items->num_items == 0);
1082  }
1083 
1084  /*
1085  * Handle setting visibility map bit based on information from the VM
1086  * (as of last lazy_scan_skip() call), and from prunestate
1087  */
1088  if (!all_visible_according_to_vm && prunestate.all_visible)
1089  {
1091 
1092  if (prunestate.all_frozen)
1093  {
1095  flags |= VISIBILITYMAP_ALL_FROZEN;
1096  }
1097 
1098  /*
1099  * It should never be the case that the visibility map page is set
1100  * while the page-level bit is clear, but the reverse is allowed
1101  * (if checksums are not enabled). Regardless, set both bits so
1102  * that we get back in sync.
1103  *
1104  * NB: If the heap page is all-visible but the VM bit is not set,
1105  * we don't need to dirty the heap page. However, if checksums
1106  * are enabled, we do need to make sure that the heap page is
1107  * dirtied before passing it to visibilitymap_set(), because it
1108  * may be logged. Given that this situation should only happen in
1109  * rare cases after a crash, it is not worth optimizing.
1110  */
1111  PageSetAllVisible(page);
1113  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1114  vmbuffer, prunestate.visibility_cutoff_xid,
1115  flags);
1116  }
1117 
1118  /*
1119  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1120  * the page-level bit is clear. However, it's possible that the bit
1121  * got cleared after lazy_scan_skip() was called, so we must recheck
1122  * with buffer lock before concluding that the VM is corrupt.
1123  */
1124  else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1125  visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1126  {
1127  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1128  vacrel->relname, blkno);
1129  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1131  }
1132 
1133  /*
1134  * It's possible for the value returned by
1135  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1136  * wrong for us to see tuples that appear to not be visible to
1137  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1138  * xmin value never moves backwards, but
1139  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1140  * returns a value that's unnecessarily small, so if we see that
1141  * contradiction it just means that the tuples that we think are not
1142  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1143  * is correct.
1144  *
1145  * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE
1146  * set, however.
1147  */
1148  else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
1149  {
1150  elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1151  vacrel->relname, blkno);
1152  PageClearAllVisible(page);
1154  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1156  }
1157 
1158  /*
1159  * If the all-visible page is all-frozen but not marked as such yet,
1160  * mark it as all-frozen. Note that all_frozen is only valid if
1161  * all_visible is true, so we must check both prunestate fields.
1162  */
1163  else if (all_visible_according_to_vm && prunestate.all_visible &&
1164  prunestate.all_frozen &&
1165  !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1166  {
1167  /*
1168  * Avoid relying on all_visible_according_to_vm as a proxy for the
1169  * page-level PD_ALL_VISIBLE bit being set, since it might have
1170  * become stale -- even when all_visible is set in prunestate
1171  */
1172  if (!PageIsAllVisible(page))
1173  {
1174  PageSetAllVisible(page);
1176  }
1177 
1178  /*
1179  * Set the page all-frozen (and all-visible) in the VM.
1180  *
1181  * We can pass InvalidTransactionId as our visibility_cutoff_xid,
1182  * since a snapshotConflictHorizon sufficient to make everything
1183  * safe for REDO was logged when the page's tuples were frozen.
1184  */
1186  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1187  vmbuffer, InvalidTransactionId,
1190  }
1191 
1192  /*
1193  * Final steps for block: drop cleanup lock, record free space in the
1194  * FSM
1195  */
1196  if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1197  {
1198  /*
1199  * Wait until lazy_vacuum_heap_rel() to save free space. This
1200  * doesn't just save us some cycles; it also allows us to record
1201  * any additional free space that lazy_vacuum_heap_page() will
1202  * make available in cases where it's possible to truncate the
1203  * page's line pointer array.
1204  *
1205  * Note: It's not in fact 100% certain that we really will call
1206  * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1207  * index vacuuming (and so must skip heap vacuuming). This is
1208  * deemed okay because it only happens in emergencies, or when
1209  * there is very little free space anyway. (Besides, we start
1210  * recording free space in the FSM once index vacuuming has been
1211  * abandoned.)
1212  *
1213  * Note: The one-pass (no indexes) case is only supposed to make
1214  * it this far when there were no LP_DEAD items during pruning.
1215  */
1216  Assert(vacrel->nindexes > 0);
1218  }
1219  else
1220  {
1221  Size freespace = PageGetHeapFreeSpace(page);
1222 
1224  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1225  }
1226  }
1227 
1228  vacrel->blkno = InvalidBlockNumber;
1229  if (BufferIsValid(vmbuffer))
1230  ReleaseBuffer(vmbuffer);
1231 
1232  /* report that everything is now scanned */
1234 
1235  /* now we can compute the new value for pg_class.reltuples */
1236  vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1237  vacrel->scanned_pages,
1238  vacrel->live_tuples);
1239 
1240  /*
1241  * Also compute the total number of surviving heap entries. In the
1242  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1243  */
1244  vacrel->new_rel_tuples =
1245  Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1246  vacrel->missed_dead_tuples;
1247 
1248  /*
1249  * Do index vacuuming (call each index's ambulkdelete routine), then do
1250  * related heap vacuuming
1251  */
1252  if (dead_items->num_items > 0)
1253  lazy_vacuum(vacrel);
1254 
1255  /*
1256  * Vacuum the remainder of the Free Space Map. We must do this whether or
1257  * not there were indexes, and whether or not we bypassed index vacuuming.
1258  */
1259  if (blkno > next_fsm_block_to_vacuum)
1260  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1261 
1262  /* report all blocks vacuumed */
1264 
1265  /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1266  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1267  lazy_cleanup_all_indexes(vacrel);
1268 }
1269 
1270 /*
1271  * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
1272  *
1273  * lazy_scan_heap() calls here every time it needs to set up a new range of
1274  * blocks to skip via the visibility map. Caller passes the next block in
1275  * line. We return a next_unskippable_block for this range. When there are
1276  * no skippable blocks we just return caller's next_block. The all-visible
1277  * status of the returned block is set in *next_unskippable_allvis for caller,
1278  * too. Block usually won't be all-visible (since it's unskippable), but it
1279  * can be during aggressive VACUUMs (as well as in certain edge cases).
1280  *
1281  * Sets *skipping_current_range to indicate if caller should skip this range.
1282  * Costs and benefits drive our decision. Very small ranges won't be skipped.
1283  *
1284  * Note: our opinion of which blocks can be skipped can go stale immediately.
1285  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1286  * was concurrently cleared, though. All that matters is that caller scan all
1287  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1288  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1289  * older XIDs/MXIDs. The vacrel->skippedallvis flag will be set here when the
1290  * choice to skip such a range is actually made, making everything safe.)
1291  */
1292 static BlockNumber
1293 lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
1294  bool *next_unskippable_allvis, bool *skipping_current_range)
1295 {
1296  BlockNumber rel_pages = vacrel->rel_pages,
1297  next_unskippable_block = next_block,
1298  nskippable_blocks = 0;
1299  bool skipsallvis = false;
1300 
 /*
  * Walk forward from next_block, consulting the visibility map for each
  * block, until we find the first block that cannot be skipped.  Start
  * from the optimistic assumption that the block we stop at will itself
  * be all-visible; the loop clears this when that turns out to be false.
  */
1301  *next_unskippable_allvis = true;
1302  while (next_unskippable_block < rel_pages)
1303  {
1304  uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1305  next_unskippable_block,
1306  vmbuffer);
1307 
 /* Block lacks the all-visible bit: it ends the skippable range */
1308  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1309  {
 /* VM invariant: all-frozen implies all-visible */
1310  Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1311  *next_unskippable_allvis = false;
1312  break;
1313  }
1314 
1315  /*
1316  * Caller must scan the last page to determine whether it has tuples
1317  * (caller must have the opportunity to set vacrel->nonempty_pages).
1318  * This rule avoids having lazy_truncate_heap() take access-exclusive
1319  * lock on rel to attempt a truncation that fails anyway, just because
1320  * there are tuples on the last page (it is likely that there will be
1321  * tuples on other nearby pages as well, but those can be skipped).
1322  *
1323  * Implement this by always treating the last block as unsafe to skip.
1324  */
1325  if (next_unskippable_block == rel_pages - 1)
1326  break;
1327 
1328  /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1329  if (!vacrel->skipwithvm)
1330  {
1331  /* Caller shouldn't rely on all_visible_according_to_vm */
1332  *next_unskippable_allvis = false;
1333  break;
1334  }
1335 
1336  /*
1337  * Aggressive VACUUM caller can't skip pages just because they are
1338  * all-visible. They may still skip all-frozen pages, which can't
1339  * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1340  */
1341  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1342  {
1343  if (vacrel->aggressive)
1344  break;
1345 
1346  /*
1347  * All-visible block is safe to skip in non-aggressive case. But
1348  * remember that the final range contains such a block for later.
1349  */
1350  skipsallvis = true;
1351  }
1352 
 /*
  * NOTE(review): the embedded source numbering jumps from 1352 to 1354
  * here, so one statement was stripped from this listing (likely
  * vacuum_delay_point(), a cost-based delay hook) -- verify against
  * upstream vacuumlazy.c before relying on this text.
  */
1354  next_unskippable_block++;
1355  nskippable_blocks++;
1356  }
1357 
1358  /*
1359  * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
1360  * pages. Since we're reading sequentially, the OS should be doing
1361  * readahead for us, so there's no gain in skipping a page now and then.
1362  * Skipping such a range might even discourage sequential detection.
1363  *
1364  * This test also enables more frequent relfrozenxid advancement during
1365  * non-aggressive VACUUMs. If the range has any all-visible pages then
1366  * skipping makes updating relfrozenxid unsafe, which is a real downside.
1367  */
1368  if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
1369  *skipping_current_range = false;
1370  else
1371  {
1372  *skipping_current_range = true;
 /*
  * Only record that we skipped all-visible-but-not-frozen pages when
  * the range is actually skipped; this makes relfrozenxid advancement
  * unsafe for this VACUUM (see header comment).
  */
1373  if (skipsallvis)
1374  vacrel->skippedallvis = true;
1375  }
1376 
1377  return next_unskippable_block;
1378 }
1379 
1380 /*
1381  * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1382  *
1383  * Must call here to handle both new and empty pages before calling
1384  * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1385  * with new or empty pages.
1386  *
1387  * It's necessary to consider new pages as a special case, since the rules for
1388  * maintaining the visibility map and FSM with empty pages are a little
1389  * different (though new pages can be truncated away during rel truncation).
1390  *
1391  * Empty pages are not really a special case -- they're just heap pages that
1392  * have no allocated tuples (including even LP_UNUSED items). You might
1393  * wonder why we need to handle them here all the same. It's only necessary
1394  * because of a corner-case involving a hard crash during heap relation
1395  * extension. If we ever make relation-extension crash safe, then it should
1396  * no longer be necessary to deal with empty pages here (or new pages, for
1397  * that matter).
1398  *
1399  * Caller must hold at least a shared lock. We might need to escalate the
1400  * lock in that case, so the type of lock caller holds needs to be specified
1401  * using 'sharelock' argument.
1402  *
1403  * Returns false in common case where caller should go on to call
1404  * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1405  * that lazy_scan_heap is done processing the page, releasing lock on caller's
1406  * behalf.
1407  */
 /*
  * NOTE(review): the listing's embedded numbering skips line 1409 here, so
  * the first line of the signature (carrying the function name and leading
  * parameters, per the header comment above: lazy_scan_new_or_empty with
  * vacrel/buf/blkno) was stripped -- restore it from upstream vacuumlazy.c.
  */
1408 static bool
1410  Page page, bool sharelock, Buffer vmbuffer)
1411 {
1412  Size freespace;
1413 
1414  if (PageIsNew(page))
1415  {
1416  /*
1417  * All-zeroes pages can be left over if either a backend extends the
1418  * relation by a single page, but crashes before the newly initialized
1419  * page has been written out, or when bulk-extending the relation
1420  * (which creates a number of empty pages at the tail end of the
1421  * relation), and then enters them into the FSM.
1422  *
1423  * Note we do not enter the page into the visibilitymap. That has the
1424  * downside that we repeatedly visit this page in subsequent vacuums,
1425  * but otherwise we'll never discover the space on a promoted standby.
1426  * The harm of repeated checking ought to normally not be too bad. The
1427  * space usually should be used at some point, otherwise there
1428  * wouldn't be any regular vacuums.
1429  *
1430  * Make sure these pages are in the FSM, to ensure they can be reused.
1431  * Do that by testing if there's any space recorded for the page. If
1432  * not, enter it. We do so after releasing the lock on the heap page,
1433  * the FSM is approximate, after all.
1434  */
 /*
  * NOTE(review): numbering jumps over line 1435 -- a stripped statement
  * (presumably the buffer lock/pin release mentioned by the comment
  * above) belongs here; verify against upstream.
  */
1436 
1437  if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1438  {
 /* An all-zeroes page has the whole block minus the header free */
1439  freespace = BLCKSZ - SizeOfPageHeaderData;
1440 
1441  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1442  }
1443 
 /* Page fully handled here; caller moves on to the next block */
1444  return true;
1445  }
1446 
1447  if (PageIsEmpty(page))
1448  {
1449  /*
1450  * It seems likely that caller will always be able to get a cleanup
1451  * lock on an empty page. But don't take any chances -- escalate to
1452  * an exclusive lock (still don't need a cleanup lock, though).
1453  */
1454  if (sharelock)
1455  {
 /*
  * NOTE(review): numbering jumps over lines 1456-1457 -- the actual
  * lock-escalation calls were stripped from this listing; the
  * recheck below only makes sense after the lock was briefly
  * dropped and re-acquired.  Verify against upstream.
  */
1458 
 /* Recheck: page may have been filled while we swapped locks */
1459  if (!PageIsEmpty(page))
1460  {
1461  /* page isn't new or empty -- keep lock and pin for now */
1462  return false;
1463  }
1464  }
1465  else
1466  {
1467  /* Already have a full cleanup lock (which is more than enough) */
1468  }
1469 
1470  /*
1471  * Unlike new pages, empty pages are always set all-visible and
1472  * all-frozen.
1473  */
1474  if (!PageIsAllVisible(page))
1475  {
 /*
  * NOTE(review): line 1476 stripped here; given the matching
  * END_CRIT_SECTION() below, it is presumably START_CRIT_SECTION()
  * -- confirm against upstream.
  */
1477 
1478  /* mark buffer dirty before writing a WAL record */
 /* NOTE(review): line 1479 stripped; likely MarkBufferDirty(buf) per the comment above -- confirm */
1480 
1481  /*
1482  * It's possible that another backend has extended the heap,
1483  * initialized the page, and then failed to WAL-log the page due
1484  * to an ERROR. Since heap extension is not WAL-logged, recovery
1485  * might try to replay our record setting the page all-visible and
1486  * find that the page isn't initialized, which will cause a PANIC.
1487  * To prevent that, check whether the page has been previously
1488  * WAL-logged, and if not, do that now.
1489  */
1490  if (RelationNeedsWAL(vacrel->rel) &&
1491  PageGetLSN(page) == InvalidXLogRecPtr)
1492  log_newpage_buffer(buf, true);
1493 
1494  PageSetAllVisible(page);
1495  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1496  vmbuffer, InvalidTransactionId,
 /*
  * NOTE(review): line 1497 stripped -- the final flags argument of
  * visibilitymap_set() is missing from this listing (the function
  * comment says empty pages are set all-visible AND all-frozen);
  * as shown the call is syntactically truncated.  Restore from
  * upstream.
  */
1498  END_CRIT_SECTION();
1499  }
1500 
1501  freespace = PageGetHeapFreeSpace(page);
 /*
  * NOTE(review): line 1502 stripped -- presumably the buffer release
  * before the FSM update (FSM is updated after dropping the page lock
  * elsewhere in this file); confirm against upstream.
  */
1503  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1504  return true;
1505  }
1506 
1507  /* page isn't new or empty -- keep lock and pin */
1508  return false;
1509 }
1510 
1511 /*
1512  * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1513  *
1514  * Caller must hold pin and buffer cleanup lock on the buffer.
1515  *
1516  * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
1517  * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
1518  * whether or not a tuple should be considered DEAD. This happened when an
1519  * inserting transaction concurrently aborted (after our heap_page_prune()
1520  * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
1521  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
1522  * but nevertheless were left with storage after pruning.
1523  *
1524  * The approach we take now is to restart pruning when the race condition is
1525  * detected. This allows heap_page_prune() to prune the tuples inserted by
1526  * the now-aborted transaction. This is a little crude, but it guarantees
1527  * that any items that make it into the dead_items array are simple LP_DEAD
1528  * line pointers, and that every remaining item with tuple storage is
1529  * considered as a candidate for freezing.
1530  */
1531 static void
1533  Buffer buf,
1534  BlockNumber blkno,
1535  Page page,
1536  LVPagePruneState *prunestate)
1537 {
1538  Relation rel = vacrel->rel;
1539  OffsetNumber offnum,
1540  maxoff;
1541  ItemId itemid;
1542  HeapTupleData tuple;
1543  HTSV_Result res;
1544  int tuples_deleted,
1545  tuples_frozen,
1546  lpdead_items,
1547  live_tuples,
1548  recently_dead_tuples;
1549  int nnewlpdead;
1550  HeapPageFreeze pagefrz;
1551  int64 fpi_before = pgWalUsage.wal_fpi;
1552  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1554 
1555  Assert(BufferGetBlockNumber(buf) == blkno);
1556 
1557  /*
1558  * maxoff might be reduced following line pointer array truncation in
1559  * heap_page_prune. That's safe for us to ignore, since the reclaimed
1560  * space will continue to look like LP_UNUSED items below.
1561  */
1562  maxoff = PageGetMaxOffsetNumber(page);
1563 
1564 retry:
1565 
1566  /* Initialize (or reset) page-level state */
1567  pagefrz.freeze_required = false;
1568  pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1569  pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
1570  pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1571  pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1572  tuples_deleted = 0;
1573  tuples_frozen = 0;
1574  lpdead_items = 0;
1575  live_tuples = 0;
1576  recently_dead_tuples = 0;
1577 
1578  /*
1579  * Prune all HOT-update chains in this page.
1580  *
1581  * We count tuples removed by the pruning step as tuples_deleted. Its
1582  * final value can be thought of as the number of tuples that have been
1583  * deleted from the table. It should not be confused with lpdead_items;
1584  * lpdead_items's final value can be thought of as the number of tuples
1585  * that were deleted from indexes.
1586  */
1587  tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
1588  InvalidTransactionId, 0, &nnewlpdead,
1589  &vacrel->offnum);
1590 
1591  /*
1592  * Now scan the page to collect LP_DEAD items and check for tuples
1593  * requiring freezing among remaining tuples with storage
1594  */
1595  prunestate->hastup = false;
1596  prunestate->has_lpdead_items = false;
1597  prunestate->all_visible = true;
1598  prunestate->all_frozen = true;
1600 
1601  for (offnum = FirstOffsetNumber;
1602  offnum <= maxoff;
1603  offnum = OffsetNumberNext(offnum))
1604  {
1605  bool totally_frozen;
1606 
1607  /*
1608  * Set the offset number so that we can display it along with any
1609  * error that occurred while processing this tuple.
1610  */
1611  vacrel->offnum = offnum;
1612  itemid = PageGetItemId(page, offnum);
1613 
1614  if (!ItemIdIsUsed(itemid))
1615  continue;
1616 
1617  /* Redirect items mustn't be touched */
1618  if (ItemIdIsRedirected(itemid))
1619  {
1620  /* page makes rel truncation unsafe */
1621  prunestate->hastup = true;
1622  continue;
1623  }
1624 
1625  if (ItemIdIsDead(itemid))
1626  {
1627  /*
1628  * Deliberately don't set hastup for LP_DEAD items. We make the
1629  * soft assumption that any LP_DEAD items encountered here will
1630  * become LP_UNUSED later on, before count_nondeletable_pages is
1631  * reached. If we don't make this assumption then rel truncation
1632  * will only happen every other VACUUM, at most. Besides, VACUUM
1633  * must treat hastup/nonempty_pages as provisional no matter how
1634  * LP_DEAD items are handled (handled here, or handled later on).
1635  *
1636  * Also deliberately delay unsetting all_visible until just before
1637  * we return to lazy_scan_heap caller, as explained in full below.
1638  * (This is another case where it's useful to anticipate that any
1639  * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
1640  */
1641  deadoffsets[lpdead_items++] = offnum;
1642  continue;
1643  }
1644 
1645  Assert(ItemIdIsNormal(itemid));
1646 
1647  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1648  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1649  tuple.t_len = ItemIdGetLength(itemid);
1650  tuple.t_tableOid = RelationGetRelid(rel);
1651 
1652  /*
1653  * DEAD tuples are almost always pruned into LP_DEAD line pointers by
1654  * heap_page_prune(), but it's possible that the tuple state changed
1655  * since heap_page_prune() looked. Handle that here by restarting.
1656  * (See comments at the top of function for a full explanation.)
1657  */
1658  res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1659  buf);
1660 
1661  if (unlikely(res == HEAPTUPLE_DEAD))
1662  goto retry;
1663 
1664  /*
1665  * The criteria for counting a tuple as live in this block need to
1666  * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
1667  * and ANALYZE may produce wildly different reltuples values, e.g.
1668  * when there are many recently-dead tuples.
1669  *
1670  * The logic here is a bit simpler than acquire_sample_rows(), as
1671  * VACUUM can't run inside a transaction block, which makes some cases
1672  * impossible (e.g. in-progress insert from the same transaction).
1673  *
1674  * We treat LP_DEAD items (which are the closest thing to DEAD tuples
1675  * that might be seen here) differently, too: we assume that they'll
1676  * become LP_UNUSED before VACUUM finishes. This difference is only
1677  * superficial. VACUUM effectively agrees with ANALYZE about DEAD
1678  * items, in the end. VACUUM won't remember LP_DEAD items, but only
1679  * because they're not supposed to be left behind when it is done.
1680  * (Cases where we bypass index vacuuming will violate this optimistic
1681  * assumption, but the overall impact of that should be negligible.)
1682  */
1683  switch (res)
1684  {
1685  case HEAPTUPLE_LIVE:
1686 
1687  /*
1688  * Count it as live. Not only is this natural, but it's also
1689  * what acquire_sample_rows() does.
1690  */
1691  live_tuples++;
1692 
1693  /*
1694  * Is the tuple definitely visible to all transactions?
1695  *
1696  * NB: Like with per-tuple hint bits, we can't set the
1697  * PD_ALL_VISIBLE flag if the inserter committed
1698  * asynchronously. See SetHintBits for more info. Check that
1699  * the tuple is hinted xmin-committed because of that.
1700  */
1701  if (prunestate->all_visible)
1702  {
1703  TransactionId xmin;
1704 
1706  {
1707  prunestate->all_visible = false;
1708  break;
1709  }
1710 
1711  /*
1712  * The inserter definitely committed. But is it old enough
1713  * that everyone sees it as committed?
1714  */
1715  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
1716  if (!TransactionIdPrecedes(xmin,
1717  vacrel->cutoffs.OldestXmin))
1718  {
1719  prunestate->all_visible = false;
1720  break;
1721  }
1722 
1723  /* Track newest xmin on page. */
1724  if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid) &&
1725  TransactionIdIsNormal(xmin))
1726  prunestate->visibility_cutoff_xid = xmin;
1727  }
1728  break;
1730 
1731  /*
1732  * If tuple is recently dead then we must not remove it from
1733  * the relation. (We only remove items that are LP_DEAD from
1734  * pruning.)
1735  */
1736  recently_dead_tuples++;
1737  prunestate->all_visible = false;
1738  break;
1740 
1741  /*
1742  * We do not count these rows as live, because we expect the
1743  * inserting transaction to update the counters at commit, and
1744  * we assume that will happen only after we report our
1745  * results. This assumption is a bit shaky, but it is what
1746  * acquire_sample_rows() does, so be consistent.
1747  */
1748  prunestate->all_visible = false;
1749  break;
1751  /* This is an expected case during concurrent vacuum */
1752  prunestate->all_visible = false;
1753 
1754  /*
1755  * Count such rows as live. As above, we assume the deleting
1756  * transaction will commit and update the counters after we
1757  * report.
1758  */
1759  live_tuples++;
1760  break;
1761  default:
1762  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1763  break;
1764  }
1765 
1766  prunestate->hastup = true; /* page makes rel truncation unsafe */
1767 
1768  /* Tuple with storage -- consider need to freeze */
1769  if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
1770  &frozen[tuples_frozen], &totally_frozen))
1771  {
1772  /* Save prepared freeze plan for later */
1773  frozen[tuples_frozen++].offset = offnum;
1774  }
1775 
1776  /*
1777  * If any tuple isn't either totally frozen already or eligible to
1778  * become totally frozen (according to its freeze plan), then the page
1779  * definitely cannot be set all-frozen in the visibility map later on
1780  */
1781  if (!totally_frozen)
1782  prunestate->all_frozen = false;
1783  }
1784 
1785  /*
1786  * We have now divided every item on the page into either an LP_DEAD item
1787  * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
1788  * that remains and needs to be considered for freezing now (LP_UNUSED and
1789  * LP_REDIRECT items also remain, but are of no further interest to us).
1790  */
1791  vacrel->offnum = InvalidOffsetNumber;
1792 
1793  /*
1794  * Freeze the page when heap_prepare_freeze_tuple indicates that at least
1795  * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also
1796  * freeze when pruning generated an FPI, if doing so means that we set the
1797  * page all-frozen afterwards (might not happen until final heap pass).
1798  */
1799  if (pagefrz.freeze_required || tuples_frozen == 0 ||
1800  (prunestate->all_visible && prunestate->all_frozen &&
1801  fpi_before != pgWalUsage.wal_fpi))
1802  {
1803  /*
1804  * We're freezing the page. Our final NewRelfrozenXid doesn't need to
1805  * be affected by the XIDs that are just about to be frozen anyway.
1806  */
1807  vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
1808  vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
1809 
1810  if (tuples_frozen == 0)
1811  {
1812  /*
1813  * We have no freeze plans to execute, so there's no added cost
1814  * from following the freeze path. That's why it was chosen.
1815  * This is important in the case where the page only contains
1816  * totally frozen tuples at this point (perhaps only following
1817  * pruning). Such pages can be marked all-frozen in the VM by our
1818  * caller, even though none of its tuples were newly frozen here
1819  * (note that the "no freeze" path never sets pages all-frozen).
1820  *
1821  * We never increment the frozen_pages instrumentation counter
1822  * here, since it only counts pages with newly frozen tuples
1823  * (don't confuse that with pages newly set all-frozen in VM).
1824  */
1825  }
1826  else
1827  {
1828  TransactionId snapshotConflictHorizon;
1829 
1830  vacrel->frozen_pages++;
1831 
1832  /*
1833  * We can use visibility_cutoff_xid as our cutoff for conflicts
1834  * when the whole page is eligible to become all-frozen in the VM
1835  * once we're done with it. Otherwise we generate a conservative
1836  * cutoff by stepping back from OldestXmin.
1837  */
1838  if (prunestate->all_visible && prunestate->all_frozen)
1839  {
1840  /* Using same cutoff when setting VM is now unnecessary */
1841  snapshotConflictHorizon = prunestate->visibility_cutoff_xid;
1843  }
1844  else
1845  {
1846  /* Avoids false conflicts when hot_standby_feedback in use */
1847  snapshotConflictHorizon = vacrel->cutoffs.OldestXmin;
1848  TransactionIdRetreat(snapshotConflictHorizon);
1849  }
1850 
1851  /* Execute all freeze plans for page as a single atomic action */
1853  snapshotConflictHorizon,
1854  frozen, tuples_frozen);
1855  }
1856  }
1857  else
1858  {
1859  /*
1860  * Page requires "no freeze" processing. It might be set all-visible
1861  * in the visibility map, but it can never be set all-frozen.
1862  */
1863  vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
1864  vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
1865  prunestate->all_frozen = false;
1866  tuples_frozen = 0; /* avoid miscounts in instrumentation */
1867  }
1868 
1869  /*
1870  * VACUUM will call heap_page_is_all_visible() during the second pass over
1871  * the heap to determine all_visible and all_frozen for the page -- this
1872  * is a specialized version of the logic from this function. Now that
1873  * we've finished pruning and freezing, make sure that we're in total
1874  * agreement with heap_page_is_all_visible() using an assertion.
1875  */
1876 #ifdef USE_ASSERT_CHECKING
1877  /* Note that all_frozen value does not matter when !all_visible */
1878  if (prunestate->all_visible && lpdead_items == 0)
1879  {
1880  TransactionId cutoff;
1881  bool all_frozen;
1882 
1883  if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
1884  Assert(false);
1885 
1886  Assert(!TransactionIdIsValid(cutoff) ||
1887  cutoff == prunestate->visibility_cutoff_xid);
1888  }
1889 #endif
1890 
1891  /*
1892  * Now save details of the LP_DEAD items from the page in vacrel
1893  */
1894  if (lpdead_items > 0)
1895  {
1896  VacDeadItems *dead_items = vacrel->dead_items;
1897  ItemPointerData tmp;
1898 
1899  vacrel->lpdead_item_pages++;
1900  prunestate->has_lpdead_items = true;
1901 
1902  ItemPointerSetBlockNumber(&tmp, blkno);
1903 
1904  for (int i = 0; i < lpdead_items; i++)
1905  {
1906  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
1907  dead_items->items[dead_items->num_items++] = tmp;
1908  }
1909 
1910  Assert(dead_items->num_items <= dead_items->max_items);
1912  dead_items->num_items);
1913 
1914  /*
1915  * It was convenient to ignore LP_DEAD items in all_visible earlier on
1916  * to make the choice of whether or not to freeze the page unaffected
1917  * by the short-term presence of LP_DEAD items. These LP_DEAD items
1918  * were effectively assumed to be LP_UNUSED items in the making. It
1919  * doesn't matter which heap pass (initial pass or final pass) ends up
1920  * setting the page all-frozen, as long as the ongoing VACUUM does it.
1921  *
1922  * Now that freezing has been finalized, unset all_visible. It needs
1923  * to reflect the present state of things, as expected by our caller.
1924  */
1925  prunestate->all_visible = false;
1926  }
1927 
1928  /* Finally, add page-local counts to whole-VACUUM counts */
1929  vacrel->tuples_deleted += tuples_deleted;
1930  vacrel->tuples_frozen += tuples_frozen;
1931  vacrel->lpdead_items += lpdead_items;
1932  vacrel->live_tuples += live_tuples;
1933  vacrel->recently_dead_tuples += recently_dead_tuples;
1934 }
1935 
1936 /*
1937  * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1938  *
1939  * Caller need only hold a pin and share lock on the buffer, unlike
1940  * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1941  * performed here, it's quite possible that an earlier opportunistic pruning
1942  * operation left LP_DEAD items behind. We'll at least collect any such items
1943  * in the dead_items array for removal from indexes.
1944  *
1945  * For aggressive VACUUM callers, we may return false to indicate that a full
1946  * cleanup lock is required for processing by lazy_scan_prune. This is only
1947  * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1948  * one or more tuples on the page. We always return true for non-aggressive
1949  * callers.
1950  *
1951  * See lazy_scan_prune for an explanation of hastup return flag.
1952  * recordfreespace flag instructs caller on whether or not it should do
1953  * generic FSM processing for page.
1954  */
1955 static bool
1957  Buffer buf,
1958  BlockNumber blkno,
1959  Page page,
1960  bool *hastup,
1961  bool *recordfreespace)
1962 {
1963  OffsetNumber offnum,
1964  maxoff;
1965  int lpdead_items,
1966  live_tuples,
1967  recently_dead_tuples,
1968  missed_dead_tuples;
1969  HeapTupleHeader tupleheader;
1970  TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1971  MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1972  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1973 
1974  Assert(BufferGetBlockNumber(buf) == blkno);
1975 
1976  *hastup = false; /* for now */
1977  *recordfreespace = false; /* for now */
1978 
1979  lpdead_items = 0;
1980  live_tuples = 0;
1981  recently_dead_tuples = 0;
1982  missed_dead_tuples = 0;
1983 
1984  maxoff = PageGetMaxOffsetNumber(page);
1985  for (offnum = FirstOffsetNumber;
1986  offnum <= maxoff;
1987  offnum = OffsetNumberNext(offnum))
1988  {
1989  ItemId itemid;
1990  HeapTupleData tuple;
1991 
1992  vacrel->offnum = offnum;
1993  itemid = PageGetItemId(page, offnum);
1994 
1995  if (!ItemIdIsUsed(itemid))
1996  continue;
1997 
1998  if (ItemIdIsRedirected(itemid))
1999  {
2000  *hastup = true;
2001  continue;
2002  }
2003 
2004  if (ItemIdIsDead(itemid))
2005  {
2006  /*
2007  * Deliberately don't set hastup=true here. See same point in
2008  * lazy_scan_prune for an explanation.
2009  */
2010  deadoffsets[lpdead_items++] = offnum;
2011  continue;
2012  }
2013 
2014  *hastup = true; /* page prevents rel truncation */
2015  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2016  if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2017  &NoFreezePageRelfrozenXid,
2018  &NoFreezePageRelminMxid))
2019  {
2020  /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2021  if (vacrel->aggressive)
2022  {
2023  /*
2024  * Aggressive VACUUMs must always be able to advance rel's
2025  * relfrozenxid to a value >= FreezeLimit (and be able to
2026  * advance rel's relminmxid to a value >= MultiXactCutoff).
2027  * The ongoing aggressive VACUUM won't be able to do that
2028  * unless it can freeze an XID (or MXID) from this tuple now.
2029  *
2030  * The only safe option is to have caller perform processing
2031  * of this page using lazy_scan_prune. Caller might have to
2032  * wait a while for a cleanup lock, but it can't be helped.
2033  */
2034  vacrel->offnum = InvalidOffsetNumber;
2035  return false;
2036  }
2037 
2038  /*
2039  * Non-aggressive VACUUMs are under no obligation to advance
2040  * relfrozenxid (even by one XID). We can be much laxer here.
2041  *
2042  * Currently we always just accept an older final relfrozenxid
2043  * and/or relminmxid value. We never make caller wait or work a
2044  * little harder, even when it likely makes sense to do so.
2045  */
2046  }
2047 
2048  ItemPointerSet(&(tuple.t_self), blkno, offnum);
2049  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2050  tuple.t_len = ItemIdGetLength(itemid);
2051  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2052 
2053  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2054  buf))
2055  {
2057  case HEAPTUPLE_LIVE:
2058 
2059  /*
2060  * Count both cases as live, just like lazy_scan_prune
2061  */
2062  live_tuples++;
2063 
2064  break;
2065  case HEAPTUPLE_DEAD:
2066 
2067  /*
2068  * There is some useful work for pruning to do, that won't be
2069  * done due to failure to get a cleanup lock.
2070  */
2071  missed_dead_tuples++;
2072  break;
2074 
2075  /*
2076  * Count in recently_dead_tuples, just like lazy_scan_prune
2077  */
2078  recently_dead_tuples++;
2079  break;
2081 
2082  /*
2083  * Do not count these rows as live, just like lazy_scan_prune
2084  */
2085  break;
2086  default:
2087  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2088  break;
2089  }
2090  }
2091 
2092  vacrel->offnum = InvalidOffsetNumber;
2093 
2094  /*
2095  * By here we know for sure that caller can put off freezing and pruning
2096  * this particular page until the next VACUUM. Remember its details now.
2097  * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2098  */
2099  vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2100  vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2101 
2102  /* Save any LP_DEAD items found on the page in dead_items array */
2103  if (vacrel->nindexes == 0)
2104  {
2105  /* Using one-pass strategy (since table has no indexes) */
2106  if (lpdead_items > 0)
2107  {
2108  /*
2109  * Perfunctory handling for the corner case where a single pass
2110  * strategy VACUUM cannot get a cleanup lock, and it turns out
2111  * that there is one or more LP_DEAD items: just count the LP_DEAD
2112  * items as missed_dead_tuples instead. (This is a bit dishonest,
2113  * but it beats having to maintain specialized heap vacuuming code
2114  * forever, for vanishingly little benefit.)
2115  */
2116  *hastup = true;
2117  missed_dead_tuples += lpdead_items;
2118  }
2119 
2120  *recordfreespace = true;
2121  }
2122  else if (lpdead_items == 0)
2123  {
2124  /*
2125  * Won't be vacuuming this page later, so record page's freespace in
2126  * the FSM now
2127  */
2128  *recordfreespace = true;
2129  }
2130  else
2131  {
2132  VacDeadItems *dead_items = vacrel->dead_items;
2133  ItemPointerData tmp;
2134 
2135  /*
2136  * Page has LP_DEAD items, and so any references/TIDs that remain in
2137  * indexes will be deleted during index vacuuming (and then marked
2138  * LP_UNUSED in the heap)
2139  */
2140  vacrel->lpdead_item_pages++;
2141 
2142  ItemPointerSetBlockNumber(&tmp, blkno);
2143 
2144  for (int i = 0; i < lpdead_items; i++)
2145  {
2146  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
2147  dead_items->items[dead_items->num_items++] = tmp;
2148  }
2149 
2150  Assert(dead_items->num_items <= dead_items->max_items);
2152  dead_items->num_items);
2153 
2154  vacrel->lpdead_items += lpdead_items;
2155 
2156  /*
2157  * Assume that we'll go on to vacuum this heap page during final pass
2158  * over the heap. Don't record free space until then.
2159  */
2160  *recordfreespace = false;
2161  }
2162 
2163  /*
2164  * Finally, add relevant page-local counts to whole-VACUUM counts
2165  */
2166  vacrel->live_tuples += live_tuples;
2167  vacrel->recently_dead_tuples += recently_dead_tuples;
2168  vacrel->missed_dead_tuples += missed_dead_tuples;
2169  if (missed_dead_tuples > 0)
2170  vacrel->missed_dead_pages++;
2171 
2172  /* Caller won't need to call lazy_scan_prune with same page */
2173  return true;
2174 }
2175 
2176 /*
2177  * Main entry point for index vacuuming and heap vacuuming.
2178  *
2179  * Removes items collected in dead_items from table's indexes, then marks the
2180  * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2181  * for full details.
2182  *
2183  * Also empties dead_items, freeing up space for later TIDs.
2184  *
2185  * We may choose to bypass index vacuuming at this point, though only when the
2186  * ongoing VACUUM operation will definitely only have one index scan/round of
2187  * index vacuuming.
2188  */
2189 static void
2191 {
2192  bool bypass;
2193 
2194  /* Should not end up here with no indexes */
2195  Assert(vacrel->nindexes > 0);
2196  Assert(vacrel->lpdead_item_pages > 0);
2197 
2198  if (!vacrel->do_index_vacuuming)
2199  {
2200  Assert(!vacrel->do_index_cleanup);
2201  vacrel->dead_items->num_items = 0;
2202  return;
2203  }
2204 
2205  /*
2206  * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2207  *
2208  * We currently only do this in cases where the number of LP_DEAD items
2209  * for the entire VACUUM operation is close to zero. This avoids sharp
2210  * discontinuities in the duration and overhead of successive VACUUM
2211  * operations that run against the same table with a fixed workload.
2212  * Ideally, successive VACUUM operations will behave as if there are
2213  * exactly zero LP_DEAD items in cases where there are close to zero.
2214  *
2215  * This is likely to be helpful with a table that is continually affected
2216  * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2217  * have small aberrations that lead to just a few heap pages retaining
2218  * only one or two LP_DEAD items. This is pretty common; even when the
2219  * DBA goes out of their way to make UPDATEs use HOT, it is practically
2220  * impossible to predict whether HOT will be applied in 100% of cases.
2221  * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2222  * HOT through careful tuning.
2223  */
2224  bypass = false;
2225  if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2226  {
2227  BlockNumber threshold;
2228 
2229  Assert(vacrel->num_index_scans == 0);
2230  Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
2231  Assert(vacrel->do_index_vacuuming);
2232  Assert(vacrel->do_index_cleanup);
2233 
2234  /*
2235  * This crossover point at which we'll start to do index vacuuming is
2236  * expressed as a percentage of the total number of heap pages in the
2237  * table that are known to have at least one LP_DEAD item. This is
2238  * much more important than the total number of LP_DEAD items, since
2239  * it's a proxy for the number of heap pages whose visibility map bits
2240  * cannot be set on account of bypassing index and heap vacuuming.
2241  *
2242  * We apply one further precautionary test: the space currently used
2243  * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2244  * not exceed 32MB. This limits the risk that we will bypass index
2245  * vacuuming again and again until eventually there is a VACUUM whose
2246  * dead_items space is not CPU cache resident.
2247  *
2248  * We don't take any special steps to remember the LP_DEAD items (such
2249  * as counting them in our final update to the stats system) when the
2250  * optimization is applied. Though the accounting used in analyze.c's
2251  * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2252  * rows in its own stats report, that's okay. The discrepancy should
2253  * be negligible. If this optimization is ever expanded to cover more
2254  * cases then this may need to be reconsidered.
2255  */
2256  threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2257  bypass = (vacrel->lpdead_item_pages < threshold &&
2258  vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
2259  }
2260 
2261  if (bypass)
2262  {
2263  /*
2264  * There are almost zero TIDs. Behave as if there were precisely
2265  * zero: bypass index vacuuming, but do index cleanup.
2266  *
2267  * We expect that the ongoing VACUUM operation will finish very
2268  * quickly, so there is no point in considering speeding up as a
2269  * failsafe against wraparound failure. (Index cleanup is expected to
2270  * finish very quickly in cases where there were no ambulkdelete()
2271  * calls.)
2272  */
2273  vacrel->do_index_vacuuming = false;
2274  }
2275  else if (lazy_vacuum_all_indexes(vacrel))
2276  {
2277  /*
2278  * We successfully completed a round of index vacuuming. Do related
2279  * heap vacuuming now.
2280  */
2281  lazy_vacuum_heap_rel(vacrel);
2282  }
2283  else
2284  {
2285  /*
2286  * Failsafe case.
2287  *
2288  * We attempted index vacuuming, but didn't finish a full round/full
2289  * index scan. This happens when relfrozenxid or relminmxid is too
2290  * far in the past.
2291  *
2292  * From this point on the VACUUM operation will do no further index
2293  * vacuuming or heap vacuuming. This VACUUM operation won't end up
2294  * back here again.
2295  */
2296  Assert(vacrel->failsafe_active);
2297  }
2298 
2299  /*
2300  * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2301  * vacuum)
2302  */
2303  vacrel->dead_items->num_items = 0;
2304 }
2305 
2306 /*
2307  * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2308  *
2309  * Returns true in the common case when all indexes were successfully
2310  * vacuumed. Returns false in rare cases where we determined that the ongoing
2311  * VACUUM operation is at risk of taking too long to finish, leading to
2312  * wraparound failure.
2313  */
2314 static bool
2316 {
2317  bool allindexes = true;
2318  double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2319 
2320  Assert(vacrel->nindexes > 0);
2321  Assert(vacrel->do_index_vacuuming);
2322  Assert(vacrel->do_index_cleanup);
2323 
2324  /* Precheck for XID wraparound emergencies */
2325  if (lazy_check_wraparound_failsafe(vacrel))
2326  {
2327  /* Wraparound emergency -- don't even start an index scan */
2328  return false;
2329  }
2330 
2331  /* Report that we are now vacuuming indexes */
2334 
2335  if (!ParallelVacuumIsActive(vacrel))
2336  {
2337  for (int idx = 0; idx < vacrel->nindexes; idx++)
2338  {
2339  Relation indrel = vacrel->indrels[idx];
2340  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2341 
2342  vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2343  old_live_tuples,
2344  vacrel);
2345 
2346  if (lazy_check_wraparound_failsafe(vacrel))
2347  {
2348  /* Wraparound emergency -- end current index scan */
2349  allindexes = false;
2350  break;
2351  }
2352  }
2353  }
2354  else
2355  {
2356  /* Outsource everything to parallel variant */
2357  parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2358  vacrel->num_index_scans);
2359 
2360  /*
2361  * Do a postcheck to consider applying wraparound failsafe now. Note
2362  * that parallel VACUUM only gets the precheck and this postcheck.
2363  */
2364  if (lazy_check_wraparound_failsafe(vacrel))
2365  allindexes = false;
2366  }
2367 
2368  /*
2369  * We delete all LP_DEAD items from the first heap pass in all indexes on
2370  * each call here (except calls where we choose to do the failsafe). This
2371  * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2372  * of the failsafe triggering, which prevents the next call from taking
2373  * place).
2374  */
2375  Assert(vacrel->num_index_scans > 0 ||
2376  vacrel->dead_items->num_items == vacrel->lpdead_items);
2377  Assert(allindexes || vacrel->failsafe_active);
2378 
2379  /*
2380  * Increase and report the number of index scans.
2381  *
2382  * We deliberately include the case where we started a round of bulk
2383  * deletes that we weren't able to finish due to the failsafe triggering.
2384  */
2385  vacrel->num_index_scans++;
2387  vacrel->num_index_scans);
2388 
2389  return allindexes;
2390 }
2391 
2392 /*
2393  * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2394  *
2395  * This routine marks LP_DEAD items in vacrel->dead_items array as LP_UNUSED.
2396  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
2397  * at all.
2398  *
2399  * We may also be able to truncate the line pointer array of the heap pages we
2400  * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2401  * array, it can be reclaimed as free space. These LP_UNUSED items usually
2402  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2403  * each page to LP_UNUSED, and then consider if it's possible to truncate the
2404  * page's line pointer array).
2405  *
2406  * Note: the reason for doing this as a second pass is we cannot remove the
2407  * tuples until we've removed their index entries, and we want to process
2408  * index entry removal in batches as large as possible.
2409  */
2410 static void
2412 {
2413  int index = 0;
2414  BlockNumber vacuumed_pages = 0;
2415  Buffer vmbuffer = InvalidBuffer;
2416  LVSavedErrInfo saved_err_info;
2417 
2418  Assert(vacrel->do_index_vacuuming);
2419  Assert(vacrel->do_index_cleanup);
2420  Assert(vacrel->num_index_scans > 0);
2421 
2422  /* Report that we are now vacuuming the heap */
2425 
2426  /* Update error traceback information */
2427  update_vacuum_error_info(vacrel, &saved_err_info,
2430 
2431  while (index < vacrel->dead_items->num_items)
2432  {
2433  BlockNumber blkno;
2434  Buffer buf;
2435  Page page;
2436  Size freespace;
2437 
2439 
2440  blkno = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
2441  vacrel->blkno = blkno;
2442 
2443  /*
2444  * Pin the visibility map page in case we need to mark the page
2445  * all-visible. In most cases this will be very cheap, because we'll
2446  * already have the correct page pinned anyway.
2447  */
2448  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2449 
2450  /* We need a non-cleanup exclusive lock to mark dead_items unused */
2451  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2452  vacrel->bstrategy);
2454  index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer);
2455 
2456  /* Now that we've vacuumed the page, record its available space */
2457  page = BufferGetPage(buf);
2458  freespace = PageGetHeapFreeSpace(page);
2459 
2461  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2462  vacuumed_pages++;
2463  }
2464 
2465  vacrel->blkno = InvalidBlockNumber;
2466  if (BufferIsValid(vmbuffer))
2467  ReleaseBuffer(vmbuffer);
2468 
2469  /*
2470  * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2471  * the second heap pass. No more, no less.
2472  */
2473  Assert(index > 0);
2474  Assert(vacrel->num_index_scans > 1 ||
2475  (index == vacrel->lpdead_items &&
2476  vacuumed_pages == vacrel->lpdead_item_pages));
2477 
2478  ereport(DEBUG2,
2479  (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2480  vacrel->relname, (long long) index, vacuumed_pages)));
2481 
2482  /* Revert to the previous phase information for error traceback */
2483  restore_vacuum_error_info(vacrel, &saved_err_info);
2484 }
2485 
2486 /*
2487  * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2488  * vacrel->dead_items array.
2489  *
2490  * Caller must have an exclusive buffer lock on the buffer (though a full
2491  * cleanup lock is also acceptable). vmbuffer must be valid and already have
2492  * a pin on blkno's visibility map page.
2493  *
2494  * index is an offset into the vacrel->dead_items array for the first listed
2495  * LP_DEAD item on the page. The return value is the first index immediately
2496  * after all LP_DEAD items for the same page in the array.
2497  */
2498 static int
2500  int index, Buffer vmbuffer)
2501 {
2502  VacDeadItems *dead_items = vacrel->dead_items;
2503  Page page = BufferGetPage(buffer);
2505  int nunused = 0;
2506  TransactionId visibility_cutoff_xid;
2507  bool all_frozen;
2508  LVSavedErrInfo saved_err_info;
2509 
2510  Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2511 
2513 
2514  /* Update error traceback information */
2515  update_vacuum_error_info(vacrel, &saved_err_info,
2518 
2520 
2521  for (; index < dead_items->num_items; index++)
2522  {
2523  BlockNumber tblk;
2524  OffsetNumber toff;
2525  ItemId itemid;
2526 
2527  tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
2528  if (tblk != blkno)
2529  break; /* past end of tuples for this block */
2530  toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
2531  itemid = PageGetItemId(page, toff);
2532 
2533  Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2534  ItemIdSetUnused(itemid);
2535  unused[nunused++] = toff;
2536  }
2537 
2538  Assert(nunused > 0);
2539 
2540  /* Attempt to truncate line pointer array now */
2542 
2543  /*
2544  * Mark buffer dirty before we write WAL.
2545  */
2546  MarkBufferDirty(buffer);
2547 
2548  /* XLOG stuff */
2549  if (RelationNeedsWAL(vacrel->rel))
2550  {
2551  xl_heap_vacuum xlrec;
2552  XLogRecPtr recptr;
2553 
2554  xlrec.nunused = nunused;
2555 
2556  XLogBeginInsert();
2557  XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
2558 
2559  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2560  XLogRegisterBufData(0, (char *) unused, nunused * sizeof(OffsetNumber));
2561 
2562  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
2563 
2564  PageSetLSN(page, recptr);
2565  }
2566 
2567  /*
2568  * End critical section, so we safely can do visibility tests (which
2569  * possibly need to perform IO and allocate memory!). If we crash now the
2570  * page (including the corresponding vm bit) might not be marked all
2571  * visible, but that's fine. A later vacuum will fix that.
2572  */
2573  END_CRIT_SECTION();
2574 
2575  /*
2576  * Now that we have removed the LD_DEAD items from the page, once again
2577  * check if the page has become all-visible. The page is already marked
2578  * dirty, exclusively locked, and, if needed, a full page image has been
2579  * emitted.
2580  */
2581  Assert(!PageIsAllVisible(page));
2582  if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2583  &all_frozen))
2584  {
2586 
2587  if (all_frozen)
2588  {
2589  Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2590  flags |= VISIBILITYMAP_ALL_FROZEN;
2591  }
2592 
2593  PageSetAllVisible(page);
2594  visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2595  vmbuffer, visibility_cutoff_xid, flags);
2596  }
2597 
2598  /* Revert to the previous phase information for error traceback */
2599  restore_vacuum_error_info(vacrel, &saved_err_info);
2600  return index;
2601 }
2602 
2603 /*
2604  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2605  * relfrozenxid and/or relminmxid that is dangerously far in the past.
2606  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2607  * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2608  *
2609  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2610  * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2611  * that it started out with.
2612  *
2613  * Returns true when failsafe has been triggered.
2614  */
2615 static bool
2617 {
2618  /* Don't warn more than once per VACUUM */
2619  if (vacrel->failsafe_active)
2620  return true;
2621 
2623  {
2624  vacrel->failsafe_active = true;
2625 
2626  /* Disable index vacuuming, index cleanup, and heap rel truncation */
2627  vacrel->do_index_vacuuming = false;
2628  vacrel->do_index_cleanup = false;
2629  vacrel->do_rel_truncate = false;
2630 
2631  ereport(WARNING,
2632  (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2633  vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2634  vacrel->num_index_scans),
2635  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2636  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2637  "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2638 
2639  /* Stop applying cost limits from this point on */
2640  VacuumCostActive = false;
2641  VacuumCostBalance = 0;
2642 
2643  return true;
2644  }
2645 
2646  return false;
2647 }
2648 
2649 /*
2650  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2651  */
2652 static void
2654 {
2655  double reltuples = vacrel->new_rel_tuples;
2656  bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2657 
2658  Assert(vacrel->do_index_cleanup);
2659  Assert(vacrel->nindexes > 0);
2660 
2661  /* Report that we are now cleaning up indexes */
2664 
2665  if (!ParallelVacuumIsActive(vacrel))
2666  {
2667  for (int idx = 0; idx < vacrel->nindexes; idx++)
2668  {
2669  Relation indrel = vacrel->indrels[idx];
2670  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2671 
2672  vacrel->indstats[idx] =
2673  lazy_cleanup_one_index(indrel, istat, reltuples,
2674  estimated_count, vacrel);
2675  }
2676  }
2677  else
2678  {
2679  /* Outsource everything to parallel variant */
2680  parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2681  vacrel->num_index_scans,
2682  estimated_count);
2683  }
2684 }
2685 
2686 /*
2687  * lazy_vacuum_one_index() -- vacuum index relation.
2688  *
2689  * Delete all the index tuples containing a TID collected in
2690  * vacrel->dead_items array. Also update running statistics.
2691  * Exact details depend on index AM's ambulkdelete routine.
2692  *
2693  * reltuples is the number of heap tuples to be passed to the
2694  * bulkdelete callback. It's always assumed to be estimated.
2695  * See indexam.sgml for more info.
2696  *
2697  * Returns bulk delete stats derived from input stats
2698  */
2699 static IndexBulkDeleteResult *
2701  double reltuples, LVRelState *vacrel)
2702 {
2703  IndexVacuumInfo ivinfo;
2704  LVSavedErrInfo saved_err_info;
2705 
2706  ivinfo.index = indrel;
2707  ivinfo.analyze_only = false;
2708  ivinfo.report_progress = false;
2709  ivinfo.estimated_count = true;
2710  ivinfo.message_level = DEBUG2;
2711  ivinfo.num_heap_tuples = reltuples;
2712  ivinfo.strategy = vacrel->bstrategy;
2713 
2714  /*
2715  * Update error traceback information.
2716  *
2717  * The index name is saved during this phase and restored immediately
2718  * after this phase. See vacuum_error_callback.
2719  */
2720  Assert(vacrel->indname == NULL);
2721  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2722  update_vacuum_error_info(vacrel, &saved_err_info,
2725 
2726  /* Do bulk deletion */
2727  istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items);
2728 
2729  /* Revert to the previous phase information for error traceback */
2730  restore_vacuum_error_info(vacrel, &saved_err_info);
2731  pfree(vacrel->indname);
2732  vacrel->indname = NULL;
2733 
2734  return istat;
2735 }
2736 
2737 /*
2738  * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2739  *
2740  * Calls index AM's amvacuumcleanup routine. reltuples is the number
2741  * of heap tuples and estimated_count is true if reltuples is an
2742  * estimated value. See indexam.sgml for more info.
2743  *
2744  * Returns bulk delete stats derived from input stats
2745  */
2746 static IndexBulkDeleteResult *
2748  double reltuples, bool estimated_count,
2749  LVRelState *vacrel)
2750 {
2751  IndexVacuumInfo ivinfo;
2752  LVSavedErrInfo saved_err_info;
2753 
2754  ivinfo.index = indrel;
2755  ivinfo.analyze_only = false;
2756  ivinfo.report_progress = false;
2757  ivinfo.estimated_count = estimated_count;
2758  ivinfo.message_level = DEBUG2;
2759 
2760  ivinfo.num_heap_tuples = reltuples;
2761  ivinfo.strategy = vacrel->bstrategy;
2762 
2763  /*
2764  * Update error traceback information.
2765  *
2766  * The index name is saved during this phase and restored immediately
2767  * after this phase. See vacuum_error_callback.
2768  */
2769  Assert(vacrel->indname == NULL);
2770  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2771  update_vacuum_error_info(vacrel, &saved_err_info,
2774 
2775  istat = vac_cleanup_one_index(&ivinfo, istat);
2776 
2777  /* Revert to the previous phase information for error traceback */
2778  restore_vacuum_error_info(vacrel, &saved_err_info);
2779  pfree(vacrel->indname);
2780  vacrel->indname = NULL;
2781 
2782  return istat;
2783 }
2784 
2785 /*
2786  * should_attempt_truncation - should we attempt to truncate the heap?
2787  *
2788  * Don't even think about it unless we have a shot at releasing a goodly
2789  * number of pages. Otherwise, the time taken isn't worth it, mainly because
2790  * an AccessExclusive lock must be replayed on any hot standby, where it can
2791  * be particularly disruptive.
2792  *
2793  * Also don't attempt it if wraparound failsafe is in effect. The entire
2794  * system might be refusing to allocate new XIDs at this point. The system
2795  * definitely won't return to normal unless and until VACUUM actually advances
2796  * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2797  * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2798  * truncate the table under these circumstances, an XID exhaustion error might
2799  * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2800  * There is very little chance of truncation working out when the failsafe is
2801  * in effect in any case. lazy_scan_prune makes the optimistic assumption
2802  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2803  * we're called.
2804  *
2805  * Also don't attempt it if we are doing early pruning/vacuuming, because a
2806  * scan which cannot find a truncated heap page cannot determine that the
2807  * snapshot is too old to read that page.
2808  */
2809 static bool
2811 {
2812  BlockNumber possibly_freeable;
2813 
2814  if (!vacrel->do_rel_truncate || vacrel->failsafe_active ||
2816  return false;
2817 
2818  possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2819  if (possibly_freeable > 0 &&
2820  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2821  possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2822  return true;
2823 
2824  return false;
2825 }
2826 
2827 /*
2828  * lazy_truncate_heap - try to truncate off any empty pages at the end
2829  */
2830 static void
2832 {
2833  BlockNumber orig_rel_pages = vacrel->rel_pages;
2834  BlockNumber new_rel_pages;
2835  bool lock_waiter_detected;
2836  int lock_retry;
2837 
2838  /* Report that we are now truncating */
2841 
2842  /* Update error traceback information one last time */
2845 
2846  /*
2847  * Loop until no more truncating can be done.
2848  */
2849  do
2850  {
2851  /*
2852  * We need full exclusive lock on the relation in order to do
2853  * truncation. If we can't get it, give up rather than waiting --- we
2854  * don't want to block other backends, and we don't want to deadlock
2855  * (which is quite possible considering we already hold a lower-grade
2856  * lock).
2857  */
2858  lock_waiter_detected = false;
2859  lock_retry = 0;
2860  while (true)
2861  {
2863  break;
2864 
2865  /*
2866  * Check for interrupts while trying to (re-)acquire the exclusive
2867  * lock.
2868  */
2870 
2871  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2873  {
2874  /*
2875  * We failed to establish the lock in the specified number of
2876  * retries. This means we give up truncating.
2877  */
2878  ereport(vacrel->verbose ? INFO : DEBUG2,
2879  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2880  vacrel->relname)));
2881  return;
2882  }
2883 
2884  (void) WaitLatch(MyLatch,
2889  }
2890 
2891  /*
2892  * Now that we have exclusive lock, look to see if the rel has grown
2893  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2894  * the newly added pages presumably contain non-deletable tuples.
2895  */
2896  new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2897  if (new_rel_pages != orig_rel_pages)
2898  {
2899  /*
2900  * Note: we intentionally don't update vacrel->rel_pages with the
2901  * new rel size here. If we did, it would amount to assuming that
2902  * the new pages are empty, which is unlikely. Leaving the numbers
2903  * alone amounts to assuming that the new pages have the same
2904  * tuple density as existing ones, which is less unlikely.
2905  */
2907  return;
2908  }
2909 
2910  /*
2911  * Scan backwards from the end to verify that the end pages actually
2912  * contain no tuples. This is *necessary*, not optional, because
2913  * other backends could have added tuples to these pages whilst we
2914  * were vacuuming.
2915  */
2916  new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2917  vacrel->blkno = new_rel_pages;
2918 
2919  if (new_rel_pages >= orig_rel_pages)
2920  {
2921  /* can't do anything after all */
2923  return;
2924  }
2925 
2926  /*
2927  * Okay to truncate.
2928  */
2929  RelationTruncate(vacrel->rel, new_rel_pages);
2930 
2931  /*
2932  * We can release the exclusive lock as soon as we have truncated.
2933  * Other backends can't safely access the relation until they have
2934  * processed the smgr invalidation that smgrtruncate sent out ... but
2935  * that should happen as part of standard invalidation processing once
2936  * they acquire lock on the relation.
2937  */
2939 
2940  /*
2941  * Update statistics. Here, it *is* correct to adjust rel_pages
2942  * without also touching reltuples, since the tuple count wasn't
2943  * changed by the truncation.
2944  */
2945  vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2946  vacrel->rel_pages = new_rel_pages;
2947 
2948  ereport(vacrel->verbose ? INFO : DEBUG2,
2949  (errmsg("table \"%s\": truncated %u to %u pages",
2950  vacrel->relname,
2951  orig_rel_pages, new_rel_pages)));
2952  orig_rel_pages = new_rel_pages;
2953  } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2954 }
2955 
2956 /*
2957  * Rescan end pages to verify that they are (still) empty of tuples.
2958  *
2959  * Returns number of nondeletable pages (last nonempty page + 1).
2960  */
2961 static BlockNumber
2962 count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2963 {
2964  BlockNumber blkno;
2965  BlockNumber prefetchedUntil;
2966  instr_time starttime;
2967 
2968  /* Initialize the starttime if we check for conflicting lock requests */
2969  INSTR_TIME_SET_CURRENT(starttime);
2970 
2971  /*
2972  * Start checking blocks at what we believe relation end to be and move
2973  * backwards. (Strange coding of loop control is needed because blkno is
2974  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2975  * in forward direction, so that OS-level readahead can kick in.
2976  */
2977  blkno = vacrel->rel_pages;
2979  "prefetch size must be power of 2");
2980  prefetchedUntil = InvalidBlockNumber;
2981  while (blkno > vacrel->nonempty_pages)
2982  {
2983  Buffer buf;
2984  Page page;
2985  OffsetNumber offnum,
2986  maxoff;
2987  bool hastup;
2988 
2989  /*
2990  * Check if another process requests a lock on our relation. We are
2991  * holding an AccessExclusiveLock here, so they will be waiting. We
2992  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2993  * only check if that interval has elapsed once every 32 blocks to
2994  * keep the number of system calls and actual shared lock table
2995  * lookups to a minimum.
2996  */
2997  if ((blkno % 32) == 0)
2998  {
2999  instr_time currenttime;
3000  instr_time elapsed;
3001 
3002  INSTR_TIME_SET_CURRENT(currenttime);
3003  elapsed = currenttime;
3004  INSTR_TIME_SUBTRACT(elapsed, starttime);
3005  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3007  {
3009  {
3010  ereport(vacrel->verbose ? INFO : DEBUG2,
3011  (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3012  vacrel->relname)));
3013 
3014  *lock_waiter_detected = true;
3015  return blkno;
3016  }
3017  starttime = currenttime;
3018  }
3019  }
3020 
3021  /*
3022  * We don't insert a vacuum delay point here, because we have an
3023  * exclusive lock on the table which we want to hold for as short a
3024  * time as possible. We still need to check for interrupts however.
3025  */
3027 
3028  blkno--;
3029 
3030  /* If we haven't prefetched this lot yet, do so now. */
3031  if (prefetchedUntil > blkno)
3032  {
3033  BlockNumber prefetchStart;
3034  BlockNumber pblkno;
3035 
3036  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3037  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3038  {
3039  PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3041  }
3042  prefetchedUntil = prefetchStart;
3043  }
3044 
3045  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3046  vacrel->bstrategy);
3047 
3048  /* In this phase we only need shared access to the buffer */
3050 
3051  page = BufferGetPage(buf);
3052 
3053  if (PageIsNew(page) || PageIsEmpty(page))
3054  {
3056  continue;
3057  }
3058 
3059  hastup = false;
3060  maxoff = PageGetMaxOffsetNumber(page);
3061  for (offnum = FirstOffsetNumber;
3062  offnum <= maxoff;
3063  offnum = OffsetNumberNext(offnum))
3064  {
3065  ItemId itemid;
3066 
3067  itemid = PageGetItemId(page, offnum);
3068 
3069  /*
3070  * Note: any non-unused item should be taken as a reason to keep
3071  * this page. Even an LP_DEAD item makes truncation unsafe, since
3072  * we must not have cleaned out its index entries.
3073  */
3074  if (ItemIdIsUsed(itemid))
3075  {
3076  hastup = true;
3077  break; /* can stop scanning */
3078  }
3079  } /* scan along page */
3080 
3082 
3083  /* Done scanning if we found a tuple here */
3084  if (hastup)
3085  return blkno + 1;
3086  }
3087 
3088  /*
3089  * If we fall out of the loop, all the previously-thought-to-be-empty
3090  * pages still are; we need not bother to look at the last known-nonempty
3091  * page.
3092  */
3093  return vacrel->nonempty_pages;
3094 }
3095 
3096 /*
3097  * Returns the number of dead TIDs that VACUUM should allocate space to
3098  * store, given a heap rel of size vacrel->rel_pages, and given current
3099  * maintenance_work_mem setting (or current autovacuum_work_mem setting,
3100  * when applicable).
3101  *
3102  * See the comments at the head of this file for rationale.
3103  */
3104 static int
3106 {
3107  int64 max_items;
3108  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
3109  autovacuum_work_mem != -1 ?
3111 
3112  if (vacrel->nindexes > 0)
3113  {
3114  BlockNumber rel_pages = vacrel->rel_pages;
3115 
3116  max_items = MAXDEADITEMS(vac_work_mem * 1024L);
3117  max_items = Min(max_items, INT_MAX);
3118  max_items = Min(max_items, MAXDEADITEMS(MaxAllocSize));
3119 
3120  /* curious coding here to ensure the multiplication can't overflow */
3121  if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
3122  max_items = rel_pages * MaxHeapTuplesPerPage;
3123 
3124  /* stay sane if small maintenance_work_mem */
3125  max_items = Max(max_items, MaxHeapTuplesPerPage);
3126  }
3127  else
3128  {
3129  /* One-pass case only stores a single heap page's TIDs at a time */
3130  max_items = MaxHeapTuplesPerPage;
3131  }
3132 
3133  return (int) max_items;
3134 }
3135 
3136 /*
3137  * Allocate dead_items (either using palloc, or in dynamic shared memory).
3138  * Sets dead_items in vacrel for caller.
3139  *
3140  * Also handles parallel initialization as part of allocating dead_items in
3141  * DSM when required.
3142  */
3143 static void
3144 dead_items_alloc(LVRelState *vacrel, int nworkers)
3145 {
3146  VacDeadItems *dead_items;
3147  int max_items;
3148 
3149  max_items = dead_items_max_items(vacrel);
3150  Assert(max_items >= MaxHeapTuplesPerPage);
3151 
3152  /*
3153  * Initialize state for a parallel vacuum. As of now, only one worker can
3154  * be used for an index, so we invoke parallelism only if there are at
3155  * least two indexes on a table.
3156  */
3157  if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3158  {
3159  /*
3160  * Since parallel workers cannot access data in temporary tables, we
3161  * can't perform parallel vacuum on them.
3162  */
3163  if (RelationUsesLocalBuffers(vacrel->rel))
3164  {
3165  /*
3166  * Give warning only if the user explicitly tries to perform a
3167  * parallel vacuum on the temporary table.
3168  */
3169  if (nworkers > 0)
3170  ereport(WARNING,
3171  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3172  vacrel->relname)));
3173  }
3174  else
3175  vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3176  vacrel->nindexes, nworkers,
3177  max_items,
3178  vacrel->verbose ? INFO : DEBUG2,
3179  vacrel->bstrategy);
3180 
3181  /* If parallel mode started, dead_items space is allocated in DSM */
3182  if (ParallelVacuumIsActive(vacrel))
3183  {
3184  vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs);
3185  return;
3186  }
3187  }
3188 
3189  /* Serial VACUUM case */
3190  dead_items = (VacDeadItems *) palloc(vac_max_items_to_alloc_size(max_items));
3191  dead_items->max_items = max_items;
3192  dead_items->num_items = 0;
3193 
3194  vacrel->dead_items = dead_items;
3195 }
3196 
3197 /*
3198  * Perform cleanup for resources allocated in dead_items_alloc
3199  */
3200 static void
3202 {
3203  if (!ParallelVacuumIsActive(vacrel))
3204  {
3205  /* Don't bother with pfree here */
3206  return;
3207  }
3208 
3209  /* End parallel mode */
3210  parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3211  vacrel->pvs = NULL;
3212 }
3213 
3214 /*
3215  * Check if every tuple in the given page is visible to all current and future
3216  * transactions. Also return the visibility_cutoff_xid which is the highest
3217  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3218  * on this page is frozen.
3219  *
3220  * This is a stripped down version of lazy_scan_prune(). If you change
3221  * anything here, make sure that everything stays in sync. Note that an
3222  * assertion calls us to verify that everybody still agrees. Be sure to avoid
3223  * introducing new side-effects here.
3224  */
3225 static bool
3227  TransactionId *visibility_cutoff_xid,
3228  bool *all_frozen)
3229 {
3230  Page page = BufferGetPage(buf);
3232  OffsetNumber offnum,
3233  maxoff;
3234  bool all_visible = true;
3235 
3236  *visibility_cutoff_xid = InvalidTransactionId;
3237  *all_frozen = true;
3238 
3239  maxoff = PageGetMaxOffsetNumber(page);
3240  for (offnum = FirstOffsetNumber;
3241  offnum <= maxoff && all_visible;
3242  offnum = OffsetNumberNext(offnum))
3243  {
3244  ItemId itemid;
3245  HeapTupleData tuple;
3246 
3247  /*
3248  * Set the offset number so that we can display it along with any
3249  * error that occurred while processing this tuple.
3250  */
3251  vacrel->offnum = offnum;
3252  itemid = PageGetItemId(page, offnum);
3253 
3254  /* Unused or redirect line pointers are of no interest */
3255  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3256  continue;
3257 
3258  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3259 
3260  /*
3261  * Dead line pointers can have index pointers pointing to them. So
3262  * they can't be treated as visible
3263  */
3264  if (ItemIdIsDead(itemid))
3265  {
3266  all_visible = false;
3267  *all_frozen = false;
3268  break;
3269  }
3270 
3271  Assert(ItemIdIsNormal(itemid));
3272 
3273  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3274  tuple.t_len = ItemIdGetLength(itemid);
3275  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3276 
3277  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3278  buf))
3279  {
3280  case HEAPTUPLE_LIVE:
3281  {
3282  TransactionId xmin;
3283 
3284  /* Check comments in lazy_scan_prune. */
3286  {
3287  all_visible = false;
3288  *all_frozen = false;
3289  break;
3290  }
3291 
3292  /*
3293  * The inserter definitely committed. But is it old enough
3294  * that everyone sees it as committed?
3295  */
3296  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3297  if (!TransactionIdPrecedes(xmin,
3298  vacrel->cutoffs.OldestXmin))
3299  {
3300  all_visible = false;
3301  *all_frozen = false;
3302  break;
3303  }
3304 
3305  /* Track newest xmin on page. */
3306  if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3307  TransactionIdIsNormal(xmin))
3308  *visibility_cutoff_xid = xmin;
3309 
3310  /* Check whether this tuple is already frozen or not */
3311  if (all_visible && *all_frozen &&
3313  *all_frozen = false;
3314  }
3315  break;
3316 
3317  case HEAPTUPLE_DEAD:
3321  {
3322  all_visible = false;
3323  *all_frozen = false;
3324  break;
3325  }
3326  default:
3327  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3328  break;
3329  }
3330  } /* scan along page */
3331 
3332  /* Clear the offset information once we have processed the given page. */
3333  vacrel->offnum = InvalidOffsetNumber;
3334 
3335  return all_visible;
3336 }
3337 
3338 /*
3339  * Update index statistics in pg_class if the statistics are accurate.
3340  */
3341 static void
3343 {
3344  Relation *indrels = vacrel->indrels;
3345  int nindexes = vacrel->nindexes;
3346  IndexBulkDeleteResult **indstats = vacrel->indstats;
3347 
3348  Assert(vacrel->do_index_cleanup);
3349 
3350  for (int idx = 0; idx < nindexes; idx++)
3351  {
3352  Relation indrel = indrels[idx];
3353  IndexBulkDeleteResult *istat = indstats[idx];
3354 
3355  if (istat == NULL || istat->estimated_count)
3356  continue;
3357 
3358  /* Update index statistics */
3359  vac_update_relstats(indrel,
3360  istat->num_pages,
3361  istat->num_index_tuples,
3362  0,
3363  false,
3366  NULL, NULL, false);
3367  }
3368 }
3369 
3370 /*
3371  * Error context callback for errors occurring during vacuum. The error
3372  * context messages for index phases should match the messages set in parallel
3373  * vacuum. If you change this function for those phases, change
3374  * parallel_vacuum_error_callback() as well.
3375  */
3376 static void
3378 {
3379  LVRelState *errinfo = arg;
3380 
3381  switch (errinfo->phase)
3382  {
3384  if (BlockNumberIsValid(errinfo->blkno))
3385  {
3386  if (OffsetNumberIsValid(errinfo->offnum))
3387  errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3388  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3389  else
3390  errcontext("while scanning block %u of relation \"%s.%s\"",
3391  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3392  }
3393  else
3394  errcontext("while scanning relation \"%s.%s\"",
3395  errinfo->relnamespace, errinfo->relname);
3396  break;
3397 
3399  if (BlockNumberIsValid(errinfo->blkno))
3400  {
3401  if (OffsetNumberIsValid(errinfo->offnum))
3402  errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3403  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3404  else
3405  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3406  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3407  }
3408  else
3409  errcontext("while vacuuming relation \"%s.%s\"",
3410  errinfo->relnamespace, errinfo->relname);
3411  break;
3412 
3414  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3415  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3416  break;
3417 
3419  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3420  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3421  break;
3422 
3424  if (BlockNumberIsValid(errinfo->blkno))
3425  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3426  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3427  break;
3428 
3430  default:
3431  return; /* do nothing; the errinfo may not be
3432  * initialized */
3433  }
3434 }
3435 
3436 /*
3437  * Updates the information required for vacuum error callback. This also saves
3438  * the current information which can be later restored via restore_vacuum_error_info.
3439  */
3440 static void
3442  int phase, BlockNumber blkno, OffsetNumber offnum)
3443 {
3444  if (saved_vacrel)
3445  {
3446  saved_vacrel->offnum = vacrel->offnum;
3447  saved_vacrel->blkno = vacrel->blkno;
3448  saved_vacrel->phase = vacrel->phase;
3449  }
3450 
3451  vacrel->blkno = blkno;
3452  vacrel->offnum = offnum;
3453  vacrel->phase = phase;
3454 }
3455 
3456 /*
3457  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3458  */
3459 static void
3461  const LVSavedErrInfo *saved_vacrel)
3462 {
3463  vacrel->blkno = saved_vacrel->blkno;
3464  vacrel->offnum = saved_vacrel->offnum;
3465  vacrel->phase = saved_vacrel->phase;
3466 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int autovacuum_work_mem
Definition: autovacuum.c:118
bool IsAutoVacuumWorkerProcess(void)
Definition: autovacuum.c:3319
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1670
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1730
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1585
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
bool track_io_timing
Definition: bufmgr.c:137
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2765
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:592
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3935
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3958
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1585
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4233
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4176
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:759
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4410
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:105
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:106
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:156
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:280
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:107
@ RBM_NORMAL
Definition: bufmgr.h:39
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:228
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
static bool PageIsEmpty(Page page)
Definition: bufpage.h:220
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static void PageClearAllVisible(Page page)
Definition: bufpage.h:436
#define SizeOfPageHeaderData
Definition: bufpage.h:213
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsNew(Page page)
Definition: bufpage.h:230
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static XLogRecPtr PageGetLSN(Page page)
Definition: bufpage.h:383
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define Min(x, y)
Definition: c.h:988
signed int int32
Definition: c.h:478
#define Max(x, y)
Definition: c.h:982
TransactionId MultiXactId
Definition: c.h:646
#define unlikely(x)
Definition: c.h:295
unsigned char uint8
Definition: c.h:488
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:922
uint32 TransactionId
Definition: c.h:636
size_t Size
Definition: c.h:589
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:2980
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
int errdetail(const char *fmt,...)
Definition: elog.c:1202
ErrorContextCallback * error_context_stack
Definition: elog.c:95
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define _(x)
Definition: elog.c:91
#define LOG
Definition: elog.h:31
#define errcontext
Definition: elog.h:196
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define INFO
Definition: elog.h:34
#define ereport(elevel,...)
Definition: elog.h:149
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:354
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:232
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:182
int64 VacuumPageHit
Definition: globals.c:148
int64 VacuumPageMiss
Definition: globals.c:149
bool VacuumCostActive
Definition: globals.c:153
int64 VacuumPageDirty
Definition: globals.c:150
int VacuumCostBalance
Definition: globals.c:152
int maintenance_work_mem
Definition: globals.c:127
struct Latch * MyLatch
Definition: globals.c:58
Oid MyDatabaseId
Definition: globals.c:89
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7336
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7391
void heap_freeze_execute_prepared(Relation rel, Buffer buffer, TransactionId snapshotConflictHorizon, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6680
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6377
HTSV_Result
Definition: heapam.h:96
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:99
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:100
@ HEAPTUPLE_LIVE
Definition: heapam.h:98
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:101
@ HEAPTUPLE_DEAD
Definition: heapam.h:97
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
#define XLOG_HEAP2_VACUUM
Definition: heapam_xlog.h:55
#define SizeOfHeapVacuum
Definition: heapam_xlog.h:265
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:280
int i
Definition: isn.c:73
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static void ItemPointerSetBlockNumber(ItemPointerData *pointer, BlockNumber blockNumber)
Definition: itemptr.h:147
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
void ResetLatch(Latch *latch)
Definition: latch.c:699
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:492
#define WL_TIMEOUT
Definition: latch.h:128
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:130
#define WL_LATCH_SET
Definition: latch.h:125
Assert(fmt[strlen(fmt) - 1] !='\n')
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:311
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:276
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:374
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3331
char * pstrdup(const char *in)
Definition: mcxt.c:1624
void pfree(void *pointer)
Definition: mcxt.c:1436
void * palloc0(Size size)
Definition: mcxt.c:1241
void * palloc(Size size)
Definition: mcxt.c:1210
#define MaxAllocSize
Definition: memutils.h:40
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3170
#define InvalidMultiXactId
Definition: multixact.h:24
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
void * arg
static int verbose
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
static char * buf
Definition: pg_test_fsync.c:67
int64 PgStat_Counter
Definition: pgstat.h:88
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4066
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:35
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:30
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_NUM_DEAD_TUPLES
Definition: progress.h:27
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:32
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:33
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:31
#define PROGRESS_VACUUM_MAX_DEAD_TUPLES
Definition: progress.h:26
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:34
int heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, int *nnewlpdead, OffsetNumber *off_loc)
Definition: pruneheap.c:266
#define RelationGetRelid(relation)
Definition: rel.h:501
#define RelationGetRelationName(relation)
Definition: rel.h:535
#define RelationNeedsWAL(relation)
Definition: rel.h:626
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:635
#define RelationGetNamespace(relation)
Definition: rel.h:542
@ MAIN_FORKNUM
Definition: relpath.h:50
int old_snapshot_threshold
Definition: snapmgr.c:79
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:287
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:91
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:191
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:179
bool freeze_required
Definition: heapam.h:153
MultiXactId FreezePageRelminMxid
Definition: heapam.h:180
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:190
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
OffsetNumber offset
Definition: heapam.h:123
bool estimated_count
Definition: genam.h:77
BlockNumber pages_deleted
Definition: genam.h:81
BlockNumber pages_newly_deleted
Definition: genam.h:80
BlockNumber pages_free
Definition: genam.h:82
BlockNumber num_pages
Definition: genam.h:76
double num_index_tuples
Definition: genam.h:78
Relation index
Definition: genam.h:46
double num_heap_tuples
Definition: genam.h:51
bool analyze_only
Definition: genam.h:47
BufferAccessStrategy strategy
Definition: genam.h:52
bool report_progress
Definition: genam.h:48
int message_level
Definition: genam.h:50
bool estimated_count
Definition: genam.h:49
TransactionId visibility_cutoff_xid
Definition: vacuumlazy.c:232
ParallelVacuumState * pvs
Definition: vacuumlazy.c:150
bool verbose
Definition: vacuumlazy.c:182
int nindexes
Definition: vacuumlazy.c:146
OffsetNumber offnum
Definition: vacuumlazy.c:180
int64 tuples_deleted
Definition: vacuumlazy.c:209
BlockNumber nonempty_pages
Definition: vacuumlazy.c:198
bool do_rel_truncate
Definition: vacuumlazy.c:164
BlockNumber scanned_pages
Definition: vacuumlazy.c:193
bool aggressive
Definition: vacuumlazy.c:153
bool failsafe_active
Definition: vacuumlazy.c:157
GlobalVisState * vistest
Definition: vacuumlazy.c:168
BlockNumber removed_pages
Definition: vacuumlazy.c:194
int num_index_scans
Definition: vacuumlazy.c:207
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:204
double new_live_tuples
Definition: vacuumlazy.c:202
double new_rel_tuples
Definition: vacuumlazy.c:201
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:170
Relation rel
Definition: vacuumlazy.c:144
bool consider_bypass_optimization
Definition: vacuumlazy.c:159
BlockNumber rel_pages
Definition: vacuumlazy.c:192
int64 recently_dead_tuples
Definition: vacuumlazy.c:213
int64 tuples_frozen
Definition: vacuumlazy.c:210
BlockNumber frozen_pages
Definition: vacuumlazy.c:195
char * dbname
Definition: vacuumlazy.c:175
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:197
char * relnamespace
Definition: vacuumlazy.c:176
int64 live_tuples
Definition: vacuumlazy.c:212
int64 lpdead_items
Definition: vacuumlazy.c:211
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:149
bool skippedallvis
Definition: vacuumlazy.c:172
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:196
Relation * indrels
Definition: vacuumlazy.c:145
bool skipwithvm
Definition: vacuumlazy.c:155
bool do_index_cleanup
Definition: vacuumlazy.c:163
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:171
int64 missed_dead_tuples
Definition: vacuumlazy.c:214
BlockNumber blkno
Definition: vacuumlazy.c:179
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:167
char * relname
Definition: vacuumlazy.c:177
VacDeadItems * dead_items
Definition: vacuumlazy.c:191
VacErrPhase phase
Definition: vacuumlazy.c:181
char * indname
Definition: vacuumlazy.c:178
bool do_index_vacuuming
Definition: vacuumlazy.c:162
BlockNumber blkno
Definition: vacuumlazy.c:238
VacErrPhase phase
Definition: vacuumlazy.c:240
OffsetNumber offnum
Definition: vacuumlazy.c:239
Form_pg_class rd_rel
Definition: rel.h:110
ItemPointerData items[FLEXIBLE_ARRAY_MEMBER]
Definition: vacuum.h:288
int max_items
Definition: vacuum.h:284
int num_items
Definition: vacuum.h:285
TransactionId FreezeLimit
Definition: vacuum.h:275
TransactionId OldestXmin
Definition: vacuum.h:265
TransactionId relfrozenxid
Definition: vacuum.h:249
MultiXactId relminmxid
Definition: vacuum.h:250
MultiXactId MultiXactCutoff
Definition: vacuum.h:276
MultiXactId OldestMxact
Definition: vacuum.h:266
int nworkers
Definition: vacuum.h:237
VacOptValue truncate
Definition: vacuum.h:230
bits32 options
Definition: vacuum.h:218
bool is_wraparound
Definition: vacuum.h:225
int log_min_duration
Definition: vacuum.h:226
VacOptValue index_cleanup
Definition: vacuum.h:229
uint64 wal_bytes
Definition: instrument.h:53
int64 wal_fpi
Definition: instrument.h:52
int64 wal_records
Definition: instrument.h:51
Definition: type.h:95
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, VacDeadItems *dead_items)
Definition: vacuum.c:2305
Size vac_max_items_to_alloc_size(int max_items)
Definition: vacuum.c:2351
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2122
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1303
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2165
void vacuum_delay_point(void)
Definition: vacuum.c:2186
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:958
bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1145
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1207
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition: vacuum.c:2326
#define VACOPT_VERBOSE
Definition: vacuum.h:185
#define MAXDEADITEMS(avail_mem)
Definition: vacuum.h:291
@ VACOPTVALUE_AUTO
Definition: vacuum.h:205
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:207
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:204
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:206
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:190
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3201
static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:3226
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3342
struct LVPagePruneState LVPagePruneState
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:87
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3377
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2831
static void lazy_vacuum(LVRelState *vacrel)
Definition: vacuumlazy.c:2190
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2653
static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int index, Buffer vmbuffer)
Definition: vacuumlazy.c:2499
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:76
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2810
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
Definition: vacuumlazy.c:1409
VacErrPhase
Definition: vacuumlazy.c:132
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition: vacuumlazy.c:134
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition: vacuumlazy.c:135
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition: vacuumlazy.c:138
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition: vacuumlazy.c:137
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition: vacuumlazy.c:136
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:133
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:822
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *hastup, bool *recordfreespace)
Definition: vacuumlazy.c:1956
#define ParallelVacuumIsActive(vacrel)
Definition: vacuumlazy.c:128
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
Definition: vacuumlazy.c:3460
static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, LVPagePruneState *prunestate)
Definition: vacuumlazy.c:1532
void heap_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:305
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
Definition: vacuumlazy.c:2700
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:77
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2616
static int dead_items_max_items(LVRelState *vacrel)
Definition: vacuumlazy.c:3105
struct LVSavedErrInfo LVSavedErrInfo
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
Definition: vacuumlazy.c:2747
#define PREFETCH_SIZE
Definition: vacuumlazy.c:122
struct LVRelState LVRelState
static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block, bool *next_unskippable_allvis, bool *skipping_current_range)
Definition: vacuumlazy.c:1293
#define BYPASS_THRESHOLD_PAGES
Definition: vacuumlazy.c:94
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:3144
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:88
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2315
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
Definition: vacuumlazy.c:3441
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
Definition: vacuumlazy.c:2962
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:116
#define FAILSAFE_EVERY_PAGES
Definition: vacuumlazy.c:100
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:86
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
Definition: vacuumlazy.c:2411
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:109
VacDeadItems * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans)
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int max_items, int elevel, BufferAccessStrategy bstrategy)
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
@ WAIT_EVENT_VACUUM_TRUNCATE
Definition: wait_event.h:152
bool IsInParallelMode(void)
Definition: xact.c:1069
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:451
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:389
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1191
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34