vacuumlazy.c
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  * The major space usage for vacuuming is storage for the array of dead TIDs
7  * that are to be removed from indexes. We want to ensure we can vacuum even
8  * the very largest relations with finite memory space usage. To do that, we
9  * set upper bounds on the number of TIDs we can keep track of at once.
10  *
11  * We are willing to use at most maintenance_work_mem (or perhaps
12  * autovacuum_work_mem) memory space to keep track of dead TIDs. We initially
13  * allocate an array of TIDs of that size, with an upper limit that depends on
14  * table size (this limit ensures we don't allocate a huge area uselessly for
15  * vacuuming small tables). If the array threatens to overflow, we must call
16  * lazy_vacuum to vacuum indexes (and to vacuum the pages that we've pruned).
17  * This frees up the memory space dedicated to storing dead TIDs.
18  *
19  * In practice VACUUM will often complete its initial pass over the target
20  * heap relation without ever running out of space to store TIDs. This means
21  * that there only needs to be one call to lazy_vacuum, after the initial pass
22  * completes.
23  *
24  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
25  * Portions Copyright (c) 1994, Regents of the University of California
26  *
27  *
28  * IDENTIFICATION
29  * src/backend/access/heap/vacuumlazy.c
30  *
31  *-------------------------------------------------------------------------
32  */
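As a rough illustration of the memory bound described in the header comment above (ignoring the table-size cap on the initial allocation), the arithmetic can be sketched as follows. The helper name is invented for illustration only; the sole assumption is that each dead TID is stored as a 6-byte ItemPointerData.

#include <stdint.h>

/*
 * Illustrative sketch only: estimate how many dead TIDs fit in a given
 * memory budget. With the default 64MB maintenance_work_mem this comes
 * out to roughly 11 million TIDs before lazy_vacuum must be called to
 * empty the array.
 */
static inline uint64_t
illustrative_max_dead_tids(uint64_t work_mem_bytes)
{
	const uint64_t tid_size = 6;	/* sizeof(ItemPointerData) */

	return work_mem_bytes / tid_size;
}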
33 #include "postgres.h"
34 
35 #include <math.h>
36 
37 #include "access/amapi.h"
38 #include "access/genam.h"
39 #include "access/heapam.h"
40 #include "access/heapam_xlog.h"
41 #include "access/htup_details.h"
42 #include "access/multixact.h"
43 #include "access/transam.h"
44 #include "access/visibilitymap.h"
45 #include "access/xact.h"
46 #include "access/xlog.h"
47 #include "access/xloginsert.h"
48 #include "catalog/index.h"
49 #include "catalog/storage.h"
50 #include "commands/dbcommands.h"
51 #include "commands/progress.h"
52 #include "commands/vacuum.h"
53 #include "executor/instrument.h"
54 #include "miscadmin.h"
55 #include "optimizer/paths.h"
56 #include "pgstat.h"
57 #include "portability/instr_time.h"
58 #include "postmaster/autovacuum.h"
59 #include "storage/bufmgr.h"
60 #include "storage/freespace.h"
61 #include "storage/lmgr.h"
62 #include "tcop/tcopprot.h"
63 #include "utils/lsyscache.h"
64 #include "utils/memutils.h"
65 #include "utils/pg_rusage.h"
66 #include "utils/timestamp.h"
67 
68 
69 /*
70  * Space/time tradeoff parameters: do these need to be user-tunable?
71  *
72  * To consider truncating the relation, we want there to be at least
73  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
74  * is less) potentially-freeable pages.
75  */
76 #define REL_TRUNCATE_MINIMUM 1000
77 #define REL_TRUNCATE_FRACTION 16
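/*
 * Worked example: under these settings a 10000-page table is only
 * considered for truncation once at least Min(1000, 10000/16) = 625
 * pages at its end appear to be freeable.
 */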
78 
79 /*
80  * Timing parameters for truncate locking heuristics.
81  *
82  * These were not exposed as user-tunable GUC values because it didn't seem
83  * that the potential for improvement was great enough to merit the cost of
84  * supporting them.
85  */
86 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
87 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
88 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
89 
90 /*
91  * Threshold that controls whether we bypass index vacuuming and heap
92  * vacuuming as an optimization
93  */
94 #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
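/*
 * For example, in a table of one million pages (roughly 8GB with 8kB
 * blocks), the bypass optimization can only be considered while fewer
 * than about 20000 pages contain LP_DEAD items.
 */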
95 
96 /*
97  * Perform a failsafe check each time we scan another 4GB of pages.
98  * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
99  */
100 #define FAILSAFE_EVERY_PAGES \
101  ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
102 
103 /*
104  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
105  * (it won't be exact because we only vacuum FSM after processing a heap page
106  * that has some removable tuples). When there are indexes, this is ignored,
107  * and we vacuum FSM after each index/heap cleaning pass.
108  */
109 #define VACUUM_FSM_EVERY_PAGES \
110  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
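/*
 * For reference: with the default BLCKSZ of 8192 bytes, FAILSAFE_EVERY_PAGES
 * works out to 524288 blocks and VACUUM_FSM_EVERY_PAGES to 1048576 blocks.
 */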
111 
112 /*
113  * Before we consider skipping a page that's marked as clean in the
114  * visibility map, we must've seen at least this many clean pages.
115  */
116 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
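/* With the default 8kB BLCKSZ, 32 blocks corresponds to 256kB of heap. */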
117 
118 /*
119  * Size of the prefetch window for lazy vacuum backwards truncation scan.
120  * Needs to be a power of 2.
121  */
122 #define PREFETCH_SIZE ((BlockNumber) 32)
123 
124 /*
125  * Macro to check if we are in a parallel vacuum. If true, we are in the
126  * parallel mode and the DSM segment is initialized.
127  */
128 #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
129 
130 /* Phases of vacuum during which we report error context. */
131 typedef enum
132 {
133  VACUUM_ERRCB_PHASE_UNKNOWN,
134  VACUUM_ERRCB_PHASE_SCAN_HEAP,
135  VACUUM_ERRCB_PHASE_VACUUM_INDEX,
136  VACUUM_ERRCB_PHASE_VACUUM_HEAP,
137  VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
138  VACUUM_ERRCB_PHASE_TRUNCATE
139 } VacErrPhase;
140 
141 typedef struct LVRelState
142 {
143  /* Target heap relation and its indexes */
146  int nindexes;
147 
148  /* Buffer access strategy and parallel vacuum state */
151 
152  /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
154  /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
156  /* Consider index vacuuming bypass optimization? */
158 
159  /* Doing index vacuuming, index cleanup, rel truncation? */
163 
164  /* VACUUM operation's cutoffs for freezing and pruning */
165  struct VacuumCutoffs cutoffs;
167  /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
171 
172  /* Error reporting state */
173  char *dbname;
175  char *relname;
176  char *indname; /* Current index name */
177  BlockNumber blkno; /* used only for heap operations */
178  OffsetNumber offnum; /* used only for heap operations */
180  bool verbose; /* VACUUM VERBOSE? */
181 
182  /*
183  * dead_items stores TIDs whose index tuples are deleted by index
184  * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
185  * that has been processed by lazy_scan_prune. Also needed by
186  * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
187  * LP_UNUSED during second heap pass.
188  */
189  VacDeadItems *dead_items; /* TIDs whose index tuples we'll delete */
190  BlockNumber rel_pages; /* total number of pages */
191  BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
192  BlockNumber removed_pages; /* # pages removed by relation truncation */
193  BlockNumber frozen_pages; /* # pages with newly frozen tuples */
194  BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
195  BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
196  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
197 
198  /* Statistics output by us, for table */
199  double new_rel_tuples; /* new estimated total # of tuples */
200  double new_live_tuples; /* new estimated total # of live tuples */
201  /* Statistics output by index AMs */
203 
204  /* Instrumentation counters */
206  /* Counters that follow are only for scanned_pages */
207  int64 tuples_deleted; /* # deleted from table */
208  int64 tuples_frozen; /* # newly frozen */
209  int64 lpdead_items; /* # deleted from indexes */
210  int64 live_tuples; /* # live tuples remaining */
211  int64 recently_dead_tuples; /* # dead, but not yet removable */
212  int64 missed_dead_tuples; /* # removable, but not removed */
213 } LVRelState;
214 
215 /*
216  * State returned by lazy_scan_prune()
217  */
218 typedef struct LVPagePruneState
219 {
220  bool hastup; /* Page prevents rel truncation? */
221  bool has_lpdead_items; /* includes existing LP_DEAD items */
222 
223  /*
224  * State describes the proper VM bit states to set for the page following
225  * pruning and freezing. all_visible implies !has_lpdead_items, but don't
226  * trust all_frozen result unless all_visible is also set to true.
227  */
228  bool all_visible; /* Every item visible to all? */
229  bool all_frozen; /* provided all_visible is also true */
230  TransactionId visibility_cutoff_xid; /* For recovery conflicts */
231 } LVPagePruneState;
232 
233 /* Struct for saving and restoring vacuum error information. */
234 typedef struct LVSavedErrInfo
235 {
236  BlockNumber blkno;
237  OffsetNumber offnum;
238  VacErrPhase phase;
239 } LVSavedErrInfo;
240 
241 
242 /* non-export function prototypes */
243 static void lazy_scan_heap(LVRelState *vacrel);
244 static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
245  BlockNumber next_block,
246  bool *next_unskippable_allvis,
247  bool *skipping_current_range);
248 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
249  BlockNumber blkno, Page page,
250  bool sharelock, Buffer vmbuffer);
251 static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
252  BlockNumber blkno, Page page,
253  LVPagePruneState *prunestate);
254 static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
255  BlockNumber blkno, Page page,
256  bool *hastup, bool *recordfreespace);
257 static void lazy_vacuum(LVRelState *vacrel);
258 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
259 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
260 static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
261  Buffer buffer, int index, Buffer vmbuffer);
262 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
263 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
264 static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
265  IndexBulkDeleteResult *istat,
266  double reltuples,
267  LVRelState *vacrel);
268 static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
269  IndexBulkDeleteResult *istat,
270  double reltuples,
271  bool estimated_count,
272  LVRelState *vacrel);
273 static bool should_attempt_truncation(LVRelState *vacrel);
274 static void lazy_truncate_heap(LVRelState *vacrel);
275 static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
276  bool *lock_waiter_detected);
277 static void dead_items_alloc(LVRelState *vacrel, int nworkers);
278 static void dead_items_cleanup(LVRelState *vacrel);
279 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
280  TransactionId *visibility_cutoff_xid, bool *all_frozen);
281 static void update_relstats_all_indexes(LVRelState *vacrel);
282 static void vacuum_error_callback(void *arg);
283 static void update_vacuum_error_info(LVRelState *vacrel,
284  LVSavedErrInfo *saved_vacrel,
285  int phase, BlockNumber blkno,
286  OffsetNumber offnum);
287 static void restore_vacuum_error_info(LVRelState *vacrel,
288  const LVSavedErrInfo *saved_vacrel);
289 
290 
291 /*
292  * heap_vacuum_rel() -- perform VACUUM for one heap relation
293  *
294  * This routine sets things up for and then calls lazy_scan_heap, where
295  * almost all work actually takes place. Once that call returns, it finalizes
296  * everything by managing relation truncation and updating rel's pg_class
297  * entry. (Also updates pg_class entries for any indexes that need it.)
298  *
299  * At entry, we have already established a transaction and opened
300  * and locked the relation.
301  */
302 void
303 heap_vacuum_rel(Relation rel, VacuumParams *params,
304  BufferAccessStrategy bstrategy)
305 {
306  LVRelState *vacrel;
307  bool verbose,
308  instrument,
309  skipwithvm,
310  frozenxid_updated,
311  minmulti_updated;
312  BlockNumber orig_rel_pages,
313  new_rel_pages,
314  new_rel_allvisible;
315  PGRUsage ru0;
316  TimestampTz starttime = 0;
317  PgStat_Counter startreadtime = 0,
318  startwritetime = 0;
319  WalUsage startwalusage = pgWalUsage;
320  int64 StartPageHit = VacuumPageHit,
321  StartPageMiss = VacuumPageMiss,
322  StartPageDirty = VacuumPageDirty;
323  ErrorContextCallback errcallback;
324  char **indnames = NULL;
325 
326  verbose = (params->options & VACOPT_VERBOSE) != 0;
327  instrument = (verbose || (IsAutoVacuumWorkerProcess() &&
328  params->log_min_duration >= 0));
329  if (instrument)
330  {
331  pg_rusage_init(&ru0);
332  starttime = GetCurrentTimestamp();
333  if (track_io_timing)
334  {
335  startreadtime = pgStatBlockReadTime;
336  startwritetime = pgStatBlockWriteTime;
337  }
338  }
339 
340  pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
341  RelationGetRelid(rel));
342 
343  /*
344  * Set up error traceback support for ereport() first. The idea is to set
345  * up an error context callback to display additional information on any
346  * error during a vacuum. During different phases of vacuum, we update
347  * the state so that the error context callback always displays current
348  * information.
349  *
350  * Copy the names of the heap rel into local memory for error reporting
351  * purposes, too. It isn't always safe to assume that we can get the name
352  * of each rel. It's convenient for code in lazy_scan_heap to always use
353  * these temp copies.
354  */
355  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
358  vacrel->relname = pstrdup(RelationGetRelationName(rel));
359  vacrel->indname = NULL;
361  vacrel->verbose = verbose;
362  errcallback.callback = vacuum_error_callback;
363  errcallback.arg = vacrel;
364  errcallback.previous = error_context_stack;
365  error_context_stack = &errcallback;
366 
367  /* Set up high level stuff about rel and its indexes */
368  vacrel->rel = rel;
369  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
370  &vacrel->indrels);
371  vacrel->bstrategy = bstrategy;
372  if (instrument && vacrel->nindexes > 0)
373  {
374  /* Copy index names used by instrumentation (not error reporting) */
375  indnames = palloc(sizeof(char *) * vacrel->nindexes);
376  for (int i = 0; i < vacrel->nindexes; i++)
377  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
378  }
379 
380  /*
381  * The index_cleanup param either disables index vacuuming and cleanup or
382  * forces it to go ahead when we would otherwise apply the index bypass
383  * optimization. The default is 'auto', which leaves the final decision
384  * up to lazy_vacuum().
385  *
386  * The truncate param allows the user to avoid attempting relation truncation,
387  * though it can't force truncation to happen.
388  */
391  params->truncate != VACOPTVALUE_AUTO);
392 
393  /*
394  * While VacuumFailsafeActive is reset to false before calling this, we
395  * still need to reset it here due to recursive calls.
396  */
397  VacuumFailsafeActive = false;
398  vacrel->consider_bypass_optimization = true;
399  vacrel->do_index_vacuuming = true;
400  vacrel->do_index_cleanup = true;
401  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
402  if (params->index_cleanup == VACOPTVALUE_DISABLED)
403  {
404  /* Force disable index vacuuming up-front */
405  vacrel->do_index_vacuuming = false;
406  vacrel->do_index_cleanup = false;
407  }
408  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
409  {
410  /* Force index vacuuming. Note that failsafe can still bypass. */
411  vacrel->consider_bypass_optimization = false;
412  }
413  else
414  {
415  /* Default/auto, make all decisions dynamically */
417  }
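 /*
  * Illustrative mapping of the branches above to SQL-level options:
  * "VACUUM (INDEX_CLEANUP OFF) tbl" reaches the first branch,
  * "VACUUM (INDEX_CLEANUP ON) tbl" the second, and a plain "VACUUM tbl"
  * (or INDEX_CLEANUP AUTO) the default branch.
  */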
418 
419  /* Initialize page counters explicitly (be tidy) */
420  vacrel->scanned_pages = 0;
421  vacrel->removed_pages = 0;
422  vacrel->frozen_pages = 0;
423  vacrel->lpdead_item_pages = 0;
424  vacrel->missed_dead_pages = 0;
425  vacrel->nonempty_pages = 0;
426  /* dead_items_alloc allocates vacrel->dead_items later on */
427 
428  /* Allocate/initialize output statistics state */
429  vacrel->new_rel_tuples = 0;
430  vacrel->new_live_tuples = 0;
431  vacrel->indstats = (IndexBulkDeleteResult **)
432  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
433 
434  /* Initialize remaining counters (be tidy) */
435  vacrel->num_index_scans = 0;
436  vacrel->tuples_deleted = 0;
437  vacrel->tuples_frozen = 0;
438  vacrel->lpdead_items = 0;
439  vacrel->live_tuples = 0;
440  vacrel->recently_dead_tuples = 0;
441  vacrel->missed_dead_tuples = 0;
442 
443  /*
444  * Get cutoffs that determine which deleted tuples are considered DEAD,
445  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
446  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
447  * happen in this order to ensure that the OldestXmin cutoff field works
448  * as an upper bound on the XIDs stored in the pages we'll actually scan
449  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
450  *
451  * Next acquire vistest, a related cutoff that's used in heap_page_prune.
452  * We expect vistest will always make heap_page_prune remove any deleted
453  * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become
454  * confused about whether a tuple should be frozen or removed. (In the
455  * future we might want to teach lazy_scan_prune to recompute vistest from
456  * time to time, to increase the number of dead tuples it can prune away.)
457  */
458  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
459  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
460  vacrel->vistest = GlobalVisTestFor(rel);
461  /* Initialize state used to track oldest extant XID/MXID */
462  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
463  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
464  vacrel->skippedallvis = false;
465  skipwithvm = true;
467  {
468  /*
469  * Force aggressive mode, and disable skipping blocks using the
470  * visibility map (even those set all-frozen)
471  */
472  vacrel->aggressive = true;
473  skipwithvm = false;
474  }
475 
476  vacrel->skipwithvm = skipwithvm;
477 
478  if (verbose)
479  {
480  if (vacrel->aggressive)
481  ereport(INFO,
482  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
483  vacrel->dbname, vacrel->relnamespace,
484  vacrel->relname)));
485  else
486  ereport(INFO,
487  (errmsg("vacuuming \"%s.%s.%s\"",
488  vacrel->dbname, vacrel->relnamespace,
489  vacrel->relname)));
490  }
491 
492  /*
493  * Allocate dead_items array memory using dead_items_alloc. This handles
494  * parallel VACUUM initialization as part of allocating shared memory
495  * space used for dead_items. (But do a failsafe precheck first, to
496  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
497  * is already dangerously old.)
498  */
499  lazy_check_wraparound_failsafe(vacrel);
500  dead_items_alloc(vacrel, params->nworkers);
501 
502  /*
503  * Call lazy_scan_heap to perform all required heap pruning, index
504  * vacuuming, and heap vacuuming (plus related processing)
505  */
506  lazy_scan_heap(vacrel);
507 
508  /*
509  * Free resources managed by dead_items_alloc. This ends parallel mode in
510  * passing when necessary.
511  */
512  dead_items_cleanup(vacrel);
514 
515  /*
516  * Update pg_class entries for each of rel's indexes where appropriate.
517  *
518  * Unlike the later update to rel's pg_class entry, this is not critical.
519  * Maintains relpages/reltuples statistics used by the planner only.
520  */
521  if (vacrel->do_index_cleanup)
522  update_relstats_all_indexes(vacrel);
523 
524  /* Done with rel's indexes */
525  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
526 
527  /* Optionally truncate rel */
528  if (should_attempt_truncation(vacrel))
529  lazy_truncate_heap(vacrel);
530 
531  /* Pop the error context stack */
532  error_context_stack = errcallback.previous;
533 
534  /* Report that we are now doing final cleanup */
537 
538  /*
539  * Prepare to update rel's pg_class entry.
540  *
541  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
542  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
543  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
544  */
545  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
547  vacrel->cutoffs.relfrozenxid,
548  vacrel->NewRelfrozenXid));
549  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
551  vacrel->cutoffs.relminmxid,
552  vacrel->NewRelminMxid));
553  if (vacrel->skippedallvis)
554  {
555  /*
556  * Must keep original relfrozenxid in a non-aggressive VACUUM that
557  * chose to skip an all-visible page range. The state that tracks new
558  * values will have missed unfrozen XIDs from the pages we skipped.
559  */
560  Assert(!vacrel->aggressive);
563  }
564 
565  /*
566  * For safety, clamp relallvisible to be not more than what we're setting
567  * pg_class.relpages to
568  */
569  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
570  visibilitymap_count(rel, &new_rel_allvisible, NULL);
571  if (new_rel_allvisible > new_rel_pages)
572  new_rel_allvisible = new_rel_pages;
573 
574  /*
575  * Now actually update rel's pg_class entry.
576  *
577  * In principle new_live_tuples could be -1 indicating that we (still)
578  * don't know the tuple count. In practice that can't happen, since we
579  * scan every page that isn't skipped using the visibility map.
580  */
581  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
582  new_rel_allvisible, vacrel->nindexes > 0,
583  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
584  &frozenxid_updated, &minmulti_updated, false);
585 
586  /*
587  * Report results to the cumulative stats system, too.
588  *
589  * Deliberately avoid telling the stats system about LP_DEAD items that
590  * remain in the table due to VACUUM bypassing index and heap vacuuming.
591  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
592  * It seems like a good idea to err on the side of not vacuuming again too
593  * soon in cases where the failsafe prevented significant amounts of heap
594  * vacuuming.
595  */
597  rel->rd_rel->relisshared,
598  Max(vacrel->new_live_tuples, 0),
599  vacrel->recently_dead_tuples +
600  vacrel->missed_dead_tuples);
602 
603  if (instrument)
604  {
605  TimestampTz endtime = GetCurrentTimestamp();
606 
607  if (verbose || params->log_min_duration == 0 ||
608  TimestampDifferenceExceeds(starttime, endtime,
609  params->log_min_duration))
610  {
611  long secs_dur;
612  int usecs_dur;
613  WalUsage walusage;
615  char *msgfmt;
616  int32 diff;
617  int64 PageHitOp = VacuumPageHit - StartPageHit,
618  PageMissOp = VacuumPageMiss - StartPageMiss,
619  PageDirtyOp = VacuumPageDirty - StartPageDirty;
620  double read_rate = 0,
621  write_rate = 0;
622 
623  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
624  memset(&walusage, 0, sizeof(WalUsage));
625  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
626 
628  if (verbose)
629  {
630  /*
631  * Aggressiveness already reported earlier, in dedicated
632  * VACUUM VERBOSE ereport
633  */
634  Assert(!params->is_wraparound);
635  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
636  }
637  else if (params->is_wraparound)
638  {
639  /*
640  * While it's possible for a VACUUM to be both is_wraparound
641  * and !aggressive, that's just a corner-case -- is_wraparound
642  * implies aggressive. Produce distinct output for the corner
643  * case all the same, just in case.
644  */
645  if (vacrel->aggressive)
646  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
647  else
648  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
649  }
650  else
651  {
652  if (vacrel->aggressive)
653  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
654  else
655  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
656  }
657  appendStringInfo(&buf, msgfmt,
658  vacrel->dbname,
659  vacrel->relnamespace,
660  vacrel->relname,
661  vacrel->num_index_scans);
662  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
663  vacrel->removed_pages,
664  new_rel_pages,
665  vacrel->scanned_pages,
666  orig_rel_pages == 0 ? 100.0 :
667  100.0 * vacrel->scanned_pages / orig_rel_pages);
669  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
670  (long long) vacrel->tuples_deleted,
671  (long long) vacrel->new_rel_tuples,
672  (long long) vacrel->recently_dead_tuples);
673  if (vacrel->missed_dead_tuples > 0)
675  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
676  (long long) vacrel->missed_dead_tuples,
677  vacrel->missed_dead_pages);
678  diff = (int32) (ReadNextTransactionId() -
679  vacrel->cutoffs.OldestXmin);
681  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
682  vacrel->cutoffs.OldestXmin, diff);
683  if (frozenxid_updated)
684  {
685  diff = (int32) (vacrel->NewRelfrozenXid -
686  vacrel->cutoffs.relfrozenxid);
688  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
689  vacrel->NewRelfrozenXid, diff);
690  }
691  if (minmulti_updated)
692  {
693  diff = (int32) (vacrel->NewRelminMxid -
694  vacrel->cutoffs.relminmxid);
696  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
697  vacrel->NewRelminMxid, diff);
698  }
699  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
700  vacrel->frozen_pages,
701  orig_rel_pages == 0 ? 100.0 :
702  100.0 * vacrel->frozen_pages / orig_rel_pages,
703  (long long) vacrel->tuples_frozen);
704  if (vacrel->do_index_vacuuming)
705  {
706  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
707  appendStringInfoString(&buf, _("index scan not needed: "));
708  else
709  appendStringInfoString(&buf, _("index scan needed: "));
710 
711  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
712  }
713  else
714  {
716  appendStringInfoString(&buf, _("index scan bypassed: "));
717  else
718  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
719 
720  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
721  }
722  appendStringInfo(&buf, msgfmt,
723  vacrel->lpdead_item_pages,
724  orig_rel_pages == 0 ? 100.0 :
725  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
726  (long long) vacrel->lpdead_items);
727  for (int i = 0; i < vacrel->nindexes; i++)
728  {
729  IndexBulkDeleteResult *istat = vacrel->indstats[i];
730 
731  if (!istat)
732  continue;
733 
735  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
736  indnames[i],
737  istat->num_pages,
738  istat->pages_newly_deleted,
739  istat->pages_deleted,
740  istat->pages_free);
741  }
742  if (track_io_timing)
743  {
744  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
745  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
746 
747  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
748  read_ms, write_ms);
749  }
750  if (secs_dur > 0 || usecs_dur > 0)
751  {
752  read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
753  (secs_dur + usecs_dur / 1000000.0);
754  write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
755  (secs_dur + usecs_dur / 1000000.0);
756  }
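 /*
  * For instance, 10000 page misses of 8kB each over a 4 second run would
  * be reported as an average read rate of roughly 19.5 MB/s.
  */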
757  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
758  read_rate, write_rate);
760  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
761  (long long) PageHitOp,
762  (long long) PageMissOp,
763  (long long) PageDirtyOp);
765  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
766  (long long) walusage.wal_records,
767  (long long) walusage.wal_fpi,
768  (unsigned long long) walusage.wal_bytes);
769  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
770 
771  ereport(verbose ? INFO : LOG,
772  (errmsg_internal("%s", buf.data)));
773  pfree(buf.data);
774  }
775  }
776 
777  /* Cleanup index statistics and index names */
778  for (int i = 0; i < vacrel->nindexes; i++)
779  {
780  if (vacrel->indstats[i])
781  pfree(vacrel->indstats[i]);
782 
783  if (instrument)
784  pfree(indnames[i]);
785  }
786 }
787 
788 /*
789  * lazy_scan_heap() -- workhorse function for VACUUM
790  *
791  * This routine prunes each page in the heap, and considers the need to
792  * freeze remaining tuples with storage (not including pages that can be
793  * skipped using the visibility map). Also performs related maintenance
794  * of the FSM and visibility map. These steps all take place during an
795  * initial pass over the target heap relation.
796  *
797  * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
798  * consists of deleting index tuples that point to LP_DEAD items left in
799  * heap pages following pruning. The earlier initial pass over the heap
800  * will have collected the TIDs whose index tuples need to be removed.
801  *
802  * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
803  * largely consists of marking LP_DEAD items (from collected TID array)
804  * as LP_UNUSED. This has to happen in a second, final pass over the
805  * heap, to preserve a basic invariant that all index AMs rely on: no
806  * extant index tuple can ever be allowed to contain a TID that points to
807  * an LP_UNUSED line pointer in the heap. We must disallow premature
808  * recycling of line pointers to avoid index scans that get confused
809  * about which TID points to which tuple immediately after recycling.
810  * (Actually, this isn't a concern when the target heap relation happens
811  * to have no indexes, which allows us to safely apply the one-pass strategy
812  * as an optimization).
813  *
814  * In practice we often have enough space to fit all TIDs, and so won't
815  * need to call lazy_vacuum more than once, after our initial pass over
816  * the heap has totally finished. Otherwise things are slightly more
817  * complicated: our "initial pass" over the heap applies only to those
818  * pages that were pruned before we needed to call lazy_vacuum, and our
819  * "final pass" over the heap only vacuums these same heap pages.
820  * However, we process indexes in full every time lazy_vacuum is called,
821  * which makes index processing very inefficient when memory is in short
822  * supply.
823  */
824 static void
825 lazy_scan_heap(LVRelState *vacrel)
826 {
827  BlockNumber rel_pages = vacrel->rel_pages,
828  blkno,
829  next_unskippable_block,
830  next_fsm_block_to_vacuum = 0;
831  VacDeadItems *dead_items = vacrel->dead_items;
832  Buffer vmbuffer = InvalidBuffer;
833  bool next_unskippable_allvis,
834  skipping_current_range;
835  const int initprog_index[] = {
839  };
840  int64 initprog_val[3];
841 
842  /* Report that we're scanning the heap, advertising total # of blocks */
843  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
844  initprog_val[1] = rel_pages;
845  initprog_val[2] = dead_items->max_items;
846  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
847 
848  /* Set up an initial range of skippable blocks using the visibility map */
849  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
850  &next_unskippable_allvis,
851  &skipping_current_range);
852  for (blkno = 0; blkno < rel_pages; blkno++)
853  {
854  Buffer buf;
855  Page page;
856  bool all_visible_according_to_vm;
857  LVPagePruneState prunestate;
858 
859  if (blkno == next_unskippable_block)
860  {
861  /*
862  * Can't skip this page safely. Must scan the page. But
863  * determine the next skippable range after the page first.
864  */
865  all_visible_according_to_vm = next_unskippable_allvis;
866  next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
867  blkno + 1,
868  &next_unskippable_allvis,
869  &skipping_current_range);
870 
871  Assert(next_unskippable_block >= blkno + 1);
872  }
873  else
874  {
875  /* Last page always scanned (may need to set nonempty_pages) */
876  Assert(blkno < rel_pages - 1);
877 
878  if (skipping_current_range)
879  continue;
880 
881  /* Current range is too small to skip -- just scan the page */
882  all_visible_according_to_vm = true;
883  }
884 
885  vacrel->scanned_pages++;
886 
887  /* Report as block scanned, update error traceback information */
890  blkno, InvalidOffsetNumber);
891 
893 
894  /*
895  * Regularly check if wraparound failsafe should trigger.
896  *
897  * There is a similar check inside lazy_vacuum_all_indexes(), but
898  * relfrozenxid might start to look dangerously old before we reach
899  * that point. This check also provides failsafe coverage for the
900  * one-pass strategy, and the two-pass strategy with the index_cleanup
901  * param set to 'off'.
902  */
903  if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
904  lazy_check_wraparound_failsafe(vacrel);
905 
906  /*
907  * Consider whether we definitely have enough space to process the TIDs
908  * on this page already. If we are close to overrunning the available space for
909  * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
910  * this page.
911  */
912  Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
913  if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
914  {
915  /*
916  * Before beginning index vacuuming, we release any pin we may
917  * hold on the visibility map page. This isn't necessary for
918  * correctness, but we do it anyway to avoid holding the pin
919  * across a lengthy, unrelated operation.
920  */
921  if (BufferIsValid(vmbuffer))
922  {
923  ReleaseBuffer(vmbuffer);
924  vmbuffer = InvalidBuffer;
925  }
926 
927  /* Perform a round of index and heap vacuuming */
928  vacrel->consider_bypass_optimization = false;
929  lazy_vacuum(vacrel);
930 
931  /*
932  * Vacuum the Free Space Map to make newly-freed space visible on
933  * upper-level FSM pages. Note we have not yet processed blkno.
934  */
935  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
936  blkno);
937  next_fsm_block_to_vacuum = blkno;
938 
939  /* Report that we are once again scanning the heap */
942  }
943 
944  /*
945  * Pin the visibility map page in case we need to mark the page
946  * all-visible. In most cases this will be very cheap, because we'll
947  * already have the correct page pinned anyway.
948  */
949  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
950 
951  /*
952  * We need a buffer cleanup lock to prune HOT chains and defragment
953  * the page in lazy_scan_prune. But when it's not possible to acquire
954  * a cleanup lock right away, we may be able to settle for reduced
955  * processing using lazy_scan_noprune.
956  */
957  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
958  vacrel->bstrategy);
959  page = BufferGetPage(buf);
961  {
962  bool hastup,
963  recordfreespace;
964 
966 
967  /* Check for new or empty pages before lazy_scan_noprune call */
968  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, true,
969  vmbuffer))
970  {
971  /* Processed as new/empty page (lock and pin released) */
972  continue;
973  }
974 
975  /* Collect LP_DEAD items in dead_items array, count tuples */
976  if (lazy_scan_noprune(vacrel, buf, blkno, page, &hastup,
977  &recordfreespace))
978  {
979  Size freespace = 0;
980 
981  /*
982  * Processed page successfully (without cleanup lock) -- just
983  * need to perform rel truncation and FSM steps, much like the
984  * lazy_scan_prune case. Don't bother trying to match its
985  * visibility map setting steps, though.
986  */
987  if (hastup)
988  vacrel->nonempty_pages = blkno + 1;
989  if (recordfreespace)
990  freespace = PageGetHeapFreeSpace(page);
992  if (recordfreespace)
993  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
994  continue;
995  }
996 
997  /*
998  * lazy_scan_noprune could not do all required processing. Wait
999  * for a cleanup lock, and call lazy_scan_prune in the usual way.
1000  */
1001  Assert(vacrel->aggressive);
1004  }
1005 
1006  /* Check for new or empty pages before lazy_scan_prune call */
1007  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, false, vmbuffer))
1008  {
1009  /* Processed as new/empty page (lock and pin released) */
1010  continue;
1011  }
1012 
1013  /*
1014  * Prune, freeze, and count tuples.
1015  *
1016  * Accumulates details of remaining LP_DEAD line pointers on page in
1017  * dead_items array. This includes LP_DEAD line pointers that we
1018  * pruned ourselves, as well as existing LP_DEAD line pointers that
1019  * were pruned some time earlier. Also considers freezing XIDs in the
1020  * tuple headers of remaining items with storage.
1021  */
1022  lazy_scan_prune(vacrel, buf, blkno, page, &prunestate);
1023 
1024  Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
1025 
1026  /* Remember the location of the last page with nonremovable tuples */
1027  if (prunestate.hastup)
1028  vacrel->nonempty_pages = blkno + 1;
1029 
1030  if (vacrel->nindexes == 0)
1031  {
1032  /*
1033  * Consider the need to do page-at-a-time heap vacuuming when
1034  * using the one-pass strategy now.
1035  *
1036  * The one-pass strategy will never call lazy_vacuum(). The steps
1037  * performed here can be thought of as the one-pass equivalent of
1038  * a call to lazy_vacuum().
1039  */
1040  if (prunestate.has_lpdead_items)
1041  {
1042  Size freespace;
1043 
1044  lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
1045 
1046  /* Forget the LP_DEAD items that we just vacuumed */
1047  dead_items->num_items = 0;
1048 
1049  /*
1050  * Now perform FSM processing for blkno, and move on to next
1051  * page.
1052  *
1053  * Our call to lazy_vacuum_heap_page() will have considered if
1054  * it's possible to set all_visible/all_frozen independently
1055  * of lazy_scan_prune(). Note that prunestate was invalidated
1056  * by lazy_vacuum_heap_page() call.
1057  */
1058  freespace = PageGetHeapFreeSpace(page);
1059 
1061  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1062 
1063  /*
1064  * Periodically perform FSM vacuuming to make newly-freed
1065  * space visible on upper FSM pages. FreeSpaceMapVacuumRange()
1066  * vacuums the portion of the freespace map covering heap
1067  * pages from start to end - 1. Include the block we just
1068  * vacuumed by passing it blkno + 1. Overflow isn't an issue
1069  * because MaxBlockNumber + 1 is InvalidBlockNumber which
1070  * causes FreeSpaceMapVacuumRange() to vacuum freespace map
1071  * pages covering the remainder of the relation.
1072  */
1073  if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1074  {
1075  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1076  blkno + 1);
1077  next_fsm_block_to_vacuum = blkno + 1;
1078  }
1079 
1080  continue;
1081  }
1082 
1083  /*
1084  * There was no call to lazy_vacuum_heap_page() because pruning
1085  * didn't encounter/create any LP_DEAD items that needed to be
1086  * vacuumed. Prune state has not been invalidated, so proceed
1087  * with prunestate-driven visibility map and FSM steps (just like
1088  * the two-pass strategy).
1089  */
1090  Assert(dead_items->num_items == 0);
1091  }
1092 
1093  /*
1094  * Handle setting visibility map bit based on information from the VM
1095  * (as of last lazy_scan_skip() call), and from prunestate
1096  */
1097  if (!all_visible_according_to_vm && prunestate.all_visible)
1098  {
1100 
1101  if (prunestate.all_frozen)
1102  {
1104  flags |= VISIBILITYMAP_ALL_FROZEN;
1105  }
1106 
1107  /*
1108  * It should never be the case that the visibility map page is set
1109  * while the page-level bit is clear, but the reverse is allowed
1110  * (if checksums are not enabled). Regardless, set both bits so
1111  * that we get back in sync.
1112  *
1113  * NB: If the heap page is all-visible but the VM bit is not set,
1114  * we don't need to dirty the heap page. However, if checksums
1115  * are enabled, we do need to make sure that the heap page is
1116  * dirtied before passing it to visibilitymap_set(), because it
1117  * may be logged. Given that this situation should only happen in
1118  * rare cases after a crash, it is not worth optimizing.
1119  */
1120  PageSetAllVisible(page);
1122  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1123  vmbuffer, prunestate.visibility_cutoff_xid,
1124  flags);
1125  }
1126 
1127  /*
1128  * As of PostgreSQL 9.2, the visibility map bit should never be set if
1129  * the page-level bit is clear. However, it's possible that the bit
1130  * got cleared after lazy_scan_skip() was called, so we must recheck
1131  * with buffer lock before concluding that the VM is corrupt.
1132  */
1133  else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1134  visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1135  {
1136  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1137  vacrel->relname, blkno);
1138  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1140  }
1141 
1142  /*
1143  * It's possible for the value returned by
1144  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1145  * wrong for us to see tuples that appear to not be visible to
1146  * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1147  * xmin value never moves backwards, but
1148  * GetOldestNonRemovableTransactionId() is conservative and sometimes
1149  * returns a value that's unnecessarily small, so if we see that
1150  * contradiction it just means that the tuples that we think are not
1151  * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1152  * is correct.
1153  *
1154  * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE
1155  * set, however.
1156  */
1157  else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
1158  {
1159  elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1160  vacrel->relname, blkno);
1161  PageClearAllVisible(page);
1163  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1165  }
1166 
1167  /*
1168  * If the all-visible page is all-frozen but not marked as such yet,
1169  * mark it as all-frozen. Note that all_frozen is only valid if
1170  * all_visible is true, so we must check both prunestate fields.
1171  */
1172  else if (all_visible_according_to_vm && prunestate.all_visible &&
1173  prunestate.all_frozen &&
1174  !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1175  {
1176  /*
1177  * Avoid relying on all_visible_according_to_vm as a proxy for the
1178  * page-level PD_ALL_VISIBLE bit being set, since it might have
1179  * become stale -- even when all_visible is set in prunestate
1180  */
1181  if (!PageIsAllVisible(page))
1182  {
1183  PageSetAllVisible(page);
1185  }
1186 
1187  /*
1188  * Set the page all-frozen (and all-visible) in the VM.
1189  *
1190  * We can pass InvalidTransactionId as our visibility_cutoff_xid,
1191  * since a snapshotConflictHorizon sufficient to make everything
1192  * safe for REDO was logged when the page's tuples were frozen.
1193  */
1195  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1196  vmbuffer, InvalidTransactionId,
1199  }
1200 
1201  /*
1202  * Final steps for block: drop cleanup lock, record free space in the
1203  * FSM
1204  */
1205  if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1206  {
1207  /*
1208  * Wait until lazy_vacuum_heap_rel() to save free space. This
1209  * doesn't just save us some cycles; it also allows us to record
1210  * any additional free space that lazy_vacuum_heap_page() will
1211  * make available in cases where it's possible to truncate the
1212  * page's line pointer array.
1213  *
1214  * Note: It's not in fact 100% certain that we really will call
1215  * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1216  * index vacuuming (and so must skip heap vacuuming). This is
1217  * deemed okay because it only happens in emergencies, or when
1218  * there is very little free space anyway. (Besides, we start
1219  * recording free space in the FSM once index vacuuming has been
1220  * abandoned.)
1221  *
1222  * Note: The one-pass (no indexes) case is only supposed to make
1223  * it this far when there were no LP_DEAD items during pruning.
1224  */
1225  Assert(vacrel->nindexes > 0);
1227  }
1228  else
1229  {
1230  Size freespace = PageGetHeapFreeSpace(page);
1231 
1233  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1234  }
1235  }
1236 
1237  vacrel->blkno = InvalidBlockNumber;
1238  if (BufferIsValid(vmbuffer))
1239  ReleaseBuffer(vmbuffer);
1240 
1241  /* report that everything is now scanned */
1243 
1244  /* now we can compute the new value for pg_class.reltuples */
1245  vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1246  vacrel->scanned_pages,
1247  vacrel->live_tuples);
1248 
1249  /*
1250  * Also compute the total number of surviving heap entries. In the
1251  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1252  */
1253  vacrel->new_rel_tuples =
1254  Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1255  vacrel->missed_dead_tuples;
1256 
1257  /*
1258  * Do index vacuuming (call each index's ambulkdelete routine), then do
1259  * related heap vacuuming
1260  */
1261  if (dead_items->num_items > 0)
1262  lazy_vacuum(vacrel);
1263 
1264  /*
1265  * Vacuum the remainder of the Free Space Map. We must do this whether or
1266  * not there were indexes, and whether or not we bypassed index vacuuming.
1267  */
1268  if (blkno > next_fsm_block_to_vacuum)
1269  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1270 
1271  /* report all blocks vacuumed */
1273 
1274  /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1275  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1276  lazy_cleanup_all_indexes(vacrel);
1277 }
1278 
1279 /*
1280  * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
1281  *
1282  * lazy_scan_heap() calls here every time it needs to set up a new range of
1283  * blocks to skip via the visibility map. Caller passes the next block in
1284  * line. We return a next_unskippable_block for this range. When there are
1285  * no skippable blocks we just return caller's next_block. The all-visible
1286  * status of the returned block is set in *next_unskippable_allvis for caller,
1287  * too. Block usually won't be all-visible (since it's unskippable), but it
1288  * can be during aggressive VACUUMs (as well as in certain edge cases).
1289  *
1290  * Sets *skipping_current_range to indicate if caller should skip this range.
1291  * Costs and benefits drive our decision. Very small ranges won't be skipped.
1292  *
1293  * Note: our opinion of which blocks can be skipped can go stale immediately.
1294  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1295  * was concurrently cleared, though. All that matters is that caller scan all
1296  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1297  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1298  * older XIDs/MXIDs. The vacrel->skippedallvis flag will be set here when the
1299  * choice to skip such a range is actually made, making everything safe.)
1300  */
1301 static BlockNumber
1302 lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
1303  bool *next_unskippable_allvis, bool *skipping_current_range)
1304 {
1305  BlockNumber rel_pages = vacrel->rel_pages,
1306  next_unskippable_block = next_block,
1307  nskippable_blocks = 0;
1308  bool skipsallvis = false;
1309 
1310  *next_unskippable_allvis = true;
1311  while (next_unskippable_block < rel_pages)
1312  {
1313  uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1314  next_unskippable_block,
1315  vmbuffer);
1316 
1317  if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1318  {
1319  Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1320  *next_unskippable_allvis = false;
1321  break;
1322  }
1323 
1324  /*
1325  * Caller must scan the last page to determine whether it has tuples
1326  * (caller must have the opportunity to set vacrel->nonempty_pages).
1327  * This rule avoids having lazy_truncate_heap() take access-exclusive
1328  * lock on rel to attempt a truncation that fails anyway, just because
1329  * there are tuples on the last page (it is likely that there will be
1330  * tuples on other nearby pages as well, but those can be skipped).
1331  *
1332  * Implement this by always treating the last block as unsafe to skip.
1333  */
1334  if (next_unskippable_block == rel_pages - 1)
1335  break;
1336 
1337  /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1338  if (!vacrel->skipwithvm)
1339  {
1340  /* Caller shouldn't rely on all_visible_according_to_vm */
1341  *next_unskippable_allvis = false;
1342  break;
1343  }
1344 
1345  /*
1346  * Aggressive VACUUM caller can't skip pages just because they are
1347  * all-visible. They may still skip all-frozen pages, which can't
1348  * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1349  */
1350  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1351  {
1352  if (vacrel->aggressive)
1353  break;
1354 
1355  /*
1356  * All-visible block is safe to skip in non-aggressive case. But
1357  * remember that the final range contains such a block for later.
1358  */
1359  skipsallvis = true;
1360  }
1361 
1363  next_unskippable_block++;
1364  nskippable_blocks++;
1365  }
1366 
1367  /*
1368  * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
1369  * pages. Since we're reading sequentially, the OS should be doing
1370  * readahead for us, so there's no gain in skipping a page now and then.
1371  * Skipping such a range might even discourage sequential detection.
1372  *
1373  * This test also enables more frequent relfrozenxid advancement during
1374  * non-aggressive VACUUMs. If the range has any all-visible pages then
1375  * skipping makes updating relfrozenxid unsafe, which is a real downside.
1376  */
1377  if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
1378  *skipping_current_range = false;
1379  else
1380  {
1381  *skipping_current_range = true;
1382  if (skipsallvis)
1383  vacrel->skippedallvis = true;
1384  }
1385 
1386  return next_unskippable_block;
1387 }
1388 
1389 /*
1390  * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1391  *
1392  * Must call here to handle both new and empty pages before calling
1393  * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1394  * with new or empty pages.
1395  *
1396  * It's necessary to consider new pages as a special case, since the rules for
1397  * maintaining the visibility map and FSM with empty pages are a little
1398  * different (though new pages can be truncated away during rel truncation).
1399  *
1400  * Empty pages are not really a special case -- they're just heap pages that
1401  * have no allocated tuples (including even LP_UNUSED items). You might
1402  * wonder why we need to handle them here all the same. It's only necessary
1403  * because of a corner-case involving a hard crash during heap relation
1404  * extension. If we ever make relation-extension crash safe, then it should
1405  * no longer be necessary to deal with empty pages here (or new pages, for
1406  * that matter).
1407  *
1408  * Caller must hold at least a shared lock. We might need to escalate the
1409  * lock in that case, so the type of lock caller holds needs to be specified
1410  * using 'sharelock' argument.
1411  *
1412  * Returns false in common case where caller should go on to call
1413  * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1414  * that lazy_scan_heap is done processing the page, releasing lock on caller's
1415  * behalf.
1416  */
1417 static bool
1418 lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1419  Page page, bool sharelock, Buffer vmbuffer)
1420 {
1421  Size freespace;
1422 
1423  if (PageIsNew(page))
1424  {
1425  /*
1426  * All-zeroes pages can be left over if a backend extends the relation
1427  * by a single page, but crashes before the newly initialized page has
1428  * been written out, or if it bulk-extends the relation (which creates
1429  * a number of empty pages at the tail end of the relation) and then
1430  * enters them into the FSM.
1431  *
1432  * Note we do not enter the page into the visibilitymap. That has the
1433  * downside that we repeatedly visit this page in subsequent vacuums,
1434  * but otherwise we'll never discover the space on a promoted standby.
1435  * The harm of repeated checking ought to normally not be too bad. The
1436  * space usually should be used at some point, otherwise there
1437  * wouldn't be any regular vacuums.
1438  *
1439  * Make sure these pages are in the FSM, to ensure they can be reused.
1440  * Do that by testing if there's any space recorded for the page. If
1441  * not, enter it. We do so after releasing the lock on the heap page;
1442  * the FSM is approximate, after all.
1443  */
1445 
1446  if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1447  {
1448  freespace = BLCKSZ - SizeOfPageHeaderData;
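 /* e.g. 8192 - 24 == 8168 usable bytes with the default 8kB BLCKSZ */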
1449 
1450  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1451  }
1452 
1453  return true;
1454  }
1455 
1456  if (PageIsEmpty(page))
1457  {
1458  /*
1459  * It seems likely that caller will always be able to get a cleanup
1460  * lock on an empty page. But don't take any chances -- escalate to
1461  * an exclusive lock (still don't need a cleanup lock, though).
1462  */
1463  if (sharelock)
1464  {
1467 
1468  if (!PageIsEmpty(page))
1469  {
1470  /* page isn't new or empty -- keep lock and pin for now */
1471  return false;
1472  }
1473  }
1474  else
1475  {
1476  /* Already have a full cleanup lock (which is more than enough) */
1477  }
1478 
1479  /*
1480  * Unlike new pages, empty pages are always set all-visible and
1481  * all-frozen.
1482  */
1483  if (!PageIsAllVisible(page))
1484  {
1486 
1487  /* mark buffer dirty before writing a WAL record */
1489 
1490  /*
1491  * It's possible that another backend has extended the heap,
1492  * initialized the page, and then failed to WAL-log the page due
1493  * to an ERROR. Since heap extension is not WAL-logged, recovery
1494  * might try to replay our record setting the page all-visible and
1495  * find that the page isn't initialized, which will cause a PANIC.
1496  * To prevent that, check whether the page has been previously
1497  * WAL-logged, and if not, do that now.
1498  */
1499  if (RelationNeedsWAL(vacrel->rel) &&
1500  PageGetLSN(page) == InvalidXLogRecPtr)
1501  log_newpage_buffer(buf, true);
1502 
1503  PageSetAllVisible(page);
1504  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1505  vmbuffer, InvalidTransactionId,
1507  END_CRIT_SECTION();
1508  }
1509 
1510  freespace = PageGetHeapFreeSpace(page);
1512  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1513  return true;
1514  }
1515 
1516  /* page isn't new or empty -- keep lock and pin */
1517  return false;
1518 }
1519 
1520 /*
1521  * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1522  *
1523  * Caller must hold pin and buffer cleanup lock on the buffer.
1524  *
1525  * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
1526  * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
1527  * whether or not a tuple should be considered DEAD. This happened when an
1528  * inserting transaction concurrently aborted (after our heap_page_prune()
1529  * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
1530  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
1531  * but nevertheless were left with storage after pruning.
1532  *
1533  * As of Postgres 17, we circumvent this problem altogether by reusing the
1534  * result of heap_page_prune()'s visibility check. Without the second call to
1535  * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
1536  * disagreement. We'll just handle such tuples as if they had become fully dead
1537  * right after this operation completes instead of in the middle of it. Note that
1538  * any tuple that becomes dead after the call to heap_page_prune() can't need to
1539  * be frozen, because it was visible to another session when vacuum started.
1540  */
1541 static void
1542 lazy_scan_prune(LVRelState *vacrel,
1543  Buffer buf,
1544  BlockNumber blkno,
1545  Page page,
1546  LVPagePruneState *prunestate)
1547 {
1548  Relation rel = vacrel->rel;
1549  OffsetNumber offnum,
1550  maxoff;
1551  ItemId itemid;
1552  PruneResult presult;
1553  int tuples_frozen,
1554  lpdead_items,
1555  live_tuples,
1556  recently_dead_tuples;
1557  HeapPageFreeze pagefrz;
1558  int64 fpi_before = pgWalUsage.wal_fpi;
1559  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1561 
1562  Assert(BufferGetBlockNumber(buf) == blkno);
1563 
1564  /*
1565  * maxoff might be reduced following line pointer array truncation in
1566  * heap_page_prune. That's safe for us to ignore, since the reclaimed
1567  * space will continue to look like LP_UNUSED items below.
1568  */
1569  maxoff = PageGetMaxOffsetNumber(page);
1570 
1571  /* Initialize (or reset) page-level state */
1572  pagefrz.freeze_required = false;
1573  pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1574  pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
1575  pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1576  pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1577  tuples_frozen = 0;
1578  lpdead_items = 0;
1579  live_tuples = 0;
1580  recently_dead_tuples = 0;
1581 
1582  /*
1583  * Prune all HOT-update chains in this page.
1584  *
1585  * We count the number of tuples removed from the page by the pruning step
1586  * in presult.ndeleted. It should not be confused with lpdead_items;
1587  * lpdead_items's final value can be thought of as the number of tuples
1588  * that were deleted from indexes.
1589  */
1590  heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum);
1591 
1592  /*
1593  * Now scan the page to collect LP_DEAD items and check for tuples
1594  * requiring freezing among remaining tuples with storage
1595  */
1596  prunestate->hastup = false;
1597  prunestate->has_lpdead_items = false;
1598  prunestate->all_visible = true;
1599  prunestate->all_frozen = true;
1600  prunestate->visibility_cutoff_xid = InvalidTransactionId;
1601 
1602  for (offnum = FirstOffsetNumber;
1603  offnum <= maxoff;
1604  offnum = OffsetNumberNext(offnum))
1605  {
1606  HeapTupleHeader htup;
1607  bool totally_frozen;
1608 
1609  /*
1610  * Set the offset number so that we can display it along with any
1611  * error that occurred while processing this tuple.
1612  */
1613  vacrel->offnum = offnum;
1614  itemid = PageGetItemId(page, offnum);
1615 
1616  if (!ItemIdIsUsed(itemid))
1617  continue;
1618 
1619  /* Redirect items mustn't be touched */
1620  if (ItemIdIsRedirected(itemid))
1621  {
1622  /* page makes rel truncation unsafe */
1623  prunestate->hastup = true;
1624  continue;
1625  }
1626 
1627  if (ItemIdIsDead(itemid))
1628  {
1629  /*
1630  * Deliberately don't set hastup for LP_DEAD items. We make the
1631  * soft assumption that any LP_DEAD items encountered here will
1632  * become LP_UNUSED later on, before count_nondeletable_pages is
1633  * reached. If we don't make this assumption then rel truncation
1634  * will only happen every other VACUUM, at most. Besides, VACUUM
1635  * must treat hastup/nonempty_pages as provisional no matter how
1636  * LP_DEAD items are handled (handled here, or handled later on).
1637  *
1638  * Also deliberately delay unsetting all_visible until just before
1639  * we return to lazy_scan_heap caller, as explained in full below.
1640  * (This is another case where it's useful to anticipate that any
1641  * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
1642  */
1643  deadoffsets[lpdead_items++] = offnum;
1644  continue;
1645  }
1646 
1647  Assert(ItemIdIsNormal(itemid));
1648 
1649  htup = (HeapTupleHeader) PageGetItem(page, itemid);
1650 
1651  /*
1652  * The criteria for counting a tuple as live in this block need to
1653  * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
1654  * and ANALYZE may produce wildly different reltuples values, e.g.
1655  * when there are many recently-dead tuples.
1656  *
1657  * The logic here is a bit simpler than acquire_sample_rows(), as
1658  * VACUUM can't run inside a transaction block, which makes some cases
1659  * impossible (e.g. in-progress insert from the same transaction).
1660  *
1661  * We treat LP_DEAD items (which are the closest thing to DEAD tuples
1662  * that might be seen here) differently, too: we assume that they'll
1663  * become LP_UNUSED before VACUUM finishes. This difference is only
1664  * superficial. VACUUM effectively agrees with ANALYZE about DEAD
1665  * items, in the end. VACUUM won't remember LP_DEAD items, but only
1666  * because they're not supposed to be left behind when it is done.
1667  * (Cases where we bypass index vacuuming will violate this optimistic
1668  * assumption, but the overall impact of that should be negligible.)
1669  */
1670  switch (htsv_get_valid_status(presult.htsv[offnum]))
1671  {
1672  case HEAPTUPLE_LIVE:
1673 
1674  /*
1675  * Count it as live. Not only is this natural, but it's also
1676  * what acquire_sample_rows() does.
1677  */
1678  live_tuples++;
1679 
1680  /*
1681  * Is the tuple definitely visible to all transactions?
1682  *
1683  * NB: Like with per-tuple hint bits, we can't set the
1684  * PD_ALL_VISIBLE flag if the inserter committed
1685  * asynchronously. See SetHintBits for more info. Check that
1686  * the tuple is hinted xmin-committed because of that.
1687  */
1688  if (prunestate->all_visible)
1689  {
1690  TransactionId xmin;
1691 
1692  if (!HeapTupleHeaderXminCommitted(htup))
1693  {
1694  prunestate->all_visible = false;
1695  break;
1696  }
1697 
1698  /*
1699  * The inserter definitely committed. But is it old enough
1700  * that everyone sees it as committed?
1701  */
1702  xmin = HeapTupleHeaderGetXmin(htup);
1703  if (!TransactionIdPrecedes(xmin,
1704  vacrel->cutoffs.OldestXmin))
1705  {
1706  prunestate->all_visible = false;
1707  break;
1708  }
1709 
1710  /* Track newest xmin on page. */
1711  if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid) &&
1712  TransactionIdIsNormal(xmin))
1713  prunestate->visibility_cutoff_xid = xmin;
1714  }
1715  break;
1716  case HEAPTUPLE_RECENTLY_DEAD:
1717 
1718  /*
1719  * If tuple is recently dead then we must not remove it from
1720  * the relation. (We only remove items that are LP_DEAD from
1721  * pruning.)
1722  */
1723  recently_dead_tuples++;
1724  prunestate->all_visible = false;
1725  break;
1726  case HEAPTUPLE_INSERT_IN_PROGRESS:
1727 
1728  /*
1729  * We do not count these rows as live, because we expect the
1730  * inserting transaction to update the counters at commit, and
1731  * we assume that will happen only after we report our
1732  * results. This assumption is a bit shaky, but it is what
1733  * acquire_sample_rows() does, so be consistent.
1734  */
1735  prunestate->all_visible = false;
1736  break;
1737  case HEAPTUPLE_DELETE_IN_PROGRESS:
1738  /* This is an expected case during concurrent vacuum */
1739  prunestate->all_visible = false;
1740 
1741  /*
1742  * Count such rows as live. As above, we assume the deleting
1743  * transaction will commit and update the counters after we
1744  * report.
1745  */
1746  live_tuples++;
1747  break;
1748  default:
1749  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1750  break;
1751  }
1752 
1753  prunestate->hastup = true; /* page makes rel truncation unsafe */
1754 
1755  /* Tuple with storage -- consider need to freeze */
1756  if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
1757  &frozen[tuples_frozen], &totally_frozen))
1758  {
1759  /* Save prepared freeze plan for later */
1760  frozen[tuples_frozen++].offset = offnum;
1761  }
1762 
1763  /*
1764  * If any tuple isn't either totally frozen already or eligible to
1765  * become totally frozen (according to its freeze plan), then the page
1766  * definitely cannot be set all-frozen in the visibility map later on
1767  */
1768  if (!totally_frozen)
1769  prunestate->all_frozen = false;
1770  }
1771 
1772  /*
1773  * We have now divided every item on the page into either an LP_DEAD item
1774  * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
1775  * that remains and needs to be considered for freezing now (LP_UNUSED and
1776  * LP_REDIRECT items also remain, but are of no further interest to us).
1777  */
1778  vacrel->offnum = InvalidOffsetNumber;
1779 
1780  /*
1781  * Freeze the page when heap_prepare_freeze_tuple indicates that at least
1782  * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also
1783  * freeze when pruning generated an FPI, if doing so means that we set the
1784  * page all-frozen afterwards (might not happen until final heap pass).
1785  */
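 /*
  * To spell out the test below: the freeze path is taken when (a)
  * heap_prepare_freeze_tuple reported that freezing is required, or (b)
  * there is nothing to freeze anyway (tuples_frozen == 0), so following the
  * freeze path costs nothing extra, or (c) pruning already emitted a
  * full-page image and freezing now allows the page to be set all-frozen in
  * the visibility map.
  */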
1786  if (pagefrz.freeze_required || tuples_frozen == 0 ||
1787  (prunestate->all_visible && prunestate->all_frozen &&
1788  fpi_before != pgWalUsage.wal_fpi))
1789  {
1790  /*
1791  * We're freezing the page. Our final NewRelfrozenXid doesn't need to
1792  * be affected by the XIDs that are just about to be frozen anyway.
1793  */
1794  vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
1795  vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
1796 
1797  if (tuples_frozen == 0)
1798  {
1799  /*
1800  * We have no freeze plans to execute, so there's no added cost
1801  * from following the freeze path. That's why it was chosen. This
1802  * is important in the case where the page only contains totally
1803  * frozen tuples at this point (perhaps only following pruning).
1804  * Such pages can be marked all-frozen in the VM by our caller,
1805  * even though none of its tuples were newly frozen here (note
1806  * that the "no freeze" path never sets pages all-frozen).
1807  *
1808  * We never increment the frozen_pages instrumentation counter
1809  * here, since it only counts pages with newly frozen tuples
1810  * (don't confuse that with pages newly set all-frozen in VM).
1811  */
1812  }
1813  else
1814  {
1815  TransactionId snapshotConflictHorizon;
1816 
1817  vacrel->frozen_pages++;
1818 
1819  /*
1820  * We can use visibility_cutoff_xid as our cutoff for conflicts
1821  * when the whole page is eligible to become all-frozen in the VM
1822  * once we're done with it. Otherwise we generate a conservative
1823  * cutoff by stepping back from OldestXmin.
1824  */
1825  if (prunestate->all_visible && prunestate->all_frozen)
1826  {
1827  /* Using same cutoff when setting VM is now unnecessary */
1828  snapshotConflictHorizon = prunestate->visibility_cutoff_xid;
1829  prunestate->visibility_cutoff_xid = InvalidTransactionId;
1830  }
1831  else
1832  {
1833  /* Avoids false conflicts when hot_standby_feedback in use */
1834  snapshotConflictHorizon = vacrel->cutoffs.OldestXmin;
1835  TransactionIdRetreat(snapshotConflictHorizon);
1836  }
1837 
1838  /* Execute all freeze plans for page as a single atomic action */
1839  heap_freeze_execute_prepared(vacrel->rel, buf,
1840  snapshotConflictHorizon,
1841  frozen, tuples_frozen);
1842  }
1843  }
1844  else
1845  {
1846  /*
1847  * Page requires "no freeze" processing. It might be set all-visible
1848  * in the visibility map, but it can never be set all-frozen.
1849  */
1850  vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
1851  vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
1852  prunestate->all_frozen = false;
1853  tuples_frozen = 0; /* avoid miscounts in instrumentation */
1854  }
1855 
1856  /*
1857  * VACUUM will call heap_page_is_all_visible() during the second pass over
1858  * the heap to determine all_visible and all_frozen for the page -- this
1859  * is a specialized version of the logic from this function. Now that
1860  * we've finished pruning and freezing, make sure that we're in total
1861  * agreement with heap_page_is_all_visible() using an assertion.
1862  */
1863 #ifdef USE_ASSERT_CHECKING
1864  /* Note that all_frozen value does not matter when !all_visible */
1865  if (prunestate->all_visible && lpdead_items == 0)
1866  {
1867  TransactionId cutoff;
1868  bool all_frozen;
1869 
1870  if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
1871  Assert(false);
1872 
1873  Assert(!TransactionIdIsValid(cutoff) ||
1874  cutoff == prunestate->visibility_cutoff_xid);
1875  }
1876 #endif
1877 
1878  /*
1879  * Now save details of the LP_DEAD items from the page in vacrel
1880  */
1881  if (lpdead_items > 0)
1882  {
1883  VacDeadItems *dead_items = vacrel->dead_items;
1884  ItemPointerData tmp;
1885 
1886  vacrel->lpdead_item_pages++;
1887  prunestate->has_lpdead_items = true;
1888 
1889  ItemPointerSetBlockNumber(&tmp, blkno);
1890 
1891  for (int i = 0; i < lpdead_items; i++)
1892  {
1893  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
1894  dead_items->items[dead_items->num_items++] = tmp;
1895  }
1896 
1897  Assert(dead_items->num_items <= dead_items->max_items);
1898  pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
1899  dead_items->num_items);
1900 
1901  /*
1902  * It was convenient to ignore LP_DEAD items in all_visible earlier on
1903  * to make the choice of whether or not to freeze the page unaffected
1904  * by the short-term presence of LP_DEAD items. These LP_DEAD items
1905  * were effectively assumed to be LP_UNUSED items in the making. It
1906  * doesn't matter which heap pass (initial pass or final pass) ends up
1907  * setting the page all-frozen, as long as the ongoing VACUUM does it.
1908  *
1909  * Now that freezing has been finalized, unset all_visible. It needs
1910  * to reflect the present state of things, as expected by our caller.
1911  */
1912  prunestate->all_visible = false;
1913  }
1914 
1915  /* Finally, add page-local counts to whole-VACUUM counts */
1916  vacrel->tuples_deleted += presult.ndeleted;
1917  vacrel->tuples_frozen += tuples_frozen;
1918  vacrel->lpdead_items += lpdead_items;
1919  vacrel->live_tuples += live_tuples;
1920  vacrel->recently_dead_tuples += recently_dead_tuples;
1921 }
1922 
1923 /*
1924  * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1925  *
1926  * Caller need only hold a pin and share lock on the buffer, unlike
1927  * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1928  * performed here, it's quite possible that an earlier opportunistic pruning
1929  * operation left LP_DEAD items behind. We'll at least collect any such items
1930  * in the dead_items array for removal from indexes.
1931  *
1932  * For aggressive VACUUM callers, we may return false to indicate that a full
1933  * cleanup lock is required for processing by lazy_scan_prune. This is only
1934  * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1935  * one or more tuples on the page. We always return true for non-aggressive
1936  * callers.
1937  *
1938  * See lazy_scan_prune for an explanation of hastup return flag.
1939  * recordfreespace flag instructs caller on whether or not it should do
1940  * generic FSM processing for page.
1941  */
1942 static bool
1943 lazy_scan_noprune(LVRelState *vacrel,
1944  Buffer buf,
1945  BlockNumber blkno,
1946  Page page,
1947  bool *hastup,
1948  bool *recordfreespace)
1949 {
1950  OffsetNumber offnum,
1951  maxoff;
1952  int lpdead_items,
1953  live_tuples,
1954  recently_dead_tuples,
1955  missed_dead_tuples;
1956  HeapTupleHeader tupleheader;
1957  TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1958  MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1959  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1960 
1961  Assert(BufferGetBlockNumber(buf) == blkno);
1962 
1963  *hastup = false; /* for now */
1964  *recordfreespace = false; /* for now */
1965 
1966  lpdead_items = 0;
1967  live_tuples = 0;
1968  recently_dead_tuples = 0;
1969  missed_dead_tuples = 0;
1970 
1971  maxoff = PageGetMaxOffsetNumber(page);
1972  for (offnum = FirstOffsetNumber;
1973  offnum <= maxoff;
1974  offnum = OffsetNumberNext(offnum))
1975  {
1976  ItemId itemid;
1977  HeapTupleData tuple;
1978 
1979  vacrel->offnum = offnum;
1980  itemid = PageGetItemId(page, offnum);
1981 
1982  if (!ItemIdIsUsed(itemid))
1983  continue;
1984 
1985  if (ItemIdIsRedirected(itemid))
1986  {
1987  *hastup = true;
1988  continue;
1989  }
1990 
1991  if (ItemIdIsDead(itemid))
1992  {
1993  /*
1994  * Deliberately don't set hastup=true here. See same point in
1995  * lazy_scan_prune for an explanation.
1996  */
1997  deadoffsets[lpdead_items++] = offnum;
1998  continue;
1999  }
2000 
2001  *hastup = true; /* page prevents rel truncation */
2002  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2003  if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2004  &NoFreezePageRelfrozenXid,
2005  &NoFreezePageRelminMxid))
2006  {
2007  /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2008  if (vacrel->aggressive)
2009  {
2010  /*
2011  * Aggressive VACUUMs must always be able to advance rel's
2012  * relfrozenxid to a value >= FreezeLimit (and be able to
2013  * advance rel's relminmxid to a value >= MultiXactCutoff).
2014  * The ongoing aggressive VACUUM won't be able to do that
2015  * unless it can freeze an XID (or MXID) from this tuple now.
2016  *
2017  * The only safe option is to have caller perform processing
2018  * of this page using lazy_scan_prune. Caller might have to
2019  * wait a while for a cleanup lock, but it can't be helped.
2020  */
2021  vacrel->offnum = InvalidOffsetNumber;
2022  return false;
2023  }
2024 
2025  /*
2026  * Non-aggressive VACUUMs are under no obligation to advance
2027  * relfrozenxid (even by one XID). We can be much laxer here.
2028  *
2029  * Currently we always just accept an older final relfrozenxid
2030  * and/or relminmxid value. We never make caller wait or work a
2031  * little harder, even when it likely makes sense to do so.
2032  */
2033  }
2034 
2035  ItemPointerSet(&(tuple.t_self), blkno, offnum);
2036  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2037  tuple.t_len = ItemIdGetLength(itemid);
2038  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2039 
2040  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2041  buf))
2042  {
2043  case HEAPTUPLE_DELETE_IN_PROGRESS:
2044  case HEAPTUPLE_LIVE:
2045 
2046  /*
2047  * Count both cases as live, just like lazy_scan_prune
2048  */
2049  live_tuples++;
2050 
2051  break;
2052  case HEAPTUPLE_DEAD:
2053 
2054  /*
2055  * There is some useful work for pruning to do, that won't be
2056  * done due to failure to get a cleanup lock.
2057  */
2058  missed_dead_tuples++;
2059  break;
2060  case HEAPTUPLE_RECENTLY_DEAD:
2061 
2062  /*
2063  * Count in recently_dead_tuples, just like lazy_scan_prune
2064  */
2065  recently_dead_tuples++;
2066  break;
2067  case HEAPTUPLE_INSERT_IN_PROGRESS:
2068 
2069  /*
2070  * Do not count these rows as live, just like lazy_scan_prune
2071  */
2072  break;
2073  default:
2074  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2075  break;
2076  }
2077  }
2078 
2079  vacrel->offnum = InvalidOffsetNumber;
2080 
2081  /*
2082  * By here we know for sure that caller can put off freezing and pruning
2083  * this particular page until the next VACUUM. Remember its details now.
2084  * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2085  */
2086  vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2087  vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2088 
2089  /* Save any LP_DEAD items found on the page in dead_items array */
2090  if (vacrel->nindexes == 0)
2091  {
2092  /* Using one-pass strategy (since table has no indexes) */
2093  if (lpdead_items > 0)
2094  {
2095  /*
2096  * Perfunctory handling for the corner case where a single pass
2097  * strategy VACUUM cannot get a cleanup lock, and it turns out
2098  * that there is one or more LP_DEAD items: just count the LP_DEAD
2099  * items as missed_dead_tuples instead. (This is a bit dishonest,
2100  * but it beats having to maintain specialized heap vacuuming code
2101  * forever, for vanishingly little benefit.)
2102  */
2103  *hastup = true;
2104  missed_dead_tuples += lpdead_items;
2105  }
2106 
2107  *recordfreespace = true;
2108  }
2109  else if (lpdead_items == 0)
2110  {
2111  /*
2112  * Won't be vacuuming this page later, so record page's freespace in
2113  * the FSM now
2114  */
2115  *recordfreespace = true;
2116  }
2117  else
2118  {
2119  VacDeadItems *dead_items = vacrel->dead_items;
2120  ItemPointerData tmp;
2121 
2122  /*
2123  * Page has LP_DEAD items, and so any references/TIDs that remain in
2124  * indexes will be deleted during index vacuuming (and then marked
2125  * LP_UNUSED in the heap)
2126  */
2127  vacrel->lpdead_item_pages++;
2128 
2129  ItemPointerSetBlockNumber(&tmp, blkno);
2130 
2131  for (int i = 0; i < lpdead_items; i++)
2132  {
2133  ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
2134  dead_items->items[dead_items->num_items++] = tmp;
2135  }
2136 
2137  Assert(dead_items->num_items <= dead_items->max_items);
2138  pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
2139  dead_items->num_items);
2140 
2141  vacrel->lpdead_items += lpdead_items;
2142 
2143  /*
2144  * Assume that we'll go on to vacuum this heap page during final pass
2145  * over the heap. Don't record free space until then.
2146  */
2147  *recordfreespace = false;
2148  }
2149 
2150  /*
2151  * Finally, add relevant page-local counts to whole-VACUUM counts
2152  */
2153  vacrel->live_tuples += live_tuples;
2154  vacrel->recently_dead_tuples += recently_dead_tuples;
2155  vacrel->missed_dead_tuples += missed_dead_tuples;
2156  if (missed_dead_tuples > 0)
2157  vacrel->missed_dead_pages++;
2158 
2159  /* Caller won't need to call lazy_scan_prune with same page */
2160  return true;
2161 }
2162 
2163 /*
2164  * Main entry point for index vacuuming and heap vacuuming.
2165  *
2166  * Removes items collected in dead_items from table's indexes, then marks the
2167  * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2168  * for full details.
2169  *
2170  * Also empties dead_items, freeing up space for later TIDs.
2171  *
2172  * We may choose to bypass index vacuuming at this point, though only when the
2173  * ongoing VACUUM operation will definitely only have one index scan/round of
2174  * index vacuuming.
2175  */
2176 static void
2177 lazy_vacuum(LVRelState *vacrel)
2178 {
2179  bool bypass;
2180 
2181  /* Should not end up here with no indexes */
2182  Assert(vacrel->nindexes > 0);
2183  Assert(vacrel->lpdead_item_pages > 0);
2184 
2185  if (!vacrel->do_index_vacuuming)
2186  {
2187  Assert(!vacrel->do_index_cleanup);
2188  vacrel->dead_items->num_items = 0;
2189  return;
2190  }
2191 
2192  /*
2193  * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2194  *
2195  * We currently only do this in cases where the number of LP_DEAD items
2196  * for the entire VACUUM operation is close to zero. This avoids sharp
2197  * discontinuities in the duration and overhead of successive VACUUM
2198  * operations that run against the same table with a fixed workload.
2199  * Ideally, successive VACUUM operations will behave as if there are
2200  * exactly zero LP_DEAD items in cases where there are close to zero.
2201  *
2202  * This is likely to be helpful with a table that is continually affected
2203  * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2204  * have small aberrations that lead to just a few heap pages retaining
2205  * only one or two LP_DEAD items. This is pretty common; even when the
2206  * DBA goes out of their way to make UPDATEs use HOT, it is practically
2207  * impossible to predict whether HOT will be applied in 100% of cases.
2208  * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2209  * HOT through careful tuning.
2210  */
2211  bypass = false;
2212  if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2213  {
2214  BlockNumber threshold;
2215 
2216  Assert(vacrel->num_index_scans == 0);
2217  Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
2218  Assert(vacrel->do_index_vacuuming);
2219  Assert(vacrel->do_index_cleanup);
2220 
2221  /*
2222  * This crossover point at which we'll start to do index vacuuming is
2223  * expressed as a percentage of the total number of heap pages in the
2224  * table that are known to have at least one LP_DEAD item. This is
2225  * much more important than the total number of LP_DEAD items, since
2226  * it's a proxy for the number of heap pages whose visibility map bits
2227  * cannot be set on account of bypassing index and heap vacuuming.
2228  *
2229  * We apply one further precautionary test: the space currently used
2230  * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2231  * not exceed 32MB. This limits the risk that we will bypass index
2232  * vacuuming again and again until eventually there is a VACUUM whose
2233  * dead_items space is not CPU cache resident.
2234  *
2235  * We don't take any special steps to remember the LP_DEAD items (such
2236  * as counting them in our final update to the stats system) when the
2237  * optimization is applied. Though the accounting used in analyze.c's
2238  * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2239  * rows in its own stats report, that's okay. The discrepancy should
2240  * be negligible. If this optimization is ever expanded to cover more
2241  * cases then this may need to be reconsidered.
2242  */
2243  threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2244  bypass = (vacrel->lpdead_item_pages < threshold &&
2245  vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
2246  }
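 /*
  * As a rough illustration of the test above (assuming BYPASS_THRESHOLD_PAGES,
  * defined near the top of this file, is 0.02): for a 500,000-page table,
  * index vacuuming is bypassed only while fewer than 10,000 heap pages have
  * LP_DEAD items and the accumulated dead TIDs still fit in 32MB.
  */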
2247 
2248  if (bypass)
2249  {
2250  /*
2251  * There are almost zero TIDs. Behave as if there were precisely
2252  * zero: bypass index vacuuming, but do index cleanup.
2253  *
2254  * We expect that the ongoing VACUUM operation will finish very
2255  * quickly, so there is no point in considering speeding up as a
2256  * failsafe against wraparound failure. (Index cleanup is expected to
2257  * finish very quickly in cases where there were no ambulkdelete()
2258  * calls.)
2259  */
2260  vacrel->do_index_vacuuming = false;
2261  }
2262  else if (lazy_vacuum_all_indexes(vacrel))
2263  {
2264  /*
2265  * We successfully completed a round of index vacuuming. Do related
2266  * heap vacuuming now.
2267  */
2268  lazy_vacuum_heap_rel(vacrel);
2269  }
2270  else
2271  {
2272  /*
2273  * Failsafe case.
2274  *
2275  * We attempted index vacuuming, but didn't finish a full round/full
2276  * index scan. This happens when relfrozenxid or relminmxid is too
2277  * far in the past.
2278  *
2279  * From this point on the VACUUM operation will do no further index
2280  * vacuuming or heap vacuuming. This VACUUM operation won't end up
2281  * back here again.
2282  */
2283  Assert(VacuumFailsafeActive);
2284  }
2285 
2286  /*
2287  * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2288  * vacuum)
2289  */
2290  vacrel->dead_items->num_items = 0;
2291 }
2292 
2293 /*
2294  * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2295  *
2296  * Returns true in the common case when all indexes were successfully
2297  * vacuumed. Returns false in rare cases where we determined that the ongoing
2298  * VACUUM operation is at risk of taking too long to finish, leading to
2299  * wraparound failure.
2300  */
2301 static bool
2302 lazy_vacuum_all_indexes(LVRelState *vacrel)
2303 {
2304  bool allindexes = true;
2305  double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2306  const int progress_start_index[] = {
2307  PROGRESS_VACUUM_PHASE,
2308  PROGRESS_VACUUM_INDEXES_TOTAL
2309  };
2310  const int progress_end_index[] = {
2311  PROGRESS_VACUUM_INDEXES_TOTAL,
2312  PROGRESS_VACUUM_INDEXES_PROCESSED,
2313  PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2314  };
2315  int64 progress_start_val[2];
2316  int64 progress_end_val[3];
2317 
2318  Assert(vacrel->nindexes > 0);
2319  Assert(vacrel->do_index_vacuuming);
2320  Assert(vacrel->do_index_cleanup);
2321 
2322  /* Precheck for XID wraparound emergencies */
2323  if (lazy_check_wraparound_failsafe(vacrel))
2324  {
2325  /* Wraparound emergency -- don't even start an index scan */
2326  return false;
2327  }
2328 
2329  /*
2330  * Report that we are now vacuuming indexes and the number of indexes to
2331  * vacuum.
2332  */
2333  progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2334  progress_start_val[1] = vacrel->nindexes;
2335  pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2336 
2337  if (!ParallelVacuumIsActive(vacrel))
2338  {
2339  for (int idx = 0; idx < vacrel->nindexes; idx++)
2340  {
2341  Relation indrel = vacrel->indrels[idx];
2342  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2343 
2344  vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2345  old_live_tuples,
2346  vacrel);
2347 
2348  /* Report the number of indexes vacuumed */
2349  pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2350  idx + 1);
2351 
2352  if (lazy_check_wraparound_failsafe(vacrel))
2353  {
2354  /* Wraparound emergency -- end current index scan */
2355  allindexes = false;
2356  break;
2357  }
2358  }
2359  }
2360  else
2361  {
2362  /* Outsource everything to parallel variant */
2363  parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2364  vacrel->num_index_scans);
2365 
2366  /*
2367  * Do a postcheck to consider applying wraparound failsafe now. Note
2368  * that parallel VACUUM only gets the precheck and this postcheck.
2369  */
2370  if (lazy_check_wraparound_failsafe(vacrel))
2371  allindexes = false;
2372  }
2373 
2374  /*
2375  * We delete all LP_DEAD items from the first heap pass in all indexes on
2376  * each call here (except calls where we choose to do the failsafe). This
2377  * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2378  * of the failsafe triggering, which prevents the next call from taking
2379  * place).
2380  */
2381  Assert(vacrel->num_index_scans > 0 ||
2382  vacrel->dead_items->num_items == vacrel->lpdead_items);
2383  Assert(allindexes || VacuumFailsafeActive);
2384 
2385  /*
2386  * Increase and report the number of index scans. Also, we reset
2387  * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2388  *
2389  * We deliberately include the case where we started a round of bulk
2390  * deletes that we weren't able to finish due to the failsafe triggering.
2391  */
2392  vacrel->num_index_scans++;
2393  progress_end_val[0] = 0;
2394  progress_end_val[1] = 0;
2395  progress_end_val[2] = vacrel->num_index_scans;
2396  pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2397 
2398  return allindexes;
2399 }
2400 
2401 /*
2402  * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2403  *
2404  * This routine marks LP_DEAD items in vacrel->dead_items array as LP_UNUSED.
2405  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
2406  * at all.
2407  *
2408  * We may also be able to truncate the line pointer array of the heap pages we
2409  * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2410  * array, it can be reclaimed as free space. These LP_UNUSED items usually
2411  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2412  * each page to LP_UNUSED, and then consider if it's possible to truncate the
2413  * page's line pointer array).
2414  *
2415  * Note: the reason for doing this as a second pass is we cannot remove the
2416  * tuples until we've removed their index entries, and we want to process
2417  * index entry removal in batches as large as possible.
2418  */
2419 static void
2420 lazy_vacuum_heap_rel(LVRelState *vacrel)
2421 {
2422  int index = 0;
2423  BlockNumber vacuumed_pages = 0;
2424  Buffer vmbuffer = InvalidBuffer;
2425  LVSavedErrInfo saved_err_info;
2426 
2427  Assert(vacrel->do_index_vacuuming);
2428  Assert(vacrel->do_index_cleanup);
2429  Assert(vacrel->num_index_scans > 0);
2430 
2431  /* Report that we are now vacuuming the heap */
2432  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2433  PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2434 
2435  /* Update error traceback information */
2436  update_vacuum_error_info(vacrel, &saved_err_info,
2437  VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2438  InvalidBlockNumber, InvalidOffsetNumber);
2439 
2440  while (index < vacrel->dead_items->num_items)
2441  {
2442  BlockNumber blkno;
2443  Buffer buf;
2444  Page page;
2445  Size freespace;
2446 
2447  vacuum_delay_point();
2448 
2449  blkno = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
2450  vacrel->blkno = blkno;
2451 
2452  /*
2453  * Pin the visibility map page in case we need to mark the page
2454  * all-visible. In most cases this will be very cheap, because we'll
2455  * already have the correct page pinned anyway.
2456  */
2457  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2458 
2459  /* We need a non-cleanup exclusive lock to mark dead_items unused */
2460  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2461  vacrel->bstrategy);
2462  LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2463  index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer);
2464 
2465  /* Now that we've vacuumed the page, record its available space */
2466  page = BufferGetPage(buf);
2467  freespace = PageGetHeapFreeSpace(page);
2468 
2469  UnlockReleaseBuffer(buf);
2470  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2471  vacuumed_pages++;
2472  }
2473 
2474  vacrel->blkno = InvalidBlockNumber;
2475  if (BufferIsValid(vmbuffer))
2476  ReleaseBuffer(vmbuffer);
2477 
2478  /*
2479  * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2480  * the second heap pass. No more, no less.
2481  */
2482  Assert(index > 0);
2483  Assert(vacrel->num_index_scans > 1 ||
2484  (index == vacrel->lpdead_items &&
2485  vacuumed_pages == vacrel->lpdead_item_pages));
2486 
2487  ereport(DEBUG2,
2488  (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2489  vacrel->relname, (long long) index, vacuumed_pages)));
2490 
2491  /* Revert to the previous phase information for error traceback */
2492  restore_vacuum_error_info(vacrel, &saved_err_info);
2493 }
2494 
2495 /*
2496  * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2497  * vacrel->dead_items array.
2498  *
2499  * Caller must have an exclusive buffer lock on the buffer (though a full
2500  * cleanup lock is also acceptable). vmbuffer must be valid and already have
2501  * a pin on blkno's visibility map page.
2502  *
2503  * index is an offset into the vacrel->dead_items array for the first listed
2504  * LP_DEAD item on the page. The return value is the first index immediately
2505  * after all LP_DEAD items for the same page in the array.
2506  */
2507 static int
2508 lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2509  int index, Buffer vmbuffer)
2510 {
2511  VacDeadItems *dead_items = vacrel->dead_items;
2512  Page page = BufferGetPage(buffer);
2513  OffsetNumber unused[MaxHeapTuplesPerPage];
2514  int nunused = 0;
2515  TransactionId visibility_cutoff_xid;
2516  bool all_frozen;
2517  LVSavedErrInfo saved_err_info;
2518 
2519  Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2520 
2521  pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2522 
2523  /* Update error traceback information */
2524  update_vacuum_error_info(vacrel, &saved_err_info,
2525  VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2526  InvalidOffsetNumber);
2527 
2528  START_CRIT_SECTION();
2529 
2530  for (; index < dead_items->num_items; index++)
2531  {
2532  BlockNumber tblk;
2533  OffsetNumber toff;
2534  ItemId itemid;
2535 
2536  tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
2537  if (tblk != blkno)
2538  break; /* past end of tuples for this block */
2539  toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
2540  itemid = PageGetItemId(page, toff);
2541 
2542  Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2543  ItemIdSetUnused(itemid);
2544  unused[nunused++] = toff;
2545  }
2546 
2547  Assert(nunused > 0);
2548 
2549  /* Attempt to truncate line pointer array now */
2550  PageTruncateLinePointerArray(page);
2551 
2552  /*
2553  * Mark buffer dirty before we write WAL.
2554  */
2555  MarkBufferDirty(buffer);
2556 
2557  /* XLOG stuff */
2558  if (RelationNeedsWAL(vacrel->rel))
2559  {
2560  xl_heap_vacuum xlrec;
2561  XLogRecPtr recptr;
2562 
2563  xlrec.nunused = nunused;
2564 
2565  XLogBeginInsert();
2566  XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
2567 
2568  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2569  XLogRegisterBufData(0, (char *) unused, nunused * sizeof(OffsetNumber));
2570 
2571  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
2572 
2573  PageSetLSN(page, recptr);
2574  }
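 /*
  * (The XLOG_HEAP2_VACUUM record registered above carries only the array of
  * offsets that were just set LP_UNUSED; on replay, heap_xlog_vacuum is
  * expected to mark the same line pointers unused and re-run the line
  * pointer array truncation.)
  */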
2575 
2576  /*
2577  * End critical section, so we safely can do visibility tests (which
2578  * possibly need to perform IO and allocate memory!). If we crash now the
2579  * page (including the corresponding vm bit) might not be marked all
2580  * visible, but that's fine. A later vacuum will fix that.
2581  */
2582  END_CRIT_SECTION();
2583 
2584  /*
2585  * Now that we have removed the LP_DEAD items from the page, once again
2586  * check if the page has become all-visible. The page is already marked
2587  * dirty, exclusively locked, and, if needed, a full page image has been
2588  * emitted.
2589  */
2590  Assert(!PageIsAllVisible(page));
2591  if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2592  &all_frozen))
2593  {
2594  uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2595 
2596  if (all_frozen)
2597  {
2598  Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2599  flags |= VISIBILITYMAP_ALL_FROZEN;
2600  }
2601 
2602  PageSetAllVisible(page);
2603  visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2604  vmbuffer, visibility_cutoff_xid, flags);
2605  }
2606 
2607  /* Revert to the previous phase information for error traceback */
2608  restore_vacuum_error_info(vacrel, &saved_err_info);
2609  return index;
2610 }
2611 
2612 /*
2613  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2614  * relfrozenxid and/or relminmxid that is dangerously far in the past.
2615  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2616  * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2617  *
2618  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2619  * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2620  * that it started out with.
2621  *
2622  * Returns true when failsafe has been triggered.
2623  */
2624 static bool
2625 lazy_check_wraparound_failsafe(LVRelState *vacrel)
2626 {
2627  /* Don't warn more than once per VACUUM */
2628  if (VacuumFailsafeActive)
2629  return true;
2630 
2631  if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2632  {
2633  const int progress_index[] = {
2634  PROGRESS_VACUUM_INDEXES_TOTAL,
2635  PROGRESS_VACUUM_INDEXES_PROCESSED
2636  };
2637  int64 progress_val[2] = {0, 0};
2638 
2639  VacuumFailsafeActive = true;
2640 
2641  /*
2642  * Abandon use of a buffer access strategy to allow use of all of
2643  * shared buffers. We assume the caller who allocated the memory for
2644  * the BufferAccessStrategy will free it.
2645  */
2646  vacrel->bstrategy = NULL;
2647 
2648  /* Disable index vacuuming, index cleanup, and heap rel truncation */
2649  vacrel->do_index_vacuuming = false;
2650  vacrel->do_index_cleanup = false;
2651  vacrel->do_rel_truncate = false;
2652 
2653  /* Reset the progress counters */
2654  pgstat_progress_update_multi_param(2, progress_index, progress_val);
2655 
2656  ereport(WARNING,
2657  (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2658  vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2659  vacrel->num_index_scans),
2660  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2661  errhint("Consider increasing configuration parameter maintenance_work_mem or autovacuum_work_mem.\n"
2662  "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2663 
2664  /* Stop applying cost limits from this point on */
2665  VacuumCostActive = false;
2666  VacuumCostBalance = 0;
2667 
2668  return true;
2669  }
2670 
2671  return false;
2672 }
2673 
2674 /*
2675  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2676  */
2677 static void
2678 lazy_cleanup_all_indexes(LVRelState *vacrel)
2679 {
2680  double reltuples = vacrel->new_rel_tuples;
2681  bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2682  const int progress_start_index[] = {
2683  PROGRESS_VACUUM_PHASE,
2684  PROGRESS_VACUUM_INDEXES_TOTAL
2685  };
2686  const int progress_end_index[] = {
2687  PROGRESS_VACUUM_INDEXES_TOTAL,
2688  PROGRESS_VACUUM_INDEXES_PROCESSED
2689  };
2690  int64 progress_start_val[2];
2691  int64 progress_end_val[2] = {0, 0};
2692 
2693  Assert(vacrel->do_index_cleanup);
2694  Assert(vacrel->nindexes > 0);
2695 
2696  /*
2697  * Report that we are now cleaning up indexes and the number of indexes to
2698  * cleanup.
2699  */
2700  progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2701  progress_start_val[1] = vacrel->nindexes;
2702  pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2703 
2704  if (!ParallelVacuumIsActive(vacrel))
2705  {
2706  for (int idx = 0; idx < vacrel->nindexes; idx++)
2707  {
2708  Relation indrel = vacrel->indrels[idx];
2709  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2710 
2711  vacrel->indstats[idx] =
2712  lazy_cleanup_one_index(indrel, istat, reltuples,
2713  estimated_count, vacrel);
2714 
2715  /* Report the number of indexes cleaned up */
2716  pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2717  idx + 1);
2718  }
2719  }
2720  else
2721  {
2722  /* Outsource everything to parallel variant */
2723  parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2724  vacrel->num_index_scans,
2725  estimated_count);
2726  }
2727 
2728  /* Reset the progress counters */
2729  pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2730 }
2731 
2732 /*
2733  * lazy_vacuum_one_index() -- vacuum index relation.
2734  *
2735  * Delete all the index tuples containing a TID collected in
2736  * vacrel->dead_items array. Also update running statistics.
2737  * Exact details depend on index AM's ambulkdelete routine.
2738  *
2739  * reltuples is the number of heap tuples to be passed to the
2740  * bulkdelete callback. It's always assumed to be estimated.
2741  * See indexam.sgml for more info.
2742  *
2743  * Returns bulk delete stats derived from input stats
2744  */
2745 static IndexBulkDeleteResult *
2746 lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2747  double reltuples, LVRelState *vacrel)
2748 {
2749  IndexVacuumInfo ivinfo;
2750  LVSavedErrInfo saved_err_info;
2751 
2752  ivinfo.index = indrel;
2753  ivinfo.heaprel = vacrel->rel;
2754  ivinfo.analyze_only = false;
2755  ivinfo.report_progress = false;
2756  ivinfo.estimated_count = true;
2757  ivinfo.message_level = DEBUG2;
2758  ivinfo.num_heap_tuples = reltuples;
2759  ivinfo.strategy = vacrel->bstrategy;
2760 
2761  /*
2762  * Update error traceback information.
2763  *
2764  * The index name is saved during this phase and restored immediately
2765  * after this phase. See vacuum_error_callback.
2766  */
2767  Assert(vacrel->indname == NULL);
2768  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2769  update_vacuum_error_info(vacrel, &saved_err_info,
2770  VACUUM_ERRCB_PHASE_VACUUM_INDEX,
2771  InvalidBlockNumber, InvalidOffsetNumber);
2772 
2773  /* Do bulk deletion */
2774  istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items);
2775 
2776  /* Revert to the previous phase information for error traceback */
2777  restore_vacuum_error_info(vacrel, &saved_err_info);
2778  pfree(vacrel->indname);
2779  vacrel->indname = NULL;
2780 
2781  return istat;
2782 }
2783 
2784 /*
2785  * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2786  *
2787  * Calls index AM's amvacuumcleanup routine. reltuples is the number
2788  * of heap tuples and estimated_count is true if reltuples is an
2789  * estimated value. See indexam.sgml for more info.
2790  *
2791  * Returns bulk delete stats derived from input stats
2792  */
2793 static IndexBulkDeleteResult *
2794 lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2795  double reltuples, bool estimated_count,
2796  LVRelState *vacrel)
2797 {
2798  IndexVacuumInfo ivinfo;
2799  LVSavedErrInfo saved_err_info;
2800 
2801  ivinfo.index = indrel;
2802  ivinfo.heaprel = vacrel->rel;
2803  ivinfo.analyze_only = false;
2804  ivinfo.report_progress = false;
2805  ivinfo.estimated_count = estimated_count;
2806  ivinfo.message_level = DEBUG2;
2807 
2808  ivinfo.num_heap_tuples = reltuples;
2809  ivinfo.strategy = vacrel->bstrategy;
2810 
2811  /*
2812  * Update error traceback information.
2813  *
2814  * The index name is saved during this phase and restored immediately
2815  * after this phase. See vacuum_error_callback.
2816  */
2817  Assert(vacrel->indname == NULL);
2818  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2819  update_vacuum_error_info(vacrel, &saved_err_info,
2820  VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
2821  InvalidBlockNumber, InvalidOffsetNumber);
2822 
2823  istat = vac_cleanup_one_index(&ivinfo, istat);
2824 
2825  /* Revert to the previous phase information for error traceback */
2826  restore_vacuum_error_info(vacrel, &saved_err_info);
2827  pfree(vacrel->indname);
2828  vacrel->indname = NULL;
2829 
2830  return istat;
2831 }
2832 
2833 /*
2834  * should_attempt_truncation - should we attempt to truncate the heap?
2835  *
2836  * Don't even think about it unless we have a shot at releasing a goodly
2837  * number of pages. Otherwise, the time taken isn't worth it, mainly because
2838  * an AccessExclusive lock must be replayed on any hot standby, where it can
2839  * be particularly disruptive.
2840  *
2841  * Also don't attempt it if wraparound failsafe is in effect. The entire
2842  * system might be refusing to allocate new XIDs at this point. The system
2843  * definitely won't return to normal unless and until VACUUM actually advances
2844  * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2845  * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2846  * truncate the table under these circumstances, an XID exhaustion error might
2847  * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2848  * There is very little chance of truncation working out when the failsafe is
2849  * in effect in any case. lazy_scan_prune makes the optimistic assumption
2850  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2851  * we're called.
2852  */
2853 static bool
2854 should_attempt_truncation(LVRelState *vacrel)
2855 {
2856  BlockNumber possibly_freeable;
2857 
2858  if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
2859  return false;
2860 
2861  possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2862  if (possibly_freeable > 0 &&
2863  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2864  possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2865  return true;
2866 
2867  return false;
2868 }
2869 
2870 /*
2871  * lazy_truncate_heap - try to truncate off any empty pages at the end
2872  */
2873 static void
2874 lazy_truncate_heap(LVRelState *vacrel)
2875 {
2876  BlockNumber orig_rel_pages = vacrel->rel_pages;
2877  BlockNumber new_rel_pages;
2878  bool lock_waiter_detected;
2879  int lock_retry;
2880 
2881  /* Report that we are now truncating */
2882  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2883  PROGRESS_VACUUM_PHASE_TRUNCATE);
2884 
2885  /* Update error traceback information one last time */
2886  update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
2887  vacrel->nonempty_pages, InvalidOffsetNumber);
2888 
2889  /*
2890  * Loop until no more truncating can be done.
2891  */
2892  do
2893  {
2894  /*
2895  * We need full exclusive lock on the relation in order to do
2896  * truncation. If we can't get it, give up rather than waiting --- we
2897  * don't want to block other backends, and we don't want to deadlock
2898  * (which is quite possible considering we already hold a lower-grade
2899  * lock).
2900  */
2901  lock_waiter_detected = false;
2902  lock_retry = 0;
2903  while (true)
2904  {
2905  if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
2906  break;
2907 
2908  /*
2909  * Check for interrupts while trying to (re-)acquire the exclusive
2910  * lock.
2911  */
2912  CHECK_FOR_INTERRUPTS();
2913 
2914  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2915  VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
2916  {
2917  /*
2918  * We failed to establish the lock in the specified number of
2919  * retries. This means we give up truncating.
2920  */
2921  ereport(vacrel->verbose ? INFO : DEBUG2,
2922  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2923  vacrel->relname)));
2924  return;
2925  }
2926 
2927  (void) WaitLatch(MyLatch,
2928  WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
2929  VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
2930  WAIT_EVENT_VACUUM_TRUNCATE);
2931  ResetLatch(MyLatch);
2932  }
2933 
2934  /*
2935  * Now that we have exclusive lock, look to see if the rel has grown
2936  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2937  * the newly added pages presumably contain non-deletable tuples.
2938  */
2939  new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2940  if (new_rel_pages != orig_rel_pages)
2941  {
2942  /*
2943  * Note: we intentionally don't update vacrel->rel_pages with the
2944  * new rel size here. If we did, it would amount to assuming that
2945  * the new pages are empty, which is unlikely. Leaving the numbers
2946  * alone amounts to assuming that the new pages have the same
2947  * tuple density as existing ones, which is less unlikely.
2948  */
2949  UnlockRelation(vacrel->rel, AccessExclusiveLock);
2950  return;
2951  }
2952 
2953  /*
2954  * Scan backwards from the end to verify that the end pages actually
2955  * contain no tuples. This is *necessary*, not optional, because
2956  * other backends could have added tuples to these pages whilst we
2957  * were vacuuming.
2958  */
2959  new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2960  vacrel->blkno = new_rel_pages;
2961 
2962  if (new_rel_pages >= orig_rel_pages)
2963  {
2964  /* can't do anything after all */
2965  UnlockRelation(vacrel->rel, AccessExclusiveLock);
2966  return;
2967  }
2968 
2969  /*
2970  * Okay to truncate.
2971  */
2972  RelationTruncate(vacrel->rel, new_rel_pages);
2973 
2974  /*
2975  * We can release the exclusive lock as soon as we have truncated.
2976  * Other backends can't safely access the relation until they have
2977  * processed the smgr invalidation that smgrtruncate sent out ... but
2978  * that should happen as part of standard invalidation processing once
2979  * they acquire lock on the relation.
2980  */
2981  UnlockRelation(vacrel->rel, AccessExclusiveLock);
2982 
2983  /*
2984  * Update statistics. Here, it *is* correct to adjust rel_pages
2985  * without also touching reltuples, since the tuple count wasn't
2986  * changed by the truncation.
2987  */
2988  vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2989  vacrel->rel_pages = new_rel_pages;
2990 
2991  ereport(vacrel->verbose ? INFO : DEBUG2,
2992  (errmsg("table \"%s\": truncated %u to %u pages",
2993  vacrel->relname,
2994  orig_rel_pages, new_rel_pages)));
2995  orig_rel_pages = new_rel_pages;
2996  } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2997 }
2998 
2999 /*
3000  * Rescan end pages to verify that they are (still) empty of tuples.
3001  *
3002  * Returns number of nondeletable pages (last nonempty page + 1).
3003  */
3004 static BlockNumber
3005 count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3006 {
3007  BlockNumber blkno;
3008  BlockNumber prefetchedUntil;
3009  instr_time starttime;
3010 
3011  /* Initialize the starttime if we check for conflicting lock requests */
3012  INSTR_TIME_SET_CURRENT(starttime);
3013 
3014  /*
3015  * Start checking blocks at what we believe relation end to be and move
3016  * backwards. (Strange coding of loop control is needed because blkno is
3017  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3018  * in forward direction, so that OS-level readahead can kick in.
3019  */
3020  blkno = vacrel->rel_pages;
3021  StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3022  "prefetch size must be power of 2");
3023  prefetchedUntil = InvalidBlockNumber;
3024  while (blkno > vacrel->nonempty_pages)
3025  {
3026  Buffer buf;
3027  Page page;
3028  OffsetNumber offnum,
3029  maxoff;
3030  bool hastup;
3031 
3032  /*
3033  * Check if another process requests a lock on our relation. We are
3034  * holding an AccessExclusiveLock here, so they will be waiting. We
3035  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3036  * only check if that interval has elapsed once every 32 blocks to
3037  * keep the number of system calls and actual shared lock table
3038  * lookups to a minimum.
3039  */
3040  if ((blkno % 32) == 0)
3041  {
3042  instr_time currenttime;
3043  instr_time elapsed;
3044 
3045  INSTR_TIME_SET_CURRENT(currenttime);
3046  elapsed = currenttime;
3047  INSTR_TIME_SUBTRACT(elapsed, starttime);
3048  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3049  >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3050  {
3051  if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3052  {
3053  ereport(vacrel->verbose ? INFO : DEBUG2,
3054  (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3055  vacrel->relname)));
3056 
3057  *lock_waiter_detected = true;
3058  return blkno;
3059  }
3060  starttime = currenttime;
3061  }
3062  }
3063 
3064  /*
3065  * We don't insert a vacuum delay point here, because we have an
3066  * exclusive lock on the table which we want to hold for as short a
3067  * time as possible. We still need to check for interrupts however.
3068  */
3069  CHECK_FOR_INTERRUPTS();
3070 
3071  blkno--;
3072 
3073  /* If we haven't prefetched this lot yet, do so now. */
3074  if (prefetchedUntil > blkno)
3075  {
3076  BlockNumber prefetchStart;
3077  BlockNumber pblkno;
3078 
3079  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
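 /*
  * The mask rounds blkno down to a multiple of PREFETCH_SIZE (required to be
  * a power of two by the static assertion above), so each pass prefetches one
  * aligned group of blocks in forward order.
  */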
3080  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3081  {
3082  PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3083  CHECK_FOR_INTERRUPTS();
3084  }
3085  prefetchedUntil = prefetchStart;
3086  }
3087 
3088  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3089  vacrel->bstrategy);
3090 
3091  /* In this phase we only need shared access to the buffer */
3092  LockBuffer(buf, BUFFER_LOCK_SHARE);
3093 
3094  page = BufferGetPage(buf);
3095 
3096  if (PageIsNew(page) || PageIsEmpty(page))
3097  {
3098  UnlockReleaseBuffer(buf);
3099  continue;
3100  }
3101 
3102  hastup = false;
3103  maxoff = PageGetMaxOffsetNumber(page);
3104  for (offnum = FirstOffsetNumber;
3105  offnum <= maxoff;
3106  offnum = OffsetNumberNext(offnum))
3107  {
3108  ItemId itemid;
3109 
3110  itemid = PageGetItemId(page, offnum);
3111 
3112  /*
3113  * Note: any non-unused item should be taken as a reason to keep
3114  * this page. Even an LP_DEAD item makes truncation unsafe, since
3115  * we must not have cleaned out its index entries.
3116  */
3117  if (ItemIdIsUsed(itemid))
3118  {
3119  hastup = true;
3120  break; /* can stop scanning */
3121  }
3122  } /* scan along page */
3123 
3124  UnlockReleaseBuffer(buf);
3125 
3126  /* Done scanning if we found a tuple here */
3127  if (hastup)
3128  return blkno + 1;
3129  }
3130 
3131  /*
3132  * If we fall out of the loop, all the previously-thought-to-be-empty
3133  * pages still are; we need not bother to look at the last known-nonempty
3134  * page.
3135  */
3136  return vacrel->nonempty_pages;
3137 }
3138 
3139 /*
3140  * Returns the number of dead TIDs that VACUUM should allocate space to
3141  * store, given a heap rel of size vacrel->rel_pages, and given current
3142  * maintenance_work_mem setting (or current autovacuum_work_mem setting,
3143  * when applicable).
3144  *
3145  * See the comments at the head of this file for rationale.
3146  */
3147 static int
3148 dead_items_max_items(LVRelState *vacrel)
3149 {
3150  int64 max_items;
3151  int vac_work_mem = IsAutoVacuumWorkerProcess() &&
3152  autovacuum_work_mem != -1 ?
3153  autovacuum_work_mem : maintenance_work_mem;
3154 
3155  if (vacrel->nindexes > 0)
3156  {
3157  BlockNumber rel_pages = vacrel->rel_pages;
3158 
3159  max_items = MAXDEADITEMS(vac_work_mem * 1024L);
3160  max_items = Min(max_items, INT_MAX);
3161  max_items = Min(max_items, MAXDEADITEMS(MaxAllocSize));
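 /*
  * As a rough guide (each ItemPointerData is 6 bytes): a 64MB
  * maintenance_work_mem budget works out to roughly 11 million dead TIDs
  * here, before the table-size clamp below is applied.
  */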
3162 
3163  /* curious coding here to ensure the multiplication can't overflow */
3164  if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
3165  max_items = rel_pages * MaxHeapTuplesPerPage;
3166 
3167  /* stay sane if small maintenance_work_mem */
3168  max_items = Max(max_items, MaxHeapTuplesPerPage);
3169  }
3170  else
3171  {
3172  /* One-pass case only stores a single heap page's TIDs at a time */
3173  max_items = MaxHeapTuplesPerPage;
3174  }
3175 
3176  return (int) max_items;
3177 }
3178 
3179 /*
3180  * Allocate dead_items (either using palloc, or in dynamic shared memory).
3181  * Sets dead_items in vacrel for caller.
3182  *
3183  * Also handles parallel initialization as part of allocating dead_items in
3184  * DSM when required.
3185  */
3186 static void
3187 dead_items_alloc(LVRelState *vacrel, int nworkers)
3188 {
3189  VacDeadItems *dead_items;
3190  int max_items;
3191 
3192  max_items = dead_items_max_items(vacrel);
3193  Assert(max_items >= MaxHeapTuplesPerPage);
3194 
3195  /*
3196  * Initialize state for a parallel vacuum. As of now, only one worker can
3197  * be used for an index, so we invoke parallelism only if there are at
3198  * least two indexes on a table.
3199  */
3200  if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3201  {
3202  /*
3203  * Since parallel workers cannot access data in temporary tables, we
3204  * can't perform parallel vacuum on them.
3205  */
3206  if (RelationUsesLocalBuffers(vacrel->rel))
3207  {
3208  /*
3209  * Give warning only if the user explicitly tries to perform a
3210  * parallel vacuum on the temporary table.
3211  */
3212  if (nworkers > 0)
3213  ereport(WARNING,
3214  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3215  vacrel->relname)));
3216  }
3217  else
3218  vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3219  vacrel->nindexes, nworkers,
3220  max_items,
3221  vacrel->verbose ? INFO : DEBUG2,
3222  vacrel->bstrategy);
3223 
3224  /* If parallel mode started, dead_items space is allocated in DSM */
3225  if (ParallelVacuumIsActive(vacrel))
3226  {
3227  vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs);
3228  return;
3229  }
3230  }
3231 
3232  /* Serial VACUUM case */
3233  dead_items = (VacDeadItems *) palloc(vac_max_items_to_alloc_size(max_items));
3234  dead_items->max_items = max_items;
3235  dead_items->num_items = 0;
3236 
3237  vacrel->dead_items = dead_items;
3238 }
3239 
3240 /*
3241  * Perform cleanup for resources allocated in dead_items_alloc
3242  */
3243 static void
3244 dead_items_cleanup(LVRelState *vacrel)
3245 {
3246  if (!ParallelVacuumIsActive(vacrel))
3247  {
3248  /* Don't bother with pfree here */
3249  return;
3250  }
3251 
3252  /* End parallel mode */
3253  parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3254  vacrel->pvs = NULL;
3255 }
3256 
3257 /*
3258  * Check if every tuple in the given page is visible to all current and future
3259  * transactions. Also return the visibility_cutoff_xid which is the highest
3260  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3261  * on this page is frozen.
3262  *
3263  * This is a stripped down version of lazy_scan_prune(). If you change
3264  * anything here, make sure that everything stays in sync. Note that an
3265  * assertion calls us to verify that everybody still agrees. Be sure to avoid
3266  * introducing new side-effects here.
3267  */
3268 static bool
3269 heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3270  TransactionId *visibility_cutoff_xid,
3271  bool *all_frozen)
3272 {
3273  Page page = BufferGetPage(buf);
3274  BlockNumber blockno = BufferGetBlockNumber(buf);
3275  OffsetNumber offnum,
3276  maxoff;
3277  bool all_visible = true;
3278 
3279  *visibility_cutoff_xid = InvalidTransactionId;
3280  *all_frozen = true;
3281 
3282  maxoff = PageGetMaxOffsetNumber(page);
3283  for (offnum = FirstOffsetNumber;
3284  offnum <= maxoff && all_visible;
3285  offnum = OffsetNumberNext(offnum))
3286  {
3287  ItemId itemid;
3288  HeapTupleData tuple;
3289 
3290  /*
3291  * Set the offset number so that we can display it along with any
3292  * error that occurred while processing this tuple.
3293  */
3294  vacrel->offnum = offnum;
3295  itemid = PageGetItemId(page, offnum);
3296 
3297  /* Unused or redirect line pointers are of no interest */
3298  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3299  continue;
3300 
3301  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3302 
3303  /*
3304  * Dead line pointers can have index pointers pointing to them. So
3305  * they can't be treated as visible
3306  */
3307  if (ItemIdIsDead(itemid))
3308  {
3309  all_visible = false;
3310  *all_frozen = false;
3311  break;
3312  }
3313 
3314  Assert(ItemIdIsNormal(itemid));
3315 
3316  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3317  tuple.t_len = ItemIdGetLength(itemid);
3318  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3319 
3320  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3321  buf))
3322  {
3323  case HEAPTUPLE_LIVE:
3324  {
3325  TransactionId xmin;
3326 
3327  /* Check comments in lazy_scan_prune. */
3328  if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3329  {
3330  all_visible = false;
3331  *all_frozen = false;
3332  break;
3333  }
3334 
3335  /*
3336  * The inserter definitely committed. But is it old enough
3337  * that everyone sees it as committed?
3338  */
3339  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3340  if (!TransactionIdPrecedes(xmin,
3341  vacrel->cutoffs.OldestXmin))
3342  {
3343  all_visible = false;
3344  *all_frozen = false;
3345  break;
3346  }
3347 
3348  /* Track newest xmin on page. */
3349  if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3350  TransactionIdIsNormal(xmin))
3351  *visibility_cutoff_xid = xmin;
3352 
3353  /* Check whether this tuple is already frozen or not */
3354  if (all_visible && *all_frozen &&
3355  heap_tuple_needs_eventual_freeze(tuple.t_data))
3356  *all_frozen = false;
3357  }
3358  break;
3359 
3360  case HEAPTUPLE_DEAD:
3361  case HEAPTUPLE_RECENTLY_DEAD:
3362  case HEAPTUPLE_INSERT_IN_PROGRESS:
3363  case HEAPTUPLE_DELETE_IN_PROGRESS:
3364  {
3365  all_visible = false;
3366  *all_frozen = false;
3367  break;
3368  }
3369  default:
3370  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3371  break;
3372  }
3373  } /* scan along page */
3374 
3375  /* Clear the offset information once we have processed the given page. */
3376  vacrel->offnum = InvalidOffsetNumber;
3377 
3378  return all_visible;
3379 }
3380 
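To illustrate how the return values are typically consumed, here is a hedged
sketch rather than code from this file: maybe_set_vm_bits is a hypothetical
helper, and the cutoff and flag handling is simplified relative to
lazy_vacuum_heap_page(). If the page proves all-visible, the caller can set
the visibility map bits, using the returned visibility_cutoff_xid as the
conflict cutoff and adding VISIBILITYMAP_ALL_FROZEN when *all_frozen is true.

static void
maybe_set_vm_bits(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
                  Buffer vmbuffer)
{
    TransactionId visibility_cutoff_xid;
    bool        all_frozen;

    if (heap_page_is_all_visible(vacrel, buf, &visibility_cutoff_xid,
                                 &all_frozen))
    {
        uint8       flags = VISIBILITYMAP_ALL_VISIBLE;

        if (all_frozen)
            flags |= VISIBILITYMAP_ALL_FROZEN;

        /* Record the page as all-visible (and possibly all-frozen) */
        visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
                          vmbuffer, visibility_cutoff_xid, flags);
    }
}
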
3381 /*
3382  * Update index statistics in pg_class if the statistics are accurate.
3383  */
3384 static void
3385 update_relstats_all_indexes(LVRelState *vacrel)
3386 {
3387  Relation *indrels = vacrel->indrels;
3388  int nindexes = vacrel->nindexes;
3389  IndexBulkDeleteResult **indstats = vacrel->indstats;
3390 
3391  Assert(vacrel->do_index_cleanup);
3392 
3393  for (int idx = 0; idx < nindexes; idx++)
3394  {
3395  Relation indrel = indrels[idx];
3396  IndexBulkDeleteResult *istat = indstats[idx];
3397 
3398  if (istat == NULL || istat->estimated_count)
3399  continue;
3400 
3401  /* Update index statistics */
3402  vac_update_relstats(indrel,
3403  istat->num_pages,
3404  istat->num_index_tuples,
3405  0,
3406  false,
3407  InvalidTransactionId,
3408  InvalidMultiXactId,
3409  NULL, NULL, false);
3410  }
3411 }
3412 
3413 /*
3414  * Error context callback for errors occurring during vacuum. The error
3415  * context messages for index phases should match the messages set in parallel
3416  * vacuum. If you change this function for those phases, change
3417  * parallel_vacuum_error_callback() as well.
3418  */
3419 static void
3420 vacuum_error_callback(void *arg)
3421 {
3422  LVRelState *errinfo = arg;
3423 
3424  switch (errinfo->phase)
3425  {
3426  case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3427  if (BlockNumberIsValid(errinfo->blkno))
3428  {
3429  if (OffsetNumberIsValid(errinfo->offnum))
3430  errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3431  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3432  else
3433  errcontext("while scanning block %u of relation \"%s.%s\"",
3434  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3435  }
3436  else
3437  errcontext("while scanning relation \"%s.%s\"",
3438  errinfo->relnamespace, errinfo->relname);
3439  break;
3440 
3441  case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3442  if (BlockNumberIsValid(errinfo->blkno))
3443  {
3444  if (OffsetNumberIsValid(errinfo->offnum))
3445  errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3446  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3447  else
3448  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3449  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3450  }
3451  else
3452  errcontext("while vacuuming relation \"%s.%s\"",
3453  errinfo->relnamespace, errinfo->relname);
3454  break;
3455 
3456  case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3457  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3458  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3459  break;
3460 
3461  case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3462  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3463  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3464  break;
3465 
3466  case VACUUM_ERRCB_PHASE_TRUNCATE:
3467  if (BlockNumberIsValid(errinfo->blkno))
3468  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3469  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3470  break;
3471 
3472  case VACUUM_ERRCB_PHASE_UNKNOWN:
3473  default:
3474  return; /* do nothing; the errinfo may not be
3475  * initialized */
3476  }
3477 }
3478 
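For context, such a callback is ordinarily installed by pushing an
ErrorContextCallback onto error_context_stack before the work and popping it
afterwards. The sketch below is illustrative only: run_with_vacuum_errcontext
is a hypothetical wrapper, not a function in this file, though the
callback/arg/previous fields and error_context_stack are the standard elog.h
machinery.

static void
run_with_vacuum_errcontext(LVRelState *vacrel)
{
    ErrorContextCallback errcallback;

    /* Push our callback onto the error context stack */
    errcallback.callback = vacuum_error_callback;
    errcallback.arg = vacrel;
    errcallback.previous = error_context_stack;
    error_context_stack = &errcallback;

    /* ... work that may ereport() goes here ... */

    /* Pop the error context stack again */
    error_context_stack = errcallback.previous;
}
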
3479 /*
3480  * Updates the information required for vacuum error callback. This also saves
3481  * the current information which can be later restored via restore_vacuum_error_info.
3482  */
3483 static void
3484 update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3485  int phase, BlockNumber blkno, OffsetNumber offnum)
3486 {
3487  if (saved_vacrel)
3488  {
3489  saved_vacrel->offnum = vacrel->offnum;
3490  saved_vacrel->blkno = vacrel->blkno;
3491  saved_vacrel->phase = vacrel->phase;
3492  }
3493 
3494  vacrel->blkno = blkno;
3495  vacrel->offnum = offnum;
3496  vacrel->phase = phase;
3497 }
3498 
3499 /*
3500  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3501  */
3502 static void
3503 restore_vacuum_error_info(LVRelState *vacrel,
3504  const LVSavedErrInfo *saved_vacrel)
3505 {
3506  vacrel->blkno = saved_vacrel->blkno;
3507  vacrel->offnum = saved_vacrel->offnum;
3508  vacrel->phase = saved_vacrel->phase;
3509 }
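
The two helpers above are meant to be used as a pair around a temporary phase
change. A minimal sketch of that pattern follows; example_phase_switch is a
hypothetical caller, not part of this file, and the phase and block values
are placeholders only.

static void
example_phase_switch(LVRelState *vacrel, BlockNumber blkno)
{
    LVSavedErrInfo saved_err_info;

    /* Switch error reporting to the heap-vacuum phase for this block */
    update_vacuum_error_info(vacrel, &saved_err_info,
                             VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
                             InvalidOffsetNumber);

    /* ... per-block work that may ereport() goes here ... */

    /* Revert to the caller's previous error reporting state */
    restore_vacuum_error_info(vacrel, &saved_err_info);
}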