PostgreSQL Source Code  git master
vacuumlazy.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * vacuumlazy.c
4  * Concurrent ("lazy") vacuuming.
5  *
6  * The major space usage for vacuuming is storage for the dead tuple IDs that
7  * are to be removed from indexes. We want to ensure we can vacuum even the
8  * very largest relations with finite memory space usage. To do that, we set
9  * upper bounds on the memory that can be used for keeping track of dead TIDs
10  * at once.
11  *
12  * We are willing to use at most maintenance_work_mem (or perhaps
13  * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
14  * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
15  * the pages that we've pruned). This frees up the memory space dedicated to
16  * storing dead TIDs.
17  *
18  * In practice VACUUM will often complete its initial pass over the target
19  * heap relation without ever running out of space to store TIDs. This means
20  * that there only needs to be one call to lazy_vacuum, after the initial pass
21  * completes.
22  *
23  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  *
27  * IDENTIFICATION
28  * src/backend/access/heap/vacuumlazy.c
29  *
30  *-------------------------------------------------------------------------
31  */
32 #include "postgres.h"
33 
34 #include <math.h>
35 
36 #include "access/genam.h"
37 #include "access/heapam.h"
38 #include "access/heapam_xlog.h"
39 #include "access/htup_details.h"
40 #include "access/multixact.h"
41 #include "access/tidstore.h"
42 #include "access/transam.h"
43 #include "access/visibilitymap.h"
44 #include "access/xloginsert.h"
45 #include "catalog/storage.h"
46 #include "commands/dbcommands.h"
47 #include "commands/progress.h"
48 #include "commands/vacuum.h"
49 #include "common/int.h"
50 #include "executor/instrument.h"
51 #include "miscadmin.h"
52 #include "pgstat.h"
53 #include "portability/instr_time.h"
54 #include "postmaster/autovacuum.h"
55 #include "storage/bufmgr.h"
56 #include "storage/freespace.h"
57 #include "storage/lmgr.h"
58 #include "utils/lsyscache.h"
59 #include "utils/memutils.h"
60 #include "utils/pg_rusage.h"
61 #include "utils/timestamp.h"
62 
63 
64 /*
65  * Space/time tradeoff parameters: do these need to be user-tunable?
66  *
67  * To consider truncating the relation, we want there to be at least
68  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
69  * is less) potentially-freeable pages.
70  */
71 #define REL_TRUNCATE_MINIMUM 1000 /* pages */
72 #define REL_TRUNCATE_FRACTION 16 /* divisor applied to relsize */
73 
74 /*
75  * Timing parameters for truncate locking heuristics.
76  *
77  * These were not exposed as user tunable GUC values because it didn't seem
78  * that the potential for improvement was great enough to merit the cost of
79  * supporting them.
80  */
81 #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
82 #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
83 #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
84 
85 /*
86  * Threshold that controls whether we bypass index vacuuming and heap
87  * vacuuming as an optimization.  Expressed as a fraction of rel_pages.
88  */
89 #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
90 
91 /*
92  * Perform a failsafe check each time we scan another 4GB of pages.
93  * (Note that this is deliberately kept to a power-of-two, usually 2^19;
     * that is the value when BLCKSZ is 8192.)  The result is a page count.
94  */
95 #define FAILSAFE_EVERY_PAGES \
96  ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
97 
98 /*
99  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
100  * (it won't be exact because we only vacuum FSM after processing a heap page
101  * that has some removable tuples). When there are indexes, this is ignored,
102  * and we vacuum FSM after each index/heap cleaning pass.  The result is a
     * page count (8GB divided by BLCKSZ).
103  */
104 #define VACUUM_FSM_EVERY_PAGES \
105  ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
106 
107 /*
108  * Before we consider skipping a page that's marked as clean in
109  * visibility map, we must've seen at least this many clean pages.
110  */
111 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32) /* pages */
112 
113 /*
114  * Size of the prefetch window for lazy vacuum backwards truncation scan.
115  * Needs to be a power of 2.  Measured in blocks.
116  */
117 #define PREFETCH_SIZE ((BlockNumber) 32)
118 
119 /*
120  * Macro to check if we are in a parallel vacuum. If true, we are in the
121  * parallel mode and the DSM segment is initialized.  Tests only whether
     * vacrel->pvs is non-NULL.
122  */
123 #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
124 
125 /* Phases of vacuum during which we report error context. */
126 typedef enum
127 {
134 } VacErrPhase;
135 
/*
 * LVRelState -- working state for one lazy VACUUM of a single heap relation.
 *
 * heap_vacuum_rel() allocates one zero-filled instance with palloc0(), fills
 * in the option flags, cutoffs and counters, and then hands the same pointer
 * to lazy_scan_heap() and the other static helpers in this file.  The same
 * struct is also registered as the argument of the error context callback,
 * which is why it carries the names and the current block/offset.
 */
136 typedef struct LVRelState
137 {
138  /* Target heap relation and its indexes */
141  int nindexes; /* # of indexes; set by vac_open_indexes() */
142 
143  /* Buffer access strategy and parallel vacuum state */
146 
147  /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
149  /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
151  /* Consider index vacuuming bypass optimization? */
153 
154  /* Doing index vacuuming, index cleanup, rel truncation? */
158 
159  /* VACUUM operation's cutoffs for freezing and pruning */
160  struct VacuumCutoffs cutoffs; /* filled in by vacuum_get_cutoffs() */
162  /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
166 
167  /* Error reporting state */
168  char *dbname;
170  char *relname; /* pstrdup'd copy of the heap rel's name */
171  char *indname; /* Current index name */
172  BlockNumber blkno; /* used only for heap operations */
173  OffsetNumber offnum; /* used only for heap operations */
175  bool verbose; /* VACUUM VERBOSE? */
176 
177  /*
178  * dead_items stores TIDs whose index tuples are deleted by index
179  * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
180  * that has been processed by lazy_scan_prune. Also needed by
181  * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
182  * LP_UNUSED during second heap pass.
183  *
184  * Both dead_items and dead_items_info are allocated in shared memory in
185  * parallel vacuum cases.
186  */
187  TidStore *dead_items; /* TIDs whose index tuples we'll delete */
189 
190  BlockNumber rel_pages; /* total number of pages */
191  BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
192  BlockNumber removed_pages; /* # pages removed by relation truncation */
193  BlockNumber frozen_pages; /* # pages with newly frozen tuples */
194  BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
195  BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
196  BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
197 
198  /* Statistics output by us, for table */
199  double new_rel_tuples; /* new estimated total # of tuples */
200  double new_live_tuples; /* new estimated total # of live tuples */
201  /* Statistics output by index AMs */
203 
204  /* Instrumentation counters */
206  /* Counters that follow are only for scanned_pages */
207  int64 tuples_deleted; /* # deleted from table */
208  int64 tuples_frozen; /* # newly frozen */
209  int64 lpdead_items; /* # deleted from indexes */
210  int64 live_tuples; /* # live tuples remaining */
211  int64 recently_dead_tuples; /* # dead, but not yet removable */
212  int64 missed_dead_tuples; /* # removable, but not removed */
213 
214  /* State maintained by heap_vac_scan_next_block() */
215  BlockNumber current_block; /* last block returned */
216  BlockNumber next_unskippable_block; /* next unskippable block */
217  bool next_unskippable_allvis; /* its visibility status */
218  Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
220 
221 /* Struct for saving and restoring vacuum error information. */
222 typedef struct LVSavedErrInfo
223 {
228 
229 
230 /* non-export function prototypes */
231 static void lazy_scan_heap(LVRelState *vacrel);
232 static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
233  bool *all_visible_according_to_vm);
234 static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
235 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
236  BlockNumber blkno, Page page,
237  bool sharelock, Buffer vmbuffer);
238 static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
239  BlockNumber blkno, Page page,
240  Buffer vmbuffer, bool all_visible_according_to_vm,
241  bool *has_lpdead_items);
242 static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
243  BlockNumber blkno, Page page,
244  bool *has_lpdead_items);
245 static void lazy_vacuum(LVRelState *vacrel);
246 static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
247 static void lazy_vacuum_heap_rel(LVRelState *vacrel);
248 static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
249  Buffer buffer, OffsetNumber *deadoffsets,
250  int num_offsets, Buffer vmbuffer);
251 static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
252 static void lazy_cleanup_all_indexes(LVRelState *vacrel);
254  IndexBulkDeleteResult *istat,
255  double reltuples,
256  LVRelState *vacrel);
258  IndexBulkDeleteResult *istat,
259  double reltuples,
260  bool estimated_count,
261  LVRelState *vacrel);
262 static bool should_attempt_truncation(LVRelState *vacrel);
263 static void lazy_truncate_heap(LVRelState *vacrel);
265  bool *lock_waiter_detected);
266 static void dead_items_alloc(LVRelState *vacrel, int nworkers);
267 static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
268  int num_offsets);
269 static void dead_items_reset(LVRelState *vacrel);
270 static void dead_items_cleanup(LVRelState *vacrel);
271 static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
272  TransactionId *visibility_cutoff_xid, bool *all_frozen);
273 static void update_relstats_all_indexes(LVRelState *vacrel);
274 static void vacuum_error_callback(void *arg);
275 static void update_vacuum_error_info(LVRelState *vacrel,
276  LVSavedErrInfo *saved_vacrel,
277  int phase, BlockNumber blkno,
278  OffsetNumber offnum);
279 static void restore_vacuum_error_info(LVRelState *vacrel,
280  const LVSavedErrInfo *saved_vacrel);
281 
282 
283 /*
284  * heap_vacuum_rel() -- perform VACUUM for one heap relation
285  *
286  * This routine sets things up for and then calls lazy_scan_heap, where
287  * almost all work actually takes place. Finalizes everything after call
288  * returns by managing relation truncation and updating rel's pg_class
289  * entry. (Also updates pg_class entries for any indexes that need it.)
290  *
291  * At entry, we have already established a transaction and opened
292  * and locked the relation.
     *
     * rel is the heap relation to vacuum.  params supplies the VACUUM options
     * consulted here (options, log_min_duration, index_cleanup, truncate,
     * nworkers, is_wraparound).  bstrategy is the buffer access strategy; it
     * is stored in vacrel->bstrategy for the heap scan.
     *
     * When VERBOSE was requested, or when running in an autovacuum worker with
     * log_min_duration >= 0, resource usage is sampled at entry and a summary
     * report is emitted at the end (INFO for VERBOSE, otherwise LOG).
293  */
294 void
296  BufferAccessStrategy bstrategy)
297 {
298  LVRelState *vacrel;
299  bool verbose,
300  instrument,
301  skipwithvm,
302  frozenxid_updated,
303  minmulti_updated;
304  BlockNumber orig_rel_pages,
305  new_rel_pages,
306  new_rel_allvisible;
307  PGRUsage ru0;
308  TimestampTz starttime = 0; /* sampled only when instrument is set */
309  PgStat_Counter startreadtime = 0,
310  startwritetime = 0;
311  WalUsage startwalusage = pgWalUsage;
312  BufferUsage startbufferusage = pgBufferUsage;
313  ErrorContextCallback errcallback;
314  char **indnames = NULL; /* allocated only if instrument && nindexes > 0 */
315 
316  verbose = (params->options & VACOPT_VERBOSE) != 0;
317  instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
318  params->log_min_duration >= 0));
319  if (instrument)
320  {
321  pg_rusage_init(&ru0);
322  starttime = GetCurrentTimestamp();
323  if (track_io_timing)
324  {
325  startreadtime = pgStatBlockReadTime;
326  startwritetime = pgStatBlockWriteTime;
327  }
328  }
329 
331  RelationGetRelid(rel));
332 
333  /*
334  * Setup error traceback support for ereport() first. The idea is to set
335  * up an error context callback to display additional information on any
336  * error during a vacuum. During different phases of vacuum, we update
337  * the state so that the error context callback always displays current
338  * information.
339  *
340  * Copy the names of heap rel into local memory for error reporting
341  * purposes, too. It isn't always safe to assume that we can get the name
342  * of each rel. It's convenient for code in lazy_scan_heap to always use
343  * these temp copies.
344  */
345  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
348  vacrel->relname = pstrdup(RelationGetRelationName(rel));
349  vacrel->indname = NULL;
351  vacrel->verbose = verbose;
352  errcallback.callback = vacuum_error_callback;
353  errcallback.arg = vacrel;
354  errcallback.previous = error_context_stack;
355  error_context_stack = &errcallback;
356 
357  /* Set up high level stuff about rel and its indexes */
358  vacrel->rel = rel;
359  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
360  &vacrel->indrels);
361  vacrel->bstrategy = bstrategy;
362  if (instrument && vacrel->nindexes > 0)
363  {
364  /* Copy index names used by instrumentation (not error reporting) */
365  indnames = palloc(sizeof(char *) * vacrel->nindexes);
366  for (int i = 0; i < vacrel->nindexes; i++)
367  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
368  }
369 
370  /*
371  * The index_cleanup param either disables index vacuuming and cleanup or
372  * forces it to go ahead when we would otherwise apply the index bypass
373  * optimization. The default is 'auto', which leaves the final decision
374  * up to lazy_vacuum().
375  *
376  * The truncate param allows user to avoid attempting relation truncation,
377  * though it can't force truncation to happen.
378  */
381  params->truncate != VACOPTVALUE_AUTO);
382 
383  /*
384  * While VacuumFailsafeActive is reset to false before calling this, we
385  * still need to reset it here due to recursive calls.
386  */
387  VacuumFailsafeActive = false;
388  vacrel->consider_bypass_optimization = true;
389  vacrel->do_index_vacuuming = true;
390  vacrel->do_index_cleanup = true;
391  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
392  if (params->index_cleanup == VACOPTVALUE_DISABLED)
393  {
394  /* Force disable index vacuuming up-front */
395  vacrel->do_index_vacuuming = false;
396  vacrel->do_index_cleanup = false;
397  }
398  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
399  {
400  /* Force index vacuuming. Note that failsafe can still bypass. */
401  vacrel->consider_bypass_optimization = false;
402  }
403  else
404  {
405  /* Default/auto, make all decisions dynamically */
407  }
408 
409  /* Initialize page counters explicitly (be tidy) */
410  vacrel->scanned_pages = 0;
411  vacrel->removed_pages = 0;
412  vacrel->frozen_pages = 0;
413  vacrel->lpdead_item_pages = 0;
414  vacrel->missed_dead_pages = 0;
415  vacrel->nonempty_pages = 0;
416  /* dead_items_alloc allocates vacrel->dead_items later on */
417 
418  /* Allocate/initialize output statistics state */
419  vacrel->new_rel_tuples = 0;
420  vacrel->new_live_tuples = 0;
421  vacrel->indstats = (IndexBulkDeleteResult **)
422  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
423 
424  /* Initialize remaining counters (be tidy) */
425  vacrel->num_index_scans = 0;
426  vacrel->tuples_deleted = 0;
427  vacrel->tuples_frozen = 0;
428  vacrel->lpdead_items = 0;
429  vacrel->live_tuples = 0;
430  vacrel->recently_dead_tuples = 0;
431  vacrel->missed_dead_tuples = 0;
432 
433  /*
434  * Get cutoffs that determine which deleted tuples are considered DEAD,
435  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
436  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
437  * happen in this order to ensure that the OldestXmin cutoff field works
438  * as an upper bound on the XIDs stored in the pages we'll actually scan
439  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
440  *
441  * Next acquire vistest, a related cutoff that's used in pruning. We use
442  * vistest in combination with OldestXmin to ensure that
443  * heap_page_prune_and_freeze() always removes any deleted tuple whose
444  * xmax is < OldestXmin. lazy_scan_prune must never become confused about
445  * whether a tuple should be frozen or removed. (In the future we might
446  * want to teach lazy_scan_prune to recompute vistest from time to time,
447  * to increase the number of dead tuples it can prune away.)
448  */
449  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
450  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
451  vacrel->vistest = GlobalVisTestFor(rel);
452  /* Initialize state used to track oldest extant XID/MXID */
453  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
454  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
455  vacrel->skippedallvis = false;
456  skipwithvm = true;
458  {
459  /*
460  * Force aggressive mode, and disable skipping blocks using the
461  * visibility map (even those set all-frozen)
462  */
463  vacrel->aggressive = true;
464  skipwithvm = false;
465  }
466 
467  vacrel->skipwithvm = skipwithvm;
468 
469  if (verbose)
470  {
471  if (vacrel->aggressive)
472  ereport(INFO,
473  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
474  vacrel->dbname, vacrel->relnamespace,
475  vacrel->relname)));
476  else
477  ereport(INFO,
478  (errmsg("vacuuming \"%s.%s.%s\"",
479  vacrel->dbname, vacrel->relnamespace,
480  vacrel->relname)));
481  }
482 
483  /*
484  * Allocate dead_items memory using dead_items_alloc. This handles
485  * parallel VACUUM initialization as part of allocating shared memory
486  * space used for dead_items. (But do a failsafe precheck first, to
487  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
488  * is already dangerously old.)
489  */
491  dead_items_alloc(vacrel, params->nworkers);
492 
493  /*
494  * Call lazy_scan_heap to perform all required heap pruning, index
495  * vacuuming, and heap vacuuming (plus related processing)
496  */
497  lazy_scan_heap(vacrel);
498 
499  /*
500  * Free resources managed by dead_items_alloc. This ends parallel mode in
501  * passing when necessary.
502  */
503  dead_items_cleanup(vacrel);
505 
506  /*
507  * Update pg_class entries for each of rel's indexes where appropriate.
508  *
509  * Unlike the later update to rel's pg_class entry, this is not critical.
510  * Maintains relpages/reltuples statistics used by the planner only.
511  */
512  if (vacrel->do_index_cleanup)
514 
515  /* Done with rel's indexes */
516  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
517 
518  /* Optionally truncate rel */
519  if (should_attempt_truncation(vacrel))
520  lazy_truncate_heap(vacrel);
521 
522  /* Pop the error context stack */
523  error_context_stack = errcallback.previous;
524 
525  /* Report that we are now doing final cleanup */
528 
529  /*
530  * Prepare to update rel's pg_class entry.
531  *
532  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
533  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
534  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
535  */
536  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
538  vacrel->cutoffs.relfrozenxid,
539  vacrel->NewRelfrozenXid));
540  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
542  vacrel->cutoffs.relminmxid,
543  vacrel->NewRelminMxid));
544  if (vacrel->skippedallvis)
545  {
546  /*
547  * Must keep original relfrozenxid in a non-aggressive VACUUM that
548  * chose to skip an all-visible page range. The state that tracks new
549  * values will have missed unfrozen XIDs from the pages we skipped.
550  */
551  Assert(!vacrel->aggressive);
554  }
555 
556  /*
557  * For safety, clamp relallvisible to be not more than what we're setting
558  * pg_class.relpages to
559  */
560  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
561  visibilitymap_count(rel, &new_rel_allvisible, NULL);
562  if (new_rel_allvisible > new_rel_pages)
563  new_rel_allvisible = new_rel_pages;
564 
565  /*
566  * Now actually update rel's pg_class entry.
567  *
568  * In principle new_live_tuples could be -1 indicating that we (still)
569  * don't know the tuple count. In practice that can't happen, since we
570  * scan every page that isn't skipped using the visibility map.
571  */
572  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
573  new_rel_allvisible, vacrel->nindexes > 0,
574  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
575  &frozenxid_updated, &minmulti_updated, false);
576 
577  /*
578  * Report results to the cumulative stats system, too.
579  *
580  * Deliberately avoid telling the stats system about LP_DEAD items that
581  * remain in the table due to VACUUM bypassing index and heap vacuuming.
582  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
583  * It seems like a good idea to err on the side of not vacuuming again too
584  * soon in cases where the failsafe prevented significant amounts of heap
585  * vacuuming.
586  */
588  rel->rd_rel->relisshared,
589  Max(vacrel->new_live_tuples, 0),
590  vacrel->recently_dead_tuples +
591  vacrel->missed_dead_tuples);
593 
594  if (instrument)
595  {
596  TimestampTz endtime = GetCurrentTimestamp();
597 
598  if (verbose || params->log_min_duration == 0 ||
599  TimestampDifferenceExceeds(starttime, endtime,
600  params->log_min_duration))
601  {
602  long secs_dur;
603  int usecs_dur;
604  WalUsage walusage;
605  BufferUsage bufferusage;
607  char *msgfmt;
608  int32 diff;
609  double read_rate = 0,
610  write_rate = 0;
611  int64 total_blks_hit;
612  int64 total_blks_read;
613  int64 total_blks_dirtied;
614 
615  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
616  memset(&walusage, 0, sizeof(WalUsage));
617  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
618  memset(&bufferusage, 0, sizeof(BufferUsage));
619  BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
620 
621  total_blks_hit = bufferusage.shared_blks_hit +
622  bufferusage.local_blks_hit;
623  total_blks_read = bufferusage.shared_blks_read +
624  bufferusage.local_blks_read;
625  total_blks_dirtied = bufferusage.shared_blks_dirtied +
626  bufferusage.local_blks_dirtied;
627 
629  if (verbose)
630  {
631  /*
632  * Aggressiveness already reported earlier, in dedicated
633  * VACUUM VERBOSE ereport
634  */
635  Assert(!params->is_wraparound);
636  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
637  }
638  else if (params->is_wraparound)
639  {
640  /*
641  * While it's possible for a VACUUM to be both is_wraparound
642  * and !aggressive, that's just a corner-case -- is_wraparound
643  * implies aggressive. Produce distinct output for the corner
644  * case all the same, just in case.
645  */
646  if (vacrel->aggressive)
647  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
648  else
649  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
650  }
651  else
652  {
653  if (vacrel->aggressive)
654  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
655  else
656  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
657  }
658  appendStringInfo(&buf, msgfmt,
659  vacrel->dbname,
660  vacrel->relnamespace,
661  vacrel->relname,
662  vacrel->num_index_scans);
663  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
664  vacrel->removed_pages,
665  new_rel_pages,
666  vacrel->scanned_pages,
667  orig_rel_pages == 0 ? 100.0 : /* empty rel: avoid dividing by zero */
668  100.0 * vacrel->scanned_pages / orig_rel_pages);
670  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
671  (long long) vacrel->tuples_deleted,
672  (long long) vacrel->new_rel_tuples,
673  (long long) vacrel->recently_dead_tuples);
674  if (vacrel->missed_dead_tuples > 0)
676  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
677  (long long) vacrel->missed_dead_tuples,
678  vacrel->missed_dead_pages);
679  diff = (int32) (ReadNextTransactionId() -
680  vacrel->cutoffs.OldestXmin);
682  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
683  vacrel->cutoffs.OldestXmin, diff);
684  if (frozenxid_updated)
685  {
686  diff = (int32) (vacrel->NewRelfrozenXid -
687  vacrel->cutoffs.relfrozenxid);
689  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
690  vacrel->NewRelfrozenXid, diff);
691  }
692  if (minmulti_updated)
693  {
694  diff = (int32) (vacrel->NewRelminMxid -
695  vacrel->cutoffs.relminmxid);
697  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
698  vacrel->NewRelminMxid, diff);
699  }
700  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
701  vacrel->frozen_pages,
702  orig_rel_pages == 0 ? 100.0 :
703  100.0 * vacrel->frozen_pages / orig_rel_pages,
704  (long long) vacrel->tuples_frozen);
705  if (vacrel->do_index_vacuuming)
706  {
707  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
708  appendStringInfoString(&buf, _("index scan not needed: "));
709  else
710  appendStringInfoString(&buf, _("index scan needed: "));
711 
712  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
713  }
714  else
715  {
717  appendStringInfoString(&buf, _("index scan bypassed: "));
718  else
719  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
720 
721  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
722  }
723  appendStringInfo(&buf, msgfmt,
724  vacrel->lpdead_item_pages,
725  orig_rel_pages == 0 ? 100.0 :
726  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
727  (long long) vacrel->lpdead_items);
728  for (int i = 0; i < vacrel->nindexes; i++)
729  {
730  IndexBulkDeleteResult *istat = vacrel->indstats[i];
731 
732  if (!istat)
733  continue;
734 
736  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
737  indnames[i],
738  istat->num_pages,
739  istat->pages_newly_deleted,
740  istat->pages_deleted,
741  istat->pages_free);
742  }
743  if (track_io_timing)
744  {
745  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
746  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
747 
748  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
749  read_ms, write_ms);
750  }
751  if (secs_dur > 0 || usecs_dur > 0) /* zero elapsed time: leave both rates at 0 */
752  {
753  read_rate = (double) BLCKSZ * total_blks_read /
754  (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
755  write_rate = (double) BLCKSZ * total_blks_dirtied /
756  (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
757  }
758  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
759  read_rate, write_rate);
761  _("buffer usage: %lld hits, %lld reads, %lld dirtied\n"),
762  (long long) total_blks_hit,
763  (long long) total_blks_read,
764  (long long) total_blks_dirtied);
766  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
767  (long long) walusage.wal_records,
768  (long long) walusage.wal_fpi,
769  (unsigned long long) walusage.wal_bytes);
770  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
771 
772  ereport(verbose ? INFO : LOG,
773  (errmsg_internal("%s", buf.data)));
774  pfree(buf.data);
775  }
776  }
777 
778  /* Cleanup index statistics and index names */
779  for (int i = 0; i < vacrel->nindexes; i++)
780  {
781  if (vacrel->indstats[i])
782  pfree(vacrel->indstats[i]);
783 
784  if (instrument) /* indnames was allocated above whenever instrument && nindexes > 0 */
785  pfree(indnames[i]);
786  }
787 }
788 
789 /*
790  * lazy_scan_heap() -- workhorse function for VACUUM
791  *
792  * This routine prunes each page in the heap, and considers the need to
793  * freeze remaining tuples with storage (not including pages that can be
794  * skipped using the visibility map). Also performs related maintenance
795  * of the FSM and visibility map. These steps all take place during an
796  * initial pass over the target heap relation.
797  *
798  * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
799  * consists of deleting index tuples that point to LP_DEAD items left in
799  * heap pages following pruning. The earlier initial pass over the heap
800  * will have collected the TIDs whose index tuples need to be removed.
802  *
803  * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
804  * largely consists of marking LP_DEAD items (from vacrel->dead_items)
805  * as LP_UNUSED. This has to happen in a second, final pass over the
806  * heap, to preserve a basic invariant that all index AMs rely on: no
807  * extant index tuple can ever be allowed to contain a TID that points to
808  * an LP_UNUSED line pointer in the heap. We must disallow premature
809  * recycling of line pointers to avoid index scans that get confused
810  * about which TID points to which tuple immediately after recycling.
811  * (Actually, this isn't a concern when target heap relation happens to
812  * have no indexes, which allows us to safely apply the one-pass strategy
813  * as an optimization).
814  *
815  * In practice we often have enough space to fit all TIDs, and so won't
816  * need to call lazy_vacuum more than once, after our initial pass over
817  * the heap has totally finished. Otherwise things are slightly more
818  * complicated: our "initial pass" over the heap applies only to those
819  * pages that were pruned before we needed to call lazy_vacuum, and our
820  * "final pass" over the heap only vacuums these same heap pages.
821  * However, we process indexes in full every time lazy_vacuum is called,
822  * which makes index processing very inefficient when memory is in short
823  * supply.
824  */
825 static void
827 {
828  BlockNumber rel_pages = vacrel->rel_pages,
829  blkno,
830  next_fsm_block_to_vacuum = 0;
831  bool all_visible_according_to_vm;
832 
833  TidStore *dead_items = vacrel->dead_items;
834  VacDeadItemsInfo *dead_items_info = vacrel->dead_items_info;
835  Buffer vmbuffer = InvalidBuffer;
836  const int initprog_index[] = {
840  };
841  int64 initprog_val[3];
842 
843  /* Report that we're scanning the heap, advertising total # of blocks */
844  initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
845  initprog_val[1] = rel_pages;
846  initprog_val[2] = dead_items_info->max_bytes;
847  pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
848 
849  /* Initialize for the first heap_vac_scan_next_block() call */
852  vacrel->next_unskippable_allvis = false;
854 
855  while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
856  {
857  Buffer buf;
858  Page page;
859  bool has_lpdead_items;
860  bool got_cleanup_lock = false;
861 
862  vacrel->scanned_pages++;
863 
864  /* Report as block scanned, update error traceback information */
867  blkno, InvalidOffsetNumber);
868 
870 
871  /*
872  * Regularly check if wraparound failsafe should trigger.
873  *
874  * There is a similar check inside lazy_vacuum_all_indexes(), but
875  * relfrozenxid might start to look dangerously old before we reach
876  * that point. This check also provides failsafe coverage for the
877  * one-pass strategy, and the two-pass strategy with the index_cleanup
878  * param set to 'off'.
879  */
880  if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
882 
883  /*
884  * Consider if we definitely have enough space to process TIDs on page
885  * already. If we are close to overrunning the available space for
886  * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
887  * this page.
888  */
889  if (TidStoreMemoryUsage(dead_items) > dead_items_info->max_bytes)
890  {
891  /*
892  * Before beginning index vacuuming, we release any pin we may
893  * hold on the visibility map page. This isn't necessary for
894  * correctness, but we do it anyway to avoid holding the pin
895  * across a lengthy, unrelated operation.
896  */
897  if (BufferIsValid(vmbuffer))
898  {
899  ReleaseBuffer(vmbuffer);
900  vmbuffer = InvalidBuffer;
901  }
902 
903  /* Perform a round of index and heap vacuuming */
904  vacrel->consider_bypass_optimization = false;
905  lazy_vacuum(vacrel);
906 
907  /*
908  * Vacuum the Free Space Map to make newly-freed space visible on
909  * upper-level FSM pages. Note we have not yet processed blkno.
910  */
911  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
912  blkno);
913  next_fsm_block_to_vacuum = blkno;
914 
915  /* Report that we are once again scanning the heap */
918  }
919 
920  /*
921  * Pin the visibility map page in case we need to mark the page
922  * all-visible. In most cases this will be very cheap, because we'll
923  * already have the correct page pinned anyway.
924  */
925  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
926 
927  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
928  vacrel->bstrategy);
929  page = BufferGetPage(buf);
930 
931  /*
932  * We need a buffer cleanup lock to prune HOT chains and defragment
933  * the page in lazy_scan_prune. But when it's not possible to acquire
934  * a cleanup lock right away, we may be able to settle for reduced
935  * processing using lazy_scan_noprune.
936  */
937  got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
938 
939  if (!got_cleanup_lock)
941 
942  /* Check for new or empty pages before lazy_scan_[no]prune call */
943  if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
944  vmbuffer))
945  {
946  /* Processed as new/empty page (lock and pin released) */
947  continue;
948  }
949 
950  /*
951  * If we didn't get the cleanup lock, we can still collect LP_DEAD
952  * items in the dead_items area for later vacuuming, count live and
953  * recently dead tuples for vacuum logging, and determine if this
954  * block could later be truncated. If we encounter any xid/mxids that
955  * require advancing the relfrozenxid/relminxid, we'll have to wait
956  * for a cleanup lock and call lazy_scan_prune().
957  */
958  if (!got_cleanup_lock &&
959  !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
960  {
961  /*
962  * lazy_scan_noprune could not do all required processing. Wait
963  * for a cleanup lock, and call lazy_scan_prune in the usual way.
964  */
965  Assert(vacrel->aggressive);
968  got_cleanup_lock = true;
969  }
970 
971  /*
972  * If we have a cleanup lock, we must now prune, freeze, and count
973  * tuples. We may have acquired the cleanup lock originally, or we may
974  * have gone back and acquired it after lazy_scan_noprune() returned
975  * false. Either way, the page hasn't been processed yet.
976  *
977  * Like lazy_scan_noprune(), lazy_scan_prune() will count
978  * recently_dead_tuples and live tuples for vacuum logging, determine
979  * if the block can later be truncated, and accumulate the details of
980  * remaining LP_DEAD line pointers on the page into dead_items. These
981  * dead items include those pruned by lazy_scan_prune() as well as
982  * line pointers previously marked LP_DEAD.
983  */
984  if (got_cleanup_lock)
985  lazy_scan_prune(vacrel, buf, blkno, page,
986  vmbuffer, all_visible_according_to_vm,
987  &has_lpdead_items);
988 
989  /*
990  * Now drop the buffer lock and, potentially, update the FSM.
991  *
992  * Our goal is to update the freespace map the last time we touch the
993  * page. If we'll process a block in the second pass, we may free up
994  * additional space on the page, so it is better to update the FSM
995  * after the second pass. If the relation has no indexes, or if index
996  * vacuuming is disabled, there will be no second heap pass; if this
997  * particular page has no dead items, the second heap pass will not
998  * touch this page. So, in those cases, update the FSM now.
999  *
1000  * Note: In corner cases, it's possible to miss updating the FSM
1001  * entirely. If index vacuuming is currently enabled, we'll skip the
1002  * FSM update now. But if failsafe mode is later activated, or there
1003  * are so few dead tuples that index vacuuming is bypassed, there will
1004  * also be no opportunity to update the FSM later, because we'll never
1005  * revisit this page. Since updating the FSM is desirable but not
1006  * absolutely required, that's OK.
1007  */
1008  if (vacrel->nindexes == 0
1009  || !vacrel->do_index_vacuuming
1010  || !has_lpdead_items)
1011  {
1012  Size freespace = PageGetHeapFreeSpace(page);
1013 
1015  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1016 
1017  /*
1018  * Periodically perform FSM vacuuming to make newly-freed space
1019  * visible on upper FSM pages. This is done after vacuuming if the
1020  * table has indexes. There will only be newly-freed space if we
1021  * held the cleanup lock and lazy_scan_prune() was called.
1022  */
1023  if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
1024  blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1025  {
1026  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1027  blkno);
1028  next_fsm_block_to_vacuum = blkno;
1029  }
1030  }
1031  else
1033  }
1034 
1035  vacrel->blkno = InvalidBlockNumber;
1036  if (BufferIsValid(vmbuffer))
1037  ReleaseBuffer(vmbuffer);
1038 
1039  /* report that everything is now scanned */
1041 
1042  /* now we can compute the new value for pg_class.reltuples */
1043  vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1044  vacrel->scanned_pages,
1045  vacrel->live_tuples);
1046 
1047  /*
1048  * Also compute the total number of surviving heap entries. In the
1049  * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1050  */
1051  vacrel->new_rel_tuples =
1052  Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1053  vacrel->missed_dead_tuples;
1054 
1055  /*
1056  * Do index vacuuming (call each index's ambulkdelete routine), then do
1057  * related heap vacuuming
1058  */
1059  if (dead_items_info->num_items > 0)
1060  lazy_vacuum(vacrel);
1061 
1062  /*
1063  * Vacuum the remainder of the Free Space Map. We must do this whether or
1064  * not there were indexes, and whether or not we bypassed index vacuuming.
1065  */
1066  if (blkno > next_fsm_block_to_vacuum)
1067  FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1068 
1069  /* report all blocks vacuumed */
1071 
1072  /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1073  if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1074  lazy_cleanup_all_indexes(vacrel);
1075 }
1076 
1077 /*
1078  * heap_vac_scan_next_block() -- get next block for vacuum to process
1079  *
1080  * lazy_scan_heap() calls here every time it needs to get the next block to
1081  * prune and vacuum. The function uses the visibility map, vacuum options,
1082  * and various thresholds to skip blocks which do not need to be processed and
1083  * sets blkno to the next block to process.
1084  *
1085  * The block number and visibility status of the next block to process are set
1086  * in *blkno and *all_visible_according_to_vm. The return value is false if
1087  * there are no further blocks to process.
1088  *
1089  * vacrel is an in/out parameter here. Vacuum options and information about
1090  * the relation are read. vacrel->skippedallvis is set if we skip a block
1091  * that's all-visible but not all-frozen, to ensure that we don't update
1092  * relfrozenxid in that case. vacrel also holds information about the next
1093  * unskippable block, as bookkeeping for this function.
1094  */
/*
 * Reviewer summary of the visible body: next_block is computed as
 * vacrel->current_block + 1.  If that is at or past vacrel->rel_pages,
 * *blkno is set to rel_pages and false is returned.  Otherwise the function
 * may refresh vacrel->next_unskippable_block through
 * find_next_unskippable_block(), may jump ahead to it, then stores the chosen
 * block in both vacrel->current_block and *blkno, fills in
 * *all_visible_according_to_vm, and returns true.
 *
 * NOTE(review): the listing numbers embedded in this copy skip 1096, 1107,
 * 1109-1110 and 1120, so those source lines (including the one carrying the
 * function name and leading parameters) are absent here.  Each gap is flagged
 * below; restore the lines from the upstream file before building.
 */
1095 static bool
/* NOTE(review): listing line 1096 is absent; the body refers to the missing parameters as vacrel and blkno. */
1097  bool *all_visible_according_to_vm)
1098 {
1099  BlockNumber next_block;
1100 
1101  /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1102  next_block = vacrel->current_block + 1;
1103 
1104  /* Have we reached the end of the relation? */
1105  if (next_block >= vacrel->rel_pages)
1106  {
/*
 * NOTE(review): listing line 1107 is absent; given the nested braces that
 * follow, it was presumably a condition guarding them -- confirm upstream.
 */
1108  {
/* NOTE(review): listing lines 1109-1110 (the body of this nested block) are absent. */
1111  }
1112  *blkno = vacrel->rel_pages;
1113  return false;
1114  }
1115 
1116  /*
1117  * We must be in one of the three following states:
1118  */
1119  if (next_block > vacrel->next_unskippable_block ||
/* NOTE(review): listing line 1120 (the right-hand operand of the || above) is absent. */
1121  {
1122  /*
1123  * 1. We have just processed an unskippable block (or we're at the
1124  * beginning of the scan). Find the next unskippable block using the
1125  * visibility map.
1126  */
1127  bool skipsallvis;
1128 
1129  find_next_unskippable_block(vacrel, &skipsallvis);
1130 
1131  /*
1132  * We now know the next block that we must process. It can be the
1133  * next block after the one we just processed, or something further
1134  * ahead. If it's further ahead, we can jump to it, but we choose to
1135  * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1136  * pages. Since we're reading sequentially, the OS should be doing
1137  * readahead for us, so there's no gain in skipping a page now and
1138  * then. Skipping such a range might even discourage sequential
1139  * detection.
1140  *
1141  * This test also enables more frequent relfrozenxid advancement
1142  * during non-aggressive VACUUMs. If the range has any all-visible
1143  * pages then skipping makes updating relfrozenxid unsafe, which is a
1144  * real downside.
1145  */
1146  if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1147  {
1148  next_block = vacrel->next_unskippable_block;
/* Only a jump that is actually taken marks the scan as having skipped all-visible pages. */
1149  if (skipsallvis)
1150  vacrel->skippedallvis = true;
1151  }
1152  }
1153 
1154  /* Now we must be in one of the two remaining states: */
1155  if (next_block < vacrel->next_unskippable_block)
1156  {
1157  /*
1158  * 2. We are processing a range of blocks that we could have skipped
1159  * but chose not to. We know that they are all-visible in the VM,
1160  * otherwise they would've been unskippable.
1161  */
1162  *blkno = vacrel->current_block = next_block;
1163  *all_visible_according_to_vm = true;
1164  return true;
1165  }
1166  else
1167  {
1168  /*
1169  * 3. We reached the next unskippable block. Process it. On next
1170  * iteration, we will be back in state 1.
1171  */
1172  Assert(next_block == vacrel->next_unskippable_block);
1173 
1174  *blkno = vacrel->current_block = next_block;
/* Report the VM status that find_next_unskippable_block() saved for this block. */
1175  *all_visible_according_to_vm = vacrel->next_unskippable_allvis;
1176  return true;
1177  }
1178 }
1179 
1180 /*
1181  * Find the next unskippable block in a vacuum scan using the visibility map.
1182  * The next unskippable block and its visibility information is updated in
1183  * vacrel.
1184  *
1185  * Note: our opinion of which blocks can be skipped can go stale immediately.
1186  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1187  * was concurrently cleared, though. All that matters is that caller scan all
1188  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1189  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1190  * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1191  * to skip such a range is actually made, making everything safe.)
1192  */
1193 static void
1194 find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1195 {
1196  BlockNumber rel_pages = vacrel->rel_pages;
1197  BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1198  Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1199  bool next_unskippable_allvis;
1200 
1201  *skipsallvis = false;
1202 
1203  for (;;)
1204  {
1205  uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1206  next_unskippable_block,
1207  &next_unskippable_vmbuffer);
1208 
1209  next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1210 
1211  /*
1212  * A block is unskippable if it is not all visible according to the
1213  * visibility map.
1214  */
1215  if (!next_unskippable_allvis)
1216  {
1217  Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1218  break;
1219  }
1220 
1221  /*
1222  * Caller must scan the last page to determine whether it has tuples
1223  * (caller must have the opportunity to set vacrel->nonempty_pages).
1224  * This rule avoids having lazy_truncate_heap() take access-exclusive
1225  * lock on rel to attempt a truncation that fails anyway, just because
1226  * there are tuples on the last page (it is likely that there will be
1227  * tuples on other nearby pages as well, but those can be skipped).
1228  *
1229  * Implement this by always treating the last block as unsafe to skip.
1230  */
1231  if (next_unskippable_block == rel_pages - 1)
1232  break;
1233 
1234  /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1235  if (!vacrel->skipwithvm)
1236  break;
1237 
1238  /*
1239  * Aggressive VACUUM caller can't skip pages just because they are
1240  * all-visible. They may still skip all-frozen pages, which can't
1241  * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1242  */
1243  if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1244  {
1245  if (vacrel->aggressive)
1246  break;
1247 
1248  /*
1249  * All-visible block is safe to skip in non-aggressive case. But
1250  * remember that the final range contains such a block for later.
1251  */
1252  *skipsallvis = true;
1253  }
1254 
1255  next_unskippable_block++;
1256  }
1257 
1258  /* write the local variables back to vacrel */
1259  vacrel->next_unskippable_block = next_unskippable_block;
1260  vacrel->next_unskippable_allvis = next_unskippable_allvis;
1261  vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1262 }
1263 
1264 /*
1265  * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1266  *
1267  * Must call here to handle both new and empty pages before calling
1268  * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1269  * with new or empty pages.
1270  *
1271  * It's necessary to consider new pages as a special case, since the rules for
1272  * maintaining the visibility map and FSM with empty pages are a little
1273  * different (though new pages can be truncated away during rel truncation).
1274  *
1275  * Empty pages are not really a special case -- they're just heap pages that
1276  * have no allocated tuples (including even LP_UNUSED items). You might
1277  * wonder why we need to handle them here all the same. It's only necessary
1278  * because of a corner-case involving a hard crash during heap relation
1279  * extension. If we ever make relation-extension crash safe, then it should
1280  * no longer be necessary to deal with empty pages here (or new pages, for
1281  * that matter).
1282  *
1283  * Caller must hold at least a shared lock. We might need to escalate the
1284  * lock in that case, so the type of lock caller holds needs to be specified
1285  * using 'sharelock' argument.
1286  *
1287  * Returns false in common case where caller should go on to call
1288  * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1289  * that lazy_scan_heap is done processing the page, releasing lock on caller's
1290  * behalf.
1291  */
/*
 * Reviewer summary of the visible body: returns true after handling a page
 * for which PageIsNew() or PageIsEmpty() holds, and false for any other page
 * (including a page found to be non-empty on the re-check inside the
 * sharelock branch).  For a new page, free space of
 * BLCKSZ - SizeOfPageHeaderData is recorded only when
 * GetRecordedFreeSpace() reports 0.  For an empty page that is not yet
 * PageIsAllVisible(), the page-level flag is set and visibilitymap_set() is
 * called; the page's free space is then recorded.
 *
 * NOTE(review): the listing numbers embedded in this copy skip 1293, 1319,
 * 1340-1341, 1360, 1363, 1381 and 1386, so those source lines (including the
 * one carrying the function name and leading parameters) are absent here.
 * Each gap is flagged below; restore the lines from the upstream file before
 * building.
 */
1292 static bool
/* NOTE(review): listing line 1293 is absent; the body refers to the missing parameters as vacrel, buf and blkno. */
1294  Page page, bool sharelock, Buffer vmbuffer)
1295 {
1296  Size freespace;
1297 
1298  if (PageIsNew(page))
1299  {
1300  /*
1301  * All-zeroes pages can be left over if either a backend extends the
1302  * relation by a single page, but crashes before the newly initialized
1303  * page has been written out, or when bulk-extending the relation
1304  * (which creates a number of empty pages at the tail end of the
1305  * relation), and then enters them into the FSM.
1306  *
1307  * Note we do not enter the page into the visibilitymap. That has the
1308  * downside that we repeatedly visit this page in subsequent vacuums,
1309  * but otherwise we'll never discover the space on a promoted standby.
1310  * The harm of repeated checking ought to normally not be too bad. The
1311  * space usually should be used at some point, otherwise there
1312  * wouldn't be any regular vacuums.
1313  *
1314  * Make sure these pages are in the FSM, to ensure they can be reused.
1315  * Do that by testing if there's any space recorded for the page. If
1316  * not, enter it. We do so after releasing the lock on the heap page,
1317  * the FSM is approximate, after all.
1318  */
/*
 * NOTE(review): listing line 1319 is absent; going by the comment above it
 * presumably released the lock on the heap page -- confirm upstream.
 */
1320 
1321  if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1322  {
1323  freespace = BLCKSZ - SizeOfPageHeaderData;
1324 
1325  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1326  }
1327 
1328  return true;
1329  }
1330 
1331  if (PageIsEmpty(page))
1332  {
1333  /*
1334  * It seems likely that caller will always be able to get a cleanup
1335  * lock on an empty page. But don't take any chances -- escalate to
1336  * an exclusive lock (still don't need a cleanup lock, though).
1337  */
1338  if (sharelock)
1339  {
/*
 * NOTE(review): listing lines 1340-1341 are absent; going by the comment
 * above they presumably performed the lock escalation -- confirm upstream.
 */
1342 
/* Re-check emptiness before relying on it in this branch. */
1343  if (!PageIsEmpty(page))
1344  {
1345  /* page isn't new or empty -- keep lock and pin for now */
1346  return false;
1347  }
1348  }
1349  else
1350  {
1351  /* Already have a full cleanup lock (which is more than enough) */
1352  }
1353 
1354  /*
1355  * Unlike new pages, empty pages are always set all-visible and
1356  * all-frozen.
1357  */
1358  if (!PageIsAllVisible(page))
1359  {
/*
 * NOTE(review): listing line 1360 is absent; an END_CRIT_SECTION() appears
 * further down, so this was presumably its opening counterpart -- confirm.
 */
1361 
1362  /* mark buffer dirty before writing a WAL record */
/* NOTE(review): listing line 1363 (the statement the comment above describes) is absent. */
1364 
1365  /*
1366  * It's possible that another backend has extended the heap,
1367  * initialized the page, and then failed to WAL-log the page due
1368  * to an ERROR. Since heap extension is not WAL-logged, recovery
1369  * might try to replay our record setting the page all-visible and
1370  * find that the page isn't initialized, which will cause a PANIC.
1371  * To prevent that, check whether the page has been previously
1372  * WAL-logged, and if not, do that now.
1373  */
1374  if (RelationNeedsWAL(vacrel->rel) &&
1375  PageGetLSN(page) == InvalidXLogRecPtr)
1376  log_newpage_buffer(buf, true);
1377 
1378  PageSetAllVisible(page);
1379  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1380  vmbuffer, InvalidTransactionId,
/* NOTE(review): listing line 1381 (the final argument and close of the call above) is absent. */
1382  END_CRIT_SECTION();
1383  }
1384 
1385  freespace = PageGetHeapFreeSpace(page);
/* NOTE(review): listing line 1386 is absent from this copy. */
1387  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1388  return true;
1389  }
1390 
1391  /* page isn't new or empty -- keep lock and pin */
1392  return false;
1393 }
1394 
1395 /* qsort comparator for sorting OffsetNumbers */
1396 static int
1397 cmpOffsetNumbers(const void *a, const void *b)
1398 {
1399  return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1400 }
1401 
1402 /*
1403  * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1404  *
1405  * Caller must hold pin and buffer cleanup lock on the buffer.
1406  *
1407  * vmbuffer is the buffer containing the VM block with visibility information
1408  * for the heap block, blkno. all_visible_according_to_vm is the saved
1409  * visibility status of the heap block looked up earlier by the caller. We
1410  * won't rely entirely on this status, as it may be out of date.
1411  *
1412  * *has_lpdead_items is set to true or false depending on whether, upon return
1413  * from this function, any LP_DEAD items are still present on the page.
1414  */
/*
 * Reviewer summary of the visible body: calls heap_page_prune_and_freeze()
 * with HEAP_PAGE_PRUNE_FREEZE (plus HEAP_PAGE_PRUNE_MARK_UNUSED_NOW when
 * vacrel->nindexes == 0), bumps vacrel->frozen_pages when presult.nfrozen > 0,
 * sorts presult.deadoffsets and hands them to dead_items_add() when
 * presult.lpdead_items > 0, folds the page-local counts into vacrel, sets
 * *has_lpdead_items, and finally reconciles the page-level all-visible flag
 * with the visibility map through a four-way if / else-if chain.
 *
 * NOTE(review): the listing numbers embedded in this copy skip 1416,
 * 1454-1455, 1508, 1536, 1540, 1558, 1576, 1598, 1600, 1619, 1629 and
 * 1632-1633, so those source lines (including the one carrying the function
 * name and first parameter) are absent here.  Each gap is flagged below;
 * restore the lines from the upstream file before building.
 */
1415 static void
/* NOTE(review): listing line 1416 is absent; the body refers to the missing parameter as vacrel. */
1417  Buffer buf,
1418  BlockNumber blkno,
1419  Page page,
1420  Buffer vmbuffer,
1421  bool all_visible_according_to_vm,
1422  bool *has_lpdead_items)
1423 {
1424  Relation rel = vacrel->rel;
1425  PruneFreezeResult presult;
1426  int prune_options = 0;
1427 
1428  Assert(BufferGetBlockNumber(buf) == blkno);
1429 
1430  /*
1431  * Prune all HOT-update chains and potentially freeze tuples on this page.
1432  *
1433  * If the relation has no indexes, we can immediately mark would-be dead
1434  * items LP_UNUSED.
1435  *
1436  * The number of tuples removed from the page is returned in
1437  * presult.ndeleted. It should not be confused with presult.lpdead_items;
1438  * presult.lpdead_items's final value can be thought of as the number of
1439  * tuples that were deleted from indexes.
1440  *
1441  * We will update the VM after collecting LP_DEAD items and freezing
1442  * tuples. Pruning will have determined whether or not the page is
1443  * all-visible.
1444  */
1445  prune_options = HEAP_PAGE_PRUNE_FREEZE;
1446  if (vacrel->nindexes == 0)
1447  prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
1448 
1449  heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
1450  &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
1451  &vacrel->offnum,
1452  &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
1453 
/* NOTE(review): listing lines 1454-1455 are absent from this copy. */
1456 
1457  if (presult.nfrozen > 0)
1458  {
1459  /*
1460  * We don't increment the frozen_pages instrumentation counter when
1461  * nfrozen == 0, since it only counts pages with newly frozen tuples
1462  * (don't confuse that with pages newly set all-frozen in VM).
1463  */
1464  vacrel->frozen_pages++;
1465  }
1466 
1467  /*
1468  * VACUUM will call heap_page_is_all_visible() during the second pass over
1469  * the heap to determine all_visible and all_frozen for the page -- this
1470  * is a specialized version of the logic from this function. Now that
1471  * we've finished pruning and freezing, make sure that we're in total
1472  * agreement with heap_page_is_all_visible() using an assertion.
1473  */
1474 #ifdef USE_ASSERT_CHECKING
1475  /* Note that all_frozen value does not matter when !all_visible */
1476  if (presult.all_visible)
1477  {
1478  TransactionId debug_cutoff;
1479  bool debug_all_frozen;
1480 
1481  Assert(presult.lpdead_items == 0);
1482 
1483  if (!heap_page_is_all_visible(vacrel, buf,
1484  &debug_cutoff, &debug_all_frozen))
1485  Assert(false);
1486 
1487  Assert(presult.all_frozen == debug_all_frozen);
1488 
1489  Assert(!TransactionIdIsValid(debug_cutoff) ||
1490  debug_cutoff == presult.vm_conflict_horizon);
1491  }
1492 #endif
1493 
1494  /*
1495  * Now save details of the LP_DEAD items from the page in vacrel
1496  */
1497  if (presult.lpdead_items > 0)
1498  {
1499  vacrel->lpdead_item_pages++;
1500 
1501  /*
1502  * deadoffsets are collected incrementally in
1503  * heap_page_prune_and_freeze() as each dead line pointer is recorded,
1504  * with an indeterminate order, but dead_items_add requires them to be
1505  * sorted.
1506  */
1507  qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
/* NOTE(review): listing line 1508 (the comparator argument and close of the qsort call) is absent. */
1509 
1510  dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
1511  }
1512 
1513  /* Finally, add page-local counts to whole-VACUUM counts */
1514  vacrel->tuples_deleted += presult.ndeleted;
1515  vacrel->tuples_frozen += presult.nfrozen;
1516  vacrel->lpdead_items += presult.lpdead_items;
1517  vacrel->live_tuples += presult.live_tuples;
1518  vacrel->recently_dead_tuples += presult.recently_dead_tuples;
1519 
1520  /* Can't truncate this page */
1521  if (presult.hastup)
1522  vacrel->nonempty_pages = blkno + 1;
1523 
1524  /* Did we find LP_DEAD items? */
1525  *has_lpdead_items = (presult.lpdead_items > 0);
1526 
1527  Assert(!presult.all_visible || !(*has_lpdead_items));
1528 
1529  /*
1530  * Handle setting visibility map bit based on information from the VM (as
1531  * of last heap_vac_scan_next_block() call), and from all_visible and
1532  * all_frozen variables
1533  */
1534  if (!all_visible_according_to_vm && presult.all_visible)
1535  {
/*
 * NOTE(review): listing line 1536 is absent; "flags" is used below without a
 * visible declaration, so it was presumably declared here -- confirm.
 */
1537 
1538  if (presult.all_frozen)
1539  {
/* NOTE(review): listing line 1540 is absent from this copy. */
1541  flags |= VISIBILITYMAP_ALL_FROZEN;
1542  }
1543 
1544  /*
1545  * It should never be the case that the visibility map page is set
1546  * while the page-level bit is clear, but the reverse is allowed (if
1547  * checksums are not enabled). Regardless, set both bits so that we
1548  * get back in sync.
1549  *
1550  * NB: If the heap page is all-visible but the VM bit is not set, we
1551  * don't need to dirty the heap page. However, if checksums are
1552  * enabled, we do need to make sure that the heap page is dirtied
1553  * before passing it to visibilitymap_set(), because it may be logged.
1554  * Given that this situation should only happen in rare cases after a
1555  * crash, it is not worth optimizing.
1556  */
1557  PageSetAllVisible(page);
/*
 * NOTE(review): listing line 1558 is absent; going by the NB above it
 * presumably dirtied the heap buffer -- confirm upstream.
 */
1559  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1560  vmbuffer, presult.vm_conflict_horizon,
1561  flags);
1562  }
1563 
1564  /*
1565  * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1566  * page-level bit is clear. However, it's possible that the bit got
1567  * cleared after heap_vac_scan_next_block() was called, so we must recheck
1568  * with buffer lock before concluding that the VM is corrupt.
1569  */
1570  else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1571  visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1572  {
1573  elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1574  vacrel->relname, blkno);
1575  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
/* NOTE(review): listing line 1576 (the final argument and close of the call above) is absent. */
1577  }
1578 
1579  /*
1580  * It's possible for the value returned by
1581  * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1582  * wrong for us to see tuples that appear to not be visible to everyone
1583  * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
1584  * never moves backwards, but GetOldestNonRemovableTransactionId() is
1585  * conservative and sometimes returns a value that's unnecessarily small,
1586  * so if we see that contradiction it just means that the tuples that we
1587  * think are not visible to everyone yet actually are, and the
1588  * PD_ALL_VISIBLE flag is correct.
1589  *
1590  * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
1591  * however.
1592  */
1593  else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
1594  {
1595  elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1596  vacrel->relname, blkno);
1597  PageClearAllVisible(page);
/* NOTE(review): listing line 1598 is absent from this copy. */
1599  visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
/* NOTE(review): listing line 1600 (the final argument and close of the call above) is absent. */
1601  }
1602 
1603  /*
1604  * If the all-visible page is all-frozen but not marked as such yet, mark
1605  * it as all-frozen. Note that all_frozen is only valid if all_visible is
1606  * true, so we must check both all_visible and all_frozen.
1607  */
1608  else if (all_visible_according_to_vm && presult.all_visible &&
1609  presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1610  {
1611  /*
1612  * Avoid relying on all_visible_according_to_vm as a proxy for the
1613  * page-level PD_ALL_VISIBLE bit being set, since it might have become
1614  * stale -- even when all_visible is set
1615  */
1616  if (!PageIsAllVisible(page))
1617  {
1618  PageSetAllVisible(page);
/* NOTE(review): listing line 1619 is absent from this copy. */
1620  }
1621 
1622  /*
1623  * Set the page all-frozen (and all-visible) in the VM.
1624  *
1625  * We can pass InvalidTransactionId as our cutoff_xid, since a
1626  * snapshotConflictHorizon sufficient to make everything safe for REDO
1627  * was logged when the page's tuples were frozen.
1628  */
/* NOTE(review): listing line 1629 is absent from this copy. */
1630  visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1631  vmbuffer, InvalidTransactionId,
/* NOTE(review): listing lines 1632-1633 (the final argument(s) and close of the call above) are absent. */
1634  }
1635 }
1636 
1637 /*
1638  * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1639  *
1640  * Caller need only hold a pin and share lock on the buffer, unlike
1641  * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1642  * performed here, it's quite possible that an earlier opportunistic pruning
1643  * operation left LP_DEAD items behind. We'll at least collect any such items
1644  * in dead_items for removal from indexes.
1645  *
1646  * For aggressive VACUUM callers, we may return false to indicate that a full
1647  * cleanup lock is required for processing by lazy_scan_prune. This is only
1648  * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1649  * one or more tuples on the page. We always return true for non-aggressive
1650  * callers.
1651  *
1652  * If this function returns true, *has_lpdead_items gets set to true or false
1653  * depending on whether, upon return from this function, any LP_DEAD items are
1654  * present on the page. If this function returns false, *has_lpdead_items
1655  * is not updated.
1656  */
1657 static bool
1659  Buffer buf,
1660  BlockNumber blkno,
1661  Page page,
1662  bool *has_lpdead_items)
1663 {
1664  OffsetNumber offnum,
1665  maxoff;
1666  int lpdead_items,
1667  live_tuples,
1668  recently_dead_tuples,
1669  missed_dead_tuples;
1670  bool hastup;
1671  HeapTupleHeader tupleheader;
1672  TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1673  MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1674  OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1675 
1676  Assert(BufferGetBlockNumber(buf) == blkno);
1677 
1678  hastup = false; /* for now */
1679 
1680  lpdead_items = 0;
1681  live_tuples = 0;
1682  recently_dead_tuples = 0;
1683  missed_dead_tuples = 0;
1684 
1685  maxoff = PageGetMaxOffsetNumber(page);
1686  for (offnum = FirstOffsetNumber;
1687  offnum <= maxoff;
1688  offnum = OffsetNumberNext(offnum))
1689  {
1690  ItemId itemid;
1691  HeapTupleData tuple;
1692 
1693  vacrel->offnum = offnum;
1694  itemid = PageGetItemId(page, offnum);
1695 
1696  if (!ItemIdIsUsed(itemid))
1697  continue;
1698 
1699  if (ItemIdIsRedirected(itemid))
1700  {
1701  hastup = true;
1702  continue;
1703  }
1704 
1705  if (ItemIdIsDead(itemid))
1706  {
1707  /*
1708  * Deliberately don't set hastup=true here. See same point in
1709  * lazy_scan_prune for an explanation.
1710  */
1711  deadoffsets[lpdead_items++] = offnum;
1712  continue;
1713  }
1714 
1715  hastup = true; /* page prevents rel truncation */
1716  tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
1717  if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
1718  &NoFreezePageRelfrozenXid,
1719  &NoFreezePageRelminMxid))
1720  {
1721  /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
1722  if (vacrel->aggressive)
1723  {
1724  /*
1725  * Aggressive VACUUMs must always be able to advance rel's
1726  * relfrozenxid to a value >= FreezeLimit (and be able to
1727  * advance rel's relminmxid to a value >= MultiXactCutoff).
1728  * The ongoing aggressive VACUUM won't be able to do that
1729  * unless it can freeze an XID (or MXID) from this tuple now.
1730  *
1731  * The only safe option is to have caller perform processing
1732  * of this page using lazy_scan_prune. Caller might have to
1733  * wait a while for a cleanup lock, but it can't be helped.
1734  */
1735  vacrel->offnum = InvalidOffsetNumber;
1736  return false;
1737  }
1738 
1739  /*
1740  * Non-aggressive VACUUMs are under no obligation to advance
1741  * relfrozenxid (even by one XID). We can be much laxer here.
1742  *
1743  * Currently we always just accept an older final relfrozenxid
1744  * and/or relminmxid value. We never make caller wait or work a
1745  * little harder, even when it likely makes sense to do so.
1746  */
1747  }
1748 
1749  ItemPointerSet(&(tuple.t_self), blkno, offnum);
1750  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1751  tuple.t_len = ItemIdGetLength(itemid);
1752  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
1753 
1754  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1755  buf))
1756  {
1758  case HEAPTUPLE_LIVE:
1759 
1760  /*
1761  * Count both cases as live, just like lazy_scan_prune
1762  */
1763  live_tuples++;
1764 
1765  break;
1766  case HEAPTUPLE_DEAD:
1767 
1768  /*
1769  * There is some useful work for pruning to do, that won't be
1770  * done due to failure to get a cleanup lock.
1771  */
1772  missed_dead_tuples++;
1773  break;
1775 
1776  /*
1777  * Count in recently_dead_tuples, just like lazy_scan_prune
1778  */
1779  recently_dead_tuples++;
1780  break;
1782 
1783  /*
1784  * Do not count these rows as live, just like lazy_scan_prune
1785  */
1786  break;
1787  default:
1788  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1789  break;
1790  }
1791  }
1792 
1793  vacrel->offnum = InvalidOffsetNumber;
1794 
1795  /*
1796  * By here we know for sure that caller can put off freezing and pruning
1797  * this particular page until the next VACUUM. Remember its details now.
1798  * (lazy_scan_prune expects a clean slate, so we have to do this last.)
1799  */
1800  vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
1801  vacrel->NewRelminMxid = NoFreezePageRelminMxid;
1802 
1803  /* Save any LP_DEAD items found on the page in dead_items */
1804  if (vacrel->nindexes == 0)
1805  {
1806  /* Using one-pass strategy (since table has no indexes) */
1807  if (lpdead_items > 0)
1808  {
1809  /*
1810  * Perfunctory handling for the corner case where a single pass
1811  * strategy VACUUM cannot get a cleanup lock, and it turns out
1812  * that there is one or more LP_DEAD items: just count the LP_DEAD
1813  * items as missed_dead_tuples instead. (This is a bit dishonest,
1814  * but it beats having to maintain specialized heap vacuuming code
1815  * forever, for vanishingly little benefit.)
1816  */
1817  hastup = true;
1818  missed_dead_tuples += lpdead_items;
1819  }
1820  }
1821  else if (lpdead_items > 0)
1822  {
1823  /*
1824  * Page has LP_DEAD items, and so any references/TIDs that remain in
1825  * indexes will be deleted during index vacuuming (and then marked
1826  * LP_UNUSED in the heap)
1827  */
1828  vacrel->lpdead_item_pages++;
1829 
1830  dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
1831 
1832  vacrel->lpdead_items += lpdead_items;
1833  }
1834 
1835  /*
1836  * Finally, add relevant page-local counts to whole-VACUUM counts
1837  */
1838  vacrel->live_tuples += live_tuples;
1839  vacrel->recently_dead_tuples += recently_dead_tuples;
1840  vacrel->missed_dead_tuples += missed_dead_tuples;
1841  if (missed_dead_tuples > 0)
1842  vacrel->missed_dead_pages++;
1843 
1844  /* Can't truncate this page */
1845  if (hastup)
1846  vacrel->nonempty_pages = blkno + 1;
1847 
1848  /* Did we find LP_DEAD items? */
1849  *has_lpdead_items = (lpdead_items > 0);
1850 
1851  /* Caller won't need to call lazy_scan_prune with same page */
1852  return true;
1853 }
1854 
1855 /*
1856  * Main entry point for index vacuuming and heap vacuuming.
1857  *
1858  * Removes items collected in dead_items from table's indexes, then marks the
1859  * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
1860  * for full details.
1861  *
1862  * Also empties dead_items, freeing up space for later TIDs.
1863  *
1864  * We may choose to bypass index vacuuming at this point, though only when the
1865  * ongoing VACUUM operation will definitely only have one index scan/round of
1866  * index vacuuming.
      *
      * Every path visible here ends with a call to dead_items_reset(),
      * including the early return taken when index vacuuming is disabled.
      *
      * NOTE(review): this listing skips source line 1869 (the line between
      * "static void" and the opening brace) and line 1975 (the line between
      * the failsafe comment and its closing brace). Check both against the
      * original file before relying on this copy.
1867  */
1868 static void
1870 {
1871  bool bypass;
1872 
1873  /* Should not end up here with no indexes */
1874  Assert(vacrel->nindexes > 0);
1875  Assert(vacrel->lpdead_item_pages > 0);
1876 
      /*
       * Index vacuuming is already disabled: discard the collected TIDs and
       * return. Neither indexes nor heap pages are vacuumed on this path.
       */
1877  if (!vacrel->do_index_vacuuming)
1878  {
1879  Assert(!vacrel->do_index_cleanup);
1880  dead_items_reset(vacrel);
1881  return;
1882  }
1883 
1884  /*
1885  * Consider bypassing index vacuuming (and heap vacuuming) entirely.
1886  *
1887  * We currently only do this in cases where the number of LP_DEAD items
1888  * for the entire VACUUM operation is close to zero. This avoids sharp
1889  * discontinuities in the duration and overhead of successive VACUUM
1890  * operations that run against the same table with a fixed workload.
1891  * Ideally, successive VACUUM operations will behave as if there are
1892  * exactly zero LP_DEAD items in cases where there are close to zero.
1893  *
1894  * This is likely to be helpful with a table that is continually affected
1895  * by UPDATEs that can mostly apply the HOT optimization, but occasionally
1896  * have small aberrations that lead to just a few heap pages retaining
1897  * only one or two LP_DEAD items. This is pretty common; even when the
1898  * DBA goes out of their way to make UPDATEs use HOT, it is practically
1899  * impossible to predict whether HOT will be applied in 100% of cases.
1900  * It's far easier to ensure that 99%+ of all UPDATEs against a table use
1901  * HOT through careful tuning.
1902  */
1903  bypass = false;
1904  if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
1905  {
1906  BlockNumber threshold;
1907 
1908  Assert(vacrel->num_index_scans == 0);
1909  Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
1910  Assert(vacrel->do_index_vacuuming);
1911  Assert(vacrel->do_index_cleanup);
1912 
1913  /*
1914  * This crossover point at which we'll start to do index vacuuming is
1915  * expressed as a percentage of the total number of heap pages in the
1916  * table that are known to have at least one LP_DEAD item. This is
1917  * much more important than the total number of LP_DEAD items, since
1918  * it's a proxy for the number of heap pages whose visibility map bits
1919  * cannot be set on account of bypassing index and heap vacuuming.
1920  *
1921  * We apply one further precautionary test: the space currently used
1922  * to store the TIDs (TIDs that now all point to LP_DEAD items) must
1923  * not exceed 32MB. This limits the risk that we will bypass index
1924  * vacuuming again and again until eventually there is a VACUUM whose
1925  * dead_items space is not CPU cache resident.
1926  *
1927  * We don't take any special steps to remember the LP_DEAD items (such
1928  * as counting them in our final update to the stats system) when the
1929  * optimization is applied. Though the accounting used in analyze.c's
1930  * acquire_sample_rows() will recognize the same LP_DEAD items as dead
1931  * rows in its own stats report, that's okay. The discrepancy should
1932  * be negligible. If this optimization is ever expanded to cover more
1933  * cases then this may need to be reconsidered.
1934  */
      /*
       * threshold is a page count: rel_pages scaled by
       * BYPASS_THRESHOLD_PAGES in double arithmetic, then converted back to
       * BlockNumber by the assignment. Bypass requires both the page-count
       * test and the TID-store memory test to pass.
       */
1935  threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
1936  bypass = (vacrel->lpdead_item_pages < threshold &&
1937  (TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
1938  }
1939 
1940  if (bypass)
1941  {
1942  /*
1943  * There are almost zero TIDs. Behave as if there were precisely
1944  * zero: bypass index vacuuming, but do index cleanup.
1945  *
1946  * We expect that the ongoing VACUUM operation will finish very
1947  * quickly, so there is no point in considering speeding up as a
1948  * failsafe against wraparound failure. (Index cleanup is expected to
1949  * finish very quickly in cases where there were no ambulkdelete()
1950  * calls.)
1951  */
1952  vacrel->do_index_vacuuming = false;
1953  }
1954  else if (lazy_vacuum_all_indexes(vacrel))
1955  {
1956  /*
1957  * We successfully completed a round of index vacuuming. Do related
1958  * heap vacuuming now.
1959  */
1960  lazy_vacuum_heap_rel(vacrel);
1961  }
1962  else
1963  {
1964  /*
1965  * Failsafe case.
1966  *
1967  * We attempted index vacuuming, but didn't finish a full round/full
1968  * index scan. This happens when relfrozenxid or relminmxid is too
1969  * far in the past.
1970  *
1971  * From this point on the VACUUM operation will do no further index
1972  * vacuuming or heap vacuuming. This VACUUM operation won't end up
1973  * back here again.
1974  */
1976  }
1977 
1978  /*
1979  * Forget the LP_DEAD items that we just vacuumed (or just decided to not
1980  * vacuum)
1981  */
1982  dead_items_reset(vacrel);
1983 }
1984 
1985 /*
1986  * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
1987  *
1988  * Returns true in the common case when all indexes were successfully
1989  * vacuumed. Returns false in rare cases where we determined that the ongoing
1990  * VACUUM operation is at risk of taking too long to finish, leading to
1991  * wraparound failure.
      *
      * vacrel->num_index_scans is incremented on every path except the
      * precheck early return, so a round cut short by the failsafe still
      * counts as an index scan.
      *
      * NOTE(review): this listing skips source lines 1994 (the line between
      * "static bool" and the opening brace), 1999-2000 and 2003-2005 (the
      * contents of the two progress index arrays) and 2041 (the start of the
      * call whose last argument is "idx + 1"). Check them against the
      * original file.
1992  */
1993 static bool
1995 {
1996  bool allindexes = true;
      /*
       * Value of rel->rd_rel->reltuples read at entry; handed unchanged to
       * every bulk-delete call below as the heap tuple count.
       */
1997  double old_live_tuples = vacrel->rel->rd_rel->reltuples;
1998  const int progress_start_index[] = {
2001  };
2002  const int progress_end_index[] = {
2006  };
2007  int64 progress_start_val[2];
2008  int64 progress_end_val[3];
2009 
2010  Assert(vacrel->nindexes > 0);
2011  Assert(vacrel->do_index_vacuuming);
2012  Assert(vacrel->do_index_cleanup);
2013 
2014  /* Precheck for XID wraparound emergencies */
2015  if (lazy_check_wraparound_failsafe(vacrel))
2016  {
2017  /* Wraparound emergency -- don't even start an index scan */
2018  return false;
2019  }
2020 
2021  /*
2022  * Report that we are now vacuuming indexes and the number of indexes to
2023  * vacuum.
2024  */
2025  progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2026  progress_start_val[1] = vacrel->nindexes;
2027  pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2028 
2029  if (!ParallelVacuumIsActive(vacrel))
2030  {
      /*
       * Serial path: vacuum the indexes one at a time, re-checking the
       * failsafe after each one so the round can be abandoned early.
       */
2031  for (int idx = 0; idx < vacrel->nindexes; idx++)
2032  {
2033  Relation indrel = vacrel->indrels[idx];
2034  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2035 
      /* Store the returned stats back into the per-index slot */
2036  vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2037  old_live_tuples,
2038  vacrel);
2039 
2040  /* Report the number of indexes vacuumed */
2042  idx + 1);
2043 
2044  if (lazy_check_wraparound_failsafe(vacrel))
2045  {
2046  /* Wraparound emergency -- end current index scan */
2047  allindexes = false;
2048  break;
2049  }
2050  }
2051  }
2052  else
2053  {
2054  /* Outsource everything to parallel variant */
2055  parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2056  vacrel->num_index_scans);
2057 
2058  /*
2059  * Do a postcheck to consider applying wraparound failsafe now. Note
2060  * that parallel VACUUM only gets the precheck and this postcheck.
2061  */
2062  if (lazy_check_wraparound_failsafe(vacrel))
2063  allindexes = false;
2064  }
2065 
2066  /*
2067  * We delete all LP_DEAD items from the first heap pass in all indexes on
2068  * each call here (except calls where we choose to do the failsafe). This
2069  * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2070  * of the failsafe triggering, which prevents the next call from taking
2071  * place).
2072  */
2073  Assert(vacrel->num_index_scans > 0 ||
2074  vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2075  Assert(allindexes || VacuumFailsafeActive);
2076 
2077  /*
2078  * Increase and report the number of index scans. Also, we reset
2079  * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2080  *
2081  * We deliberately include the case where we started a round of bulk
2082  * deletes that we weren't able to finish due to the failsafe triggering.
2083  */
2084  vacrel->num_index_scans++;
2085  progress_end_val[0] = 0;
2086  progress_end_val[1] = 0;
2087  progress_end_val[2] = vacrel->num_index_scans;
2088  pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2089 
2090  return allindexes;
2091 }
2092 
2093 /*
2094  * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2095  *
2096  * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2097  * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2098  *
2099  * We may also be able to truncate the line pointer array of the heap pages we
2100  * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2101  * array, it can be reclaimed as free space. These LP_UNUSED items usually
2102  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2103  * each page to LP_UNUSED, and then consider if it's possible to truncate the
2104  * page's line pointer array).
2105  *
2106  * Note: the reason for doing this as a second pass is we cannot remove the
2107  * tuples until we've removed their index entries, and we want to process
2108  * index entry removal in batches as large as possible.
      *
      * NOTE(review): this listing skips source lines 2111 (the line between
      * "static void" and the opening brace), 2124-2125 (the progress report
      * call), 2129-2130 (the remaining arguments of
      * update_vacuum_error_info), 2142 (first statement of the loop body),
      * 2160 (between reading the buffer and vacuuming the page) and 2168
      * (between computing freespace and recording it). Check them against
      * the original file.
2109  */
2110 static void
2112 {
      /* Counts loop iterations, i.e. heap blocks handed to lazy_vacuum_heap_page */
2113  BlockNumber vacuumed_pages = 0;
      /* Visibility map pin carried across iterations; released after the loop */
2114  Buffer vmbuffer = InvalidBuffer;
2115  LVSavedErrInfo saved_err_info;
2116  TidStoreIter *iter;
2117  TidStoreIterResult *iter_result;
2118 
2119  Assert(vacrel->do_index_vacuuming);
2120  Assert(vacrel->do_index_cleanup);
2121  Assert(vacrel->num_index_scans > 0);
2122 
2123  /* Report that we are now vacuuming the heap */
2126 
2127  /* Update error traceback information */
2128  update_vacuum_error_info(vacrel, &saved_err_info,
2131 
      /* Visit each block that has entries in dead_items, one block per iteration */
2132  iter = TidStoreBeginIterate(vacrel->dead_items);
2133  while ((iter_result = TidStoreIterateNext(iter)) != NULL)
2134  {
2135  BlockNumber blkno;
2136  Buffer buf;
2137  Page page;
2138  Size freespace;
2139  OffsetNumber offsets[MaxOffsetNumber];
2140  int num_offsets;
2141 
2143 
2144  blkno = iter_result->blkno;
2145  vacrel->blkno = blkno;
2146 
      /*
       * Copy this block's dead offsets into the local array. The Assert
       * checks that the reported count fits within offsets[].
       */
2147  num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2148  Assert(num_offsets <= lengthof(offsets));
2149 
2150  /*
2151  * Pin the visibility map page in case we need to mark the page
2152  * all-visible. In most cases this will be very cheap, because we'll
2153  * already have the correct page pinned anyway.
2154  */
2155  visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2156 
2157  /* We need a non-cleanup exclusive lock to mark dead_items unused */
2158  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2159  vacrel->bstrategy);
2161  lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2162  num_offsets, vmbuffer);
2163 
2164  /* Now that we've vacuumed the page, record its available space */
2165  page = BufferGetPage(buf);
2166  freespace = PageGetHeapFreeSpace(page);
2167 
2169  RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2170  vacuumed_pages++;
2171  }
2172  TidStoreEndIterate(iter);
2173 
2174  vacrel->blkno = InvalidBlockNumber;
2175  if (BufferIsValid(vmbuffer))
2176  ReleaseBuffer(vmbuffer);
2177 
2178  /*
2179  * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2180  * the second heap pass. No more, no less.
2181  */
2182  Assert(vacrel->num_index_scans > 1 ||
2183  (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2184  vacuumed_pages == vacrel->lpdead_item_pages));
2185 
2186  ereport(DEBUG2,
2187  (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2188  vacrel->relname, (long long) vacrel->dead_items_info->num_items,
2189  vacuumed_pages)));
2190 
2191  /* Revert to the previous phase information for error traceback */
2192  restore_vacuum_error_info(vacrel, &saved_err_info);
2193 }
2194 
2195 /*
2196  * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2197  * vacrel->dead_items store.
2198  *
2199  * Caller must have an exclusive buffer lock on the buffer (though a full
2200  * cleanup lock is also acceptable). vmbuffer must be valid and already have
2201  * a pin on blkno's visibility map page.
      *
      * deadoffsets/num_offsets give the item offsets on this page to set
      * LP_UNUSED; the loop below asserts each one is LP_DEAD without storage.
      * After the items are freed the page is re-tested for all-visibility and,
      * if it qualifies, the page flag and the visibility map are both set.
      *
      * NOTE(review): this listing skips source lines 2204 (function name and
      * leading parameters), 2209 (presumably the declaration of "unused"),
      * 2217, 2221-2222 (remaining arguments of update_vacuum_error_info),
      * 2224 (presumably the start of the critical section that line 2267
      * ends), 2241 (the line pointer array truncation call), 2252 and 2254
      * (arguments of log_heap_prune_and_freeze) and 2279 (presumably the
      * declaration of "flags"). Check them against the original file.
2202  */
2203 static void
2205  OffsetNumber *deadoffsets, int num_offsets,
2206  Buffer vmbuffer)
2207 {
2208  Page page = BufferGetPage(buffer);
2210  int nunused = 0;
2211  TransactionId visibility_cutoff_xid;
2212  bool all_frozen;
2213  LVSavedErrInfo saved_err_info;
2214 
2215  Assert(vacrel->do_index_vacuuming);
2216 
2218 
2219  /* Update error traceback information */
2220  update_vacuum_error_info(vacrel, &saved_err_info,
2223 
2225 
      /*
       * Set each listed item LP_UNUSED and remember its offset in unused[],
       * which is later passed to log_heap_prune_and_freeze for the WAL record.
       */
2226  for (int i = 0; i < num_offsets; i++)
2227  {
2228  ItemId itemid;
2229  OffsetNumber toff = deadoffsets[i];
2230 
2231  itemid = PageGetItemId(page, toff);
2232 
2233  Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2234  ItemIdSetUnused(itemid);
2235  unused[nunused++] = toff;
2236  }
2237 
2238  Assert(nunused > 0);
2239 
2240  /* Attempt to truncate line pointer array now */
2242 
2243  /*
2244  * Mark buffer dirty before we write WAL.
2245  */
2246  MarkBufferDirty(buffer);
2247 
2248  /* XLOG stuff */
2249  if (RelationNeedsWAL(vacrel->rel))
2250  {
2251  log_heap_prune_and_freeze(vacrel->rel, buffer,
2253  false, /* no cleanup lock required */
2255  NULL, 0, /* frozen */
2256  NULL, 0, /* redirected */
2257  NULL, 0, /* dead */
2258  unused, nunused);
2259  }
2260 
2261  /*
2262  * End critical section, so we safely can do visibility tests (which
2263  * possibly need to perform IO and allocate memory!). If we crash now the
2264  * page (including the corresponding vm bit) might not be marked all
2265  * visible, but that's fine. A later vacuum will fix that.
2266  */
2267  END_CRIT_SECTION();
2268 
2269  /*
2270  * Now that we have removed the LP_DEAD items from the page, once again
2271  * check if the page has become all-visible. The page is already marked
2272  * dirty, exclusively locked, and, if needed, a full page image has been
2273  * emitted.
2274  */
2275  Assert(!PageIsAllVisible(page));
2276  if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2277  &all_frozen))
2278  {
2280 
      /*
       * An all-frozen page is expected to come back with no valid cutoff
       * XID (asserted); in that case also request the all-frozen bit.
       */
2281  if (all_frozen)
2282  {
2283  Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2284  flags |= VISIBILITYMAP_ALL_FROZEN;
2285  }
2286 
2287  PageSetAllVisible(page);
2288  visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2289  vmbuffer, visibility_cutoff_xid, flags);
2290  }
2291 
2292  /* Revert to the previous phase information for error traceback */
2293  restore_vacuum_error_info(vacrel, &saved_err_info);
2294 }
2295 
2296 /*
2297  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2298  * relfrozenxid and/or relminmxid that is dangerously far in the past.
2299  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2300  * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2301  *
2302  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2303  * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2304  * that it started out with.
2305  *
2306  * Returns true when failsafe has been triggered.
      *
      * Side effects when the failsafe fires in this call: sets
      * VacuumFailsafeActive, clears vacrel->bstrategy, turns off
      * do_index_vacuuming, do_index_cleanup and do_rel_truncate, resets two
      * progress counters, emits a WARNING, and disables cost-based delay
      * (VacuumCostActive = false, VacuumCostBalance = 0).
      *
      * NOTE(review): this listing skips source lines 2309 (the line between
      * "static bool" and the opening brace), 2312 (the condition guarding the
      * first "return true"), 2315 (the condition guarding the main block) and
      * 2318-2319 (the contents of progress_index). Check them against the
      * original file.
2307  */
2308 static bool
2310 {
2311  /* Don't warn more than once per VACUUM */
2313  return true;
2314 
2316  {
2317  const int progress_index[] = {
2320  };
2321  int64 progress_val[2] = {0, 0};
2322 
2323  VacuumFailsafeActive = true;
2324 
2325  /*
2326  * Abandon use of a buffer access strategy to allow use of all of
2327  * shared buffers. We assume the caller who allocated the memory for
2328  * the BufferAccessStrategy will free it.
2329  */
2330  vacrel->bstrategy = NULL;
2331 
2332  /* Disable index vacuuming, index cleanup, and heap rel truncation */
2333  vacrel->do_index_vacuuming = false;
2334  vacrel->do_index_cleanup = false;
2335  vacrel->do_rel_truncate = false;
2336 
2337  /* Reset the progress counters */
2338  pgstat_progress_update_multi_param(2, progress_index, progress_val);
2339 
2340  ereport(WARNING,
2341  (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2342  vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2343  vacrel->num_index_scans),
2344  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2345  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2346  "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2347 
2348  /* Stop applying cost limits from this point on */
2349  VacuumCostActive = false;
2350  VacuumCostBalance = 0;
2351 
2352  return true;
2353  }
2354 
      /* Failsafe was not triggered by this call */
2355  return false;
2356 }
2357 
2358 /*
2359  * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
      *
      * Runs lazy_cleanup_one_index() on each index in turn, or hands the whole
      * job to parallel_vacuum_cleanup_all_indexes() when parallel vacuum is
      * active. The tuple count passed along is vacrel->new_rel_tuples, and it
      * is flagged as an estimate whenever fewer pages were scanned than the
      * relation contains.
      *
      * NOTE(review): this listing skips source lines 2362 (the line between
      * "static void" and the opening brace), 2367-2368 and 2371-2372 (the
      * contents of the two progress index arrays) and 2400 (the start of the
      * call whose last argument is "idx + 1"). Check them against the
      * original file.
2360  */
2361 static void
2363 {
2364  double reltuples = vacrel->new_rel_tuples;
      /* True when scanned_pages < rel_pages, i.e. not every page was scanned */
2365  bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2366  const int progress_start_index[] = {
2369  };
2370  const int progress_end_index[] = {
2373  };
2374  int64 progress_start_val[2];
2375  int64 progress_end_val[2] = {0, 0};
2376 
2377  Assert(vacrel->do_index_cleanup);
2378  Assert(vacrel->nindexes > 0);
2379 
2380  /*
2381  * Report that we are now cleaning up indexes and the number of indexes to
2382  * cleanup.
2383  */
2384  progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2385  progress_start_val[1] = vacrel->nindexes;
2386  pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2387 
2388  if (!ParallelVacuumIsActive(vacrel))
2389  {
2390  for (int idx = 0; idx < vacrel->nindexes; idx++)
2391  {
2392  Relation indrel = vacrel->indrels[idx];
2393  IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2394 
      /* Store the returned stats back into the per-index slot */
2395  vacrel->indstats[idx] =
2396  lazy_cleanup_one_index(indrel, istat, reltuples,
2397  estimated_count, vacrel);
2398 
2399  /* Report the number of indexes cleaned up */
2401  idx + 1);
2402  }
2403  }
2404  else
2405  {
2406  /* Outsource everything to parallel variant */
2407  parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2408  vacrel->num_index_scans,
2409  estimated_count);
2410  }
2411 
2412  /* Reset the progress counters */
2413  pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2414 }
2415 
2416 /*
2417  * lazy_vacuum_one_index() -- vacuum index relation.
2418  *
2419  * Delete all the index tuples containing a TID collected in
2420  * vacrel->dead_items. Also update running statistics. Exact
2421  * details depend on index AM's ambulkdelete routine.
2422  *
2423  * reltuples is the number of heap tuples to be passed to the
2424  * bulkdelete callback. It's always assumed to be estimated.
2425  * See indexam.sgml for more info.
2426  *
2427  * Returns bulk delete stats derived from input stats
      *
      * While the bulk delete runs, vacrel->indname holds a palloc'd copy of
      * the index name; it is freed and reset to NULL before returning.
      *
      * NOTE(review): this listing skips source lines 2430 (function name and
      * the leading parameters; the body uses "indrel" and "istat", which are
      * presumably declared there) and 2454-2455 (remaining arguments of
      * update_vacuum_error_info). Check them against the original file.
2428  */
2429 static IndexBulkDeleteResult *
2431  double reltuples, LVRelState *vacrel)
2432 {
2433  IndexVacuumInfo ivinfo;
2434  LVSavedErrInfo saved_err_info;
2435 
2436  ivinfo.index = indrel;
2437  ivinfo.heaprel = vacrel->rel;
2438  ivinfo.analyze_only = false;
2439  ivinfo.report_progress = false;
      /* Hard-wired to true: reltuples is always treated as an estimate here */
2440  ivinfo.estimated_count = true;
2441  ivinfo.message_level = DEBUG2;
2442  ivinfo.num_heap_tuples = reltuples;
2443  ivinfo.strategy = vacrel->bstrategy;
2444 
2445  /*
2446  * Update error traceback information.
2447  *
2448  * The index name is saved during this phase and restored immediately
2449  * after this phase. See vacuum_error_callback.
2450  */
2451  Assert(vacrel->indname == NULL);
2452  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2453  update_vacuum_error_info(vacrel, &saved_err_info,
2456 
2457  /* Do bulk deletion */
2458  istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items,
2459  vacrel->dead_items_info);
2460 
2461  /* Revert to the previous phase information for error traceback */
2462  restore_vacuum_error_info(vacrel, &saved_err_info);
2463  pfree(vacrel->indname);
2464  vacrel->indname = NULL;
2465 
2466  return istat;
2467 }
2468 
2469 /*
2470  * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2471  *
2472  * Calls index AM's amvacuumcleanup routine. reltuples is the number
2473  * of heap tuples and estimated_count is true if reltuples is an
2474  * estimated value. See indexam.sgml for more info.
2475  *
2476  * Returns bulk delete stats derived from input stats
      *
      * While the cleanup runs, vacrel->indname holds a palloc'd copy of the
      * index name; it is freed and reset to NULL before returning.
      *
      * NOTE(review): this listing skips source lines 2479 (function name and
      * the leading parameters; the body uses "indrel" and "istat", which are
      * presumably declared there) and 2505-2506 (remaining arguments of
      * update_vacuum_error_info). Check them against the original file.
2477  */
2478 static IndexBulkDeleteResult *
2480  double reltuples, bool estimated_count,
2481  LVRelState *vacrel)
2482 {
2483  IndexVacuumInfo ivinfo;
2484  LVSavedErrInfo saved_err_info;
2485 
2486  ivinfo.index = indrel;
2487  ivinfo.heaprel = vacrel->rel;
2488  ivinfo.analyze_only = false;
2489  ivinfo.report_progress = false;
      /* Unlike bulk deletion, the caller decides whether the count is an estimate */
2490  ivinfo.estimated_count = estimated_count;
2491  ivinfo.message_level = DEBUG2;
2492 
2493  ivinfo.num_heap_tuples = reltuples;
2494  ivinfo.strategy = vacrel->bstrategy;
2495 
2496  /*
2497  * Update error traceback information.
2498  *
2499  * The index name is saved during this phase and restored immediately
2500  * after this phase. See vacuum_error_callback.
2501  */
2502  Assert(vacrel->indname == NULL);
2503  vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2504  update_vacuum_error_info(vacrel, &saved_err_info,
2507 
2508  istat = vac_cleanup_one_index(&ivinfo, istat);
2509 
2510  /* Revert to the previous phase information for error traceback */
2511  restore_vacuum_error_info(vacrel, &saved_err_info);
2512  pfree(vacrel->indname);
2513  vacrel->indname = NULL;
2514 
2515  return istat;
2516 }
2517 
2518 /*
2519  * should_attempt_truncation - should we attempt to truncate the heap?
2520  *
2521  * Don't even think about it unless we have a shot at releasing a goodly
2522  * number of pages. Otherwise, the time taken isn't worth it, mainly because
2523  * an AccessExclusive lock must be replayed on any hot standby, where it can
2524  * be particularly disruptive.
2525  *
2526  * Also don't attempt it if wraparound failsafe is in effect. The entire
2527  * system might be refusing to allocate new XIDs at this point. The system
2528  * definitely won't return to normal unless and until VACUUM actually advances
2529  * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2530  * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2531  * truncate the table under these circumstances, an XID exhaustion error might
2532  * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2533  * There is very little chance of truncation working out when the failsafe is
2534  * in effect in any case. lazy_scan_prune makes the optimistic assumption
2535  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2536  * we're called.
      *
      * Returns true only when truncation is enabled (do_rel_truncate), the
      * failsafe is not active, and the number of trailing pages beyond
      * nonempty_pages is nonzero and reaches either REL_TRUNCATE_MINIMUM or
      * rel_pages / REL_TRUNCATE_FRACTION. Returns false otherwise.
      *
      * NOTE(review): this listing skips source line 2539 (the line between
      * "static bool" and the opening brace). Check it against the original
      * file.
2537  */
2538 static bool
2540 {
2541  BlockNumber possibly_freeable;
2542 
2543  if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
2544  return false;
2545 
      /* Pages past the last one known to be nonempty are truncation candidates */
2546  possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2547  if (possibly_freeable > 0 &&
2548  (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2549  possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2550  return true;
2551 
2552  return false;
2553 }
2554 
2555 /*
2556  * lazy_truncate_heap - try to truncate off any empty pages at the end
      *
      * Each pass of the outer loop tries to take the exclusive lock, re-checks
      * the relation size, rescans the tail with count_nondeletable_pages(),
      * and truncates if that rescan found fewer pages worth keeping. On a
      * successful truncation it updates vacrel->removed_pages and
      * vacrel->rel_pages. The loop repeats only when the rescan was cut short
      * by a lock waiter and pages above nonempty_pages still remain.
      *
      * NOTE(review): this listing skips many source lines in this function:
      * 2559 (the line between "static void" and the opening brace),
      * 2567-2568 (progress report), 2571-2572 (error traceback update), 2590
      * (the condition guarding the inner loop's "break"), 2597 (interrupt
      * check), 2600 (rest of the retry-limit expression), 2613-2614 and 2616
      * (WaitLatch arguments and the statement after it), and 2634, 2650 and
      * 2666 (single statements before the two early returns and after
      * RelationTruncate; presumably the lock releases). Check them against
      * the original file before relying on this copy.
2557  */
2558 static void
2560 {
2561  BlockNumber orig_rel_pages = vacrel->rel_pages;
2562  BlockNumber new_rel_pages;
2563  bool lock_waiter_detected;
2564  int lock_retry;
2565 
2566  /* Report that we are now truncating */
2569 
2570  /* Update error traceback information one last time */
2573 
2574  /*
2575  * Loop until no more truncating can be done.
2576  */
2577  do
2578  {
2579  /*
2580  * We need full exclusive lock on the relation in order to do
2581  * truncation. If we can't get it, give up rather than waiting --- we
2582  * don't want to block other backends, and we don't want to deadlock
2583  * (which is quite possible considering we already hold a lower-grade
2584  * lock).
2585  */
2586  lock_waiter_detected = false;
2587  lock_retry = 0;
2588  while (true)
2589  {
2591  break;
2592 
2593  /*
2594  * Check for interrupts while trying to (re-)acquire the exclusive
2595  * lock.
2596  */
2598 
2599  if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2601  {
2602  /*
2603  * We failed to establish the lock in the specified number of
2604  * retries. This means we give up truncating.
2605  */
2606  ereport(vacrel->verbose ? INFO : DEBUG2,
2607  (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2608  vacrel->relname)));
2609  return;
2610  }
2611 
2612  (void) WaitLatch(MyLatch,
2615  WAIT_EVENT_VACUUM_TRUNCATE);
2617  }
2618 
2619  /*
2620  * Now that we have exclusive lock, look to see if the rel has grown
2621  * whilst we were vacuuming with non-exclusive lock. If so, give up;
2622  * the newly added pages presumably contain non-deletable tuples.
2623  */
2624  new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2625  if (new_rel_pages != orig_rel_pages)
2626  {
2627  /*
2628  * Note: we intentionally don't update vacrel->rel_pages with the
2629  * new rel size here. If we did, it would amount to assuming that
2630  * the new pages are empty, which is unlikely. Leaving the numbers
2631  * alone amounts to assuming that the new pages have the same
2632  * tuple density as existing ones, which is less unlikely.
2633  */
2635  return;
2636  }
2637 
2638  /*
2639  * Scan backwards from the end to verify that the end pages actually
2640  * contain no tuples. This is *necessary*, not optional, because
2641  * other backends could have added tuples to these pages whilst we
2642  * were vacuuming.
2643  */
2644  new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2645  vacrel->blkno = new_rel_pages;
2646 
2647  if (new_rel_pages >= orig_rel_pages)
2648  {
2649  /* can't do anything after all */
2651  return;
2652  }
2653 
2654  /*
2655  * Okay to truncate.
2656  */
2657  RelationTruncate(vacrel->rel, new_rel_pages);
2658 
2659  /*
2660  * We can release the exclusive lock as soon as we have truncated.
2661  * Other backends can't safely access the relation until they have
2662  * processed the smgr invalidation that smgrtruncate sent out ... but
2663  * that should happen as part of standard invalidation processing once
2664  * they acquire lock on the relation.
2665  */
2667 
2668  /*
2669  * Update statistics. Here, it *is* correct to adjust rel_pages
2670  * without also touching reltuples, since the tuple count wasn't
2671  * changed by the truncation.
2672  */
2673  vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2674  vacrel->rel_pages = new_rel_pages;
2675 
2676  ereport(vacrel->verbose ? INFO : DEBUG2,
2677  (errmsg("table \"%s\": truncated %u to %u pages",
2678  vacrel->relname,
2679  orig_rel_pages, new_rel_pages)));
      /* Next pass (if any) must compare against the size we just truncated to */
2680  orig_rel_pages = new_rel_pages;
2681  } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2682 }
2683 
2684 /*
2685  * Rescan end pages to verify that they are (still) empty of tuples.
2686  *
2687  * Returns number of nondeletable pages (last nonempty page + 1).
      *
      * The scan runs backwards from vacrel->rel_pages and stops at
      * vacrel->nonempty_pages, which is also the value returned when every
      * page in between turns out to be empty.
      *
      * *lock_waiter_detected is set to true only on the early return taken
      * when a conflicting lock request is noticed; the visible code never
      * sets it to false, so the caller must initialize it.
      *
      * NOTE(review): this listing skips source lines 2706 (start of the
      * static assertion whose message is on line 2707), 2734 (right-hand side
      * of the elapsed-time comparison), 2736 (the lock-conflict test), 2754
      * (interrupt check), 2768 (inside the prefetch loop), 2777 (presumably
      * the shared buffer lock), and 2783 and 2809 (presumably the buffer
      * releases). Check them against the original file.
2688  */
2689 static BlockNumber
2690 count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2691 {
2692  BlockNumber blkno;
2693  BlockNumber prefetchedUntil;
2694  instr_time starttime;
2695 
2696  /* Initialize the starttime if we check for conflicting lock requests */
2697  INSTR_TIME_SET_CURRENT(starttime);
2698 
2699  /*
2700  * Start checking blocks at what we believe relation end to be and move
2701  * backwards. (Strange coding of loop control is needed because blkno is
2702  * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2703  * in forward direction, so that OS-level readahead can kick in.
2704  */
2705  blkno = vacrel->rel_pages;
2707  "prefetch size must be power of 2");
      /*
       * Starts as InvalidBlockNumber, which presumably compares greater than
       * any real block number so the first iteration always prefetches --
       * TODO confirm.
       */
2708  prefetchedUntil = InvalidBlockNumber;
2709  while (blkno > vacrel->nonempty_pages)
2710  {
2711  Buffer buf;
2712  Page page;
2713  OffsetNumber offnum,
2714  maxoff;
2715  bool hastup;
2716 
2717  /*
2718  * Check if another process requests a lock on our relation. We are
2719  * holding an AccessExclusiveLock here, so they will be waiting. We
2720  * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2721  * only check if that interval has elapsed once every 32 blocks to
2722  * keep the number of system calls and actual shared lock table
2723  * lookups to a minimum.
2724  */
2725  if ((blkno % 32) == 0)
2726  {
2727  instr_time currenttime;
2728  instr_time elapsed;
2729 
2730  INSTR_TIME_SET_CURRENT(currenttime);
2731  elapsed = currenttime;
2732  INSTR_TIME_SUBTRACT(elapsed, starttime);
      /* Microseconds / 1000: the comparison is made in milliseconds */
2733  if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2735  {
2737  {
2738  ereport(vacrel->verbose ? INFO : DEBUG2,
2739  (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
2740  vacrel->relname)));
2741 
      /*
       * blkno has not been decremented yet in this iteration, so
       * the page at blkno - 1 and everything below it is kept.
       */
2742  *lock_waiter_detected = true;
2743  return blkno;
2744  }
      /* No waiter seen: restart the interval from this check */
2745  starttime = currenttime;
2746  }
2747  }
2748 
2749  /*
2750  * We don't insert a vacuum delay point here, because we have an
2751  * exclusive lock on the table which we want to hold for as short a
2752  * time as possible. We still need to check for interrupts however.
2753  */
2755 
2756  blkno--;
2757 
2758  /* If we haven't prefetched this lot yet, do so now. */
2759  if (prefetchedUntil > blkno)
2760  {
2761  BlockNumber prefetchStart;
2762  BlockNumber pblkno;
2763 
      /*
       * Round blkno down to a multiple of PREFETCH_SIZE (this masking is
       * why the size must be a power of 2), then prefetch forwards from
       * there up to and including blkno.
       */
2764  prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2765  for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2766  {
2767  PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
2769  }
2770  prefetchedUntil = prefetchStart;
2771  }
2772 
2773  buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2774  vacrel->bstrategy);
2775 
2776  /* In this phase we only need shared access to the buffer */
2778 
2779  page = BufferGetPage(buf);
2780 
      /* New or empty pages hold no items: move on to the previous block */
2781  if (PageIsNew(page) || PageIsEmpty(page))
2782  {
2784  continue;
2785  }
2786 
2787  hastup = false;
2788  maxoff = PageGetMaxOffsetNumber(page);
2789  for (offnum = FirstOffsetNumber;
2790  offnum <= maxoff;
2791  offnum = OffsetNumberNext(offnum))
2792  {
2793  ItemId itemid;
2794 
2795  itemid = PageGetItemId(page, offnum);
2796 
2797  /*
2798  * Note: any non-unused item should be taken as a reason to keep
2799  * this page. Even an LP_DEAD item makes truncation unsafe, since
2800  * we must not have cleaned out its index entries.
2801  */
2802  if (ItemIdIsUsed(itemid))
2803  {
2804  hastup = true;
2805  break; /* can stop scanning */
2806  }
2807  } /* scan along page */
2808 
2810 
2811  /* Done scanning if we found a tuple here */
2812  if (hastup)
2813  return blkno + 1;
2814  }
2815 
2816  /*
2817  * If we fall out of the loop, all the previously-thought-to-be-empty
2818  * pages still are; we need not bother to look at the last known-nonempty
2819  * page.
2820  */
2821  return vacrel->nonempty_pages;
2822 }
2823 
2824 /*
2825  * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
2826  * shared memory). Sets both in vacrel for caller.
2827  *
2828  * Also handles parallel initialization as part of allocating dead_items in
2829  * DSM when required.
2830  */
2831 static void
2832 dead_items_alloc(LVRelState *vacrel, int nworkers)
2833 {
2834  VacDeadItemsInfo *dead_items_info;
2835  int vac_work_mem = AmAutoVacuumWorkerProcess() &&
2836  autovacuum_work_mem != -1 ?
2838 
2839  /*
2840  * Initialize state for a parallel vacuum. As of now, only one worker can
2841  * be used for an index, so we invoke parallelism only if there are at
2842  * least two indexes on a table.
2843  */
2844  if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
2845  {
2846  /*
2847  * Since parallel workers cannot access data in temporary tables, we
2848  * can't perform parallel vacuum on them.
2849  */
2850  if (RelationUsesLocalBuffers(vacrel->rel))
2851  {
2852  /*
2853  * Give warning only if the user explicitly tries to perform a
2854  * parallel vacuum on the temporary table.
2855  */
2856  if (nworkers > 0)
2857  ereport(WARNING,
2858  (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
2859  vacrel->relname)));
2860  }
2861  else
2862  vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
2863  vacrel->nindexes, nworkers,
2864  vac_work_mem,
2865  vacrel->verbose ? INFO : DEBUG2,
2866  vacrel->bstrategy);
2867 
2868  /*
2869  * If parallel mode started, dead_items and dead_items_info spaces are
2870  * allocated in DSM.
2871  */
2872  if (ParallelVacuumIsActive(vacrel))
2873  {
2874  vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
2875  &vacrel->dead_items_info);
2876  return;
2877  }
2878  }
2879 
2880  /*
2881  * Serial VACUUM case. Allocate both dead_items and dead_items_info
2882  * locally.
2883  */
2884 
2885  dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
2886  dead_items_info->max_bytes = vac_work_mem * 1024L;
2887  dead_items_info->num_items = 0;
2888  vacrel->dead_items_info = dead_items_info;
2889 
2890  vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
2891 }
2892 
2893 /*
2894  * Add the given block number and offset numbers to dead_items.
2895  */
2896 static void
2898  int num_offsets)
2899 {
2900  TidStore *dead_items = vacrel->dead_items;
2901  const int prog_index[2] = {
2904  };
2905  int64 prog_val[2];
2906 
2907  TidStoreSetBlockOffsets(dead_items, blkno, offsets, num_offsets);
2908  vacrel->dead_items_info->num_items += num_offsets;
2909 
2910  /* update the progress information */
2911  prog_val[0] = vacrel->dead_items_info->num_items;
2912  prog_val[1] = TidStoreMemoryUsage(dead_items);
2913  pgstat_progress_update_multi_param(2, prog_index, prog_val);
2914 }
2915 
2916 /*
2917  * Forget all collected dead items.
2918  */
2919 static void
2921 {
2922  TidStore *dead_items = vacrel->dead_items;
2923 
2924  if (ParallelVacuumIsActive(vacrel))
2925  {
2927  return;
2928  }
2929 
2930  /* Recreate the tidstore with the same max_bytes limitation */
2931  TidStoreDestroy(dead_items);
2932  vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
2933 
2934  /* Reset the counter */
2935  vacrel->dead_items_info->num_items = 0;
2936 }
2937 
2938 /*
2939  * Perform cleanup for resources allocated in dead_items_alloc
2940  */
2941 static void
2943 {
2944  if (!ParallelVacuumIsActive(vacrel))
2945  {
2946  /* Don't bother with pfree here */
2947  return;
2948  }
2949 
2950  /* End parallel mode */
2951  parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
2952  vacrel->pvs = NULL;
2953 }
2954 
2955 /*
2956  * Check if every tuple in the given page is visible to all current and future
2957  * transactions. Also return the visibility_cutoff_xid which is the highest
2958  * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
2959  * on this page is frozen.
2960  *
2961  * This is a stripped down version of lazy_scan_prune(). If you change
2962  * anything here, make sure that everything stays in sync. Note that an
2963  * assertion calls us to verify that everybody still agrees. Be sure to avoid
2964  * introducing new side-effects here.
2965  */
2966 static bool
2968  TransactionId *visibility_cutoff_xid,
2969  bool *all_frozen)
2970 {
2971  Page page = BufferGetPage(buf);
2973  OffsetNumber offnum,
2974  maxoff;
2975  bool all_visible = true;
2976 
2977  *visibility_cutoff_xid = InvalidTransactionId;
2978  *all_frozen = true;
2979 
2980  maxoff = PageGetMaxOffsetNumber(page);
2981  for (offnum = FirstOffsetNumber;
2982  offnum <= maxoff && all_visible;
2983  offnum = OffsetNumberNext(offnum))
2984  {
2985  ItemId itemid;
2986  HeapTupleData tuple;
2987 
2988  /*
2989  * Set the offset number so that we can display it along with any
2990  * error that occurred while processing this tuple.
2991  */
2992  vacrel->offnum = offnum;
2993  itemid = PageGetItemId(page, offnum);
2994 
2995  /* Unused or redirect line pointers are of no interest */
2996  if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
2997  continue;
2998 
2999  ItemPointerSet(&(tuple.t_self), blockno, offnum);
3000 
3001  /*
3002  * Dead line pointers can have index pointers pointing to them. So
3003  * they can't be treated as visible
3004  */
3005  if (ItemIdIsDead(itemid))
3006  {
3007  all_visible = false;
3008  *all_frozen = false;
3009  break;
3010  }
3011 
3012  Assert(ItemIdIsNormal(itemid));
3013 
3014  tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3015  tuple.t_len = ItemIdGetLength(itemid);
3016  tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3017 
3018  switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3019  buf))
3020  {
3021  case HEAPTUPLE_LIVE:
3022  {
3023  TransactionId xmin;
3024 
3025  /* Check comments in lazy_scan_prune. */
3027  {
3028  all_visible = false;
3029  *all_frozen = false;
3030  break;
3031  }
3032 
3033  /*
3034  * The inserter definitely committed. But is it old enough
3035  * that everyone sees it as committed?
3036  */
3037  xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3038  if (!TransactionIdPrecedes(xmin,
3039  vacrel->cutoffs.OldestXmin))
3040  {
3041  all_visible = false;
3042  *all_frozen = false;
3043  break;
3044  }
3045 
3046  /* Track newest xmin on page. */
3047  if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3048  TransactionIdIsNormal(xmin))
3049  *visibility_cutoff_xid = xmin;
3050 
3051  /* Check whether this tuple is already frozen or not */
3052  if (all_visible && *all_frozen &&
3054  *all_frozen = false;
3055  }
3056  break;
3057 
3058  case HEAPTUPLE_DEAD:
3062  {
3063  all_visible = false;
3064  *all_frozen = false;
3065  break;
3066  }
3067  default:
3068  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3069  break;
3070  }
3071  } /* scan along page */
3072 
3073  /* Clear the offset information once we have processed the given page. */
3074  vacrel->offnum = InvalidOffsetNumber;
3075 
3076  return all_visible;
3077 }
3078 
3079 /*
3080  * Update index statistics in pg_class if the statistics are accurate.
3081  */
3082 static void
3084 {
3085  Relation *indrels = vacrel->indrels;
3086  int nindexes = vacrel->nindexes;
3087  IndexBulkDeleteResult **indstats = vacrel->indstats;
3088 
3089  Assert(vacrel->do_index_cleanup);
3090 
3091  for (int idx = 0; idx < nindexes; idx++)
3092  {
3093  Relation indrel = indrels[idx];
3094  IndexBulkDeleteResult *istat = indstats[idx];
3095 
3096  if (istat == NULL || istat->estimated_count)
3097  continue;
3098 
3099  /* Update index statistics */
3100  vac_update_relstats(indrel,
3101  istat->num_pages,
3102  istat->num_index_tuples,
3103  0,
3104  false,
3107  NULL, NULL, false);
3108  }
3109 }
3110 
3111 /*
3112  * Error context callback for errors occurring during vacuum. The error
3113  * context messages for index phases should match the messages set in parallel
3114  * vacuum. If you change this function for those phases, change
3115  * parallel_vacuum_error_callback() as well.
3116  */
3117 static void
3119 {
3120  LVRelState *errinfo = arg;
3121 
3122  switch (errinfo->phase)
3123  {
3125  if (BlockNumberIsValid(errinfo->blkno))
3126  {
3127  if (OffsetNumberIsValid(errinfo->offnum))
3128  errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3129  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3130  else
3131  errcontext("while scanning block %u of relation \"%s.%s\"",
3132  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3133  }
3134  else
3135  errcontext("while scanning relation \"%s.%s\"",
3136  errinfo->relnamespace, errinfo->relname);
3137  break;
3138 
3140  if (BlockNumberIsValid(errinfo->blkno))
3141  {
3142  if (OffsetNumberIsValid(errinfo->offnum))
3143  errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3144  errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3145  else
3146  errcontext("while vacuuming block %u of relation \"%s.%s\"",
3147  errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3148  }
3149  else
3150  errcontext("while vacuuming relation \"%s.%s\"",
3151  errinfo->relnamespace, errinfo->relname);
3152  break;
3153 
3155  errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3156  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3157  break;
3158 
3160  errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3161  errinfo->indname, errinfo->relnamespace, errinfo->relname);
3162  break;
3163 
3165  if (BlockNumberIsValid(errinfo->blkno))
3166  errcontext("while truncating relation \"%s.%s\" to %u blocks",
3167  errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3168  break;
3169 
3171  default:
3172  return; /* do nothing; the errinfo may not be
3173  * initialized */
3174  }
3175 }
3176 
3177 /*
3178  * Updates the information required for vacuum error callback. This also saves
3179  * the current information which can be later restored via restore_vacuum_error_info.
3180  */
3181 static void
3183  int phase, BlockNumber blkno, OffsetNumber offnum)
3184 {
3185  if (saved_vacrel)
3186  {
3187  saved_vacrel->offnum = vacrel->offnum;
3188  saved_vacrel->blkno = vacrel->blkno;
3189  saved_vacrel->phase = vacrel->phase;
3190  }
3191 
3192  vacrel->blkno = blkno;
3193  vacrel->offnum = offnum;
3194  vacrel->phase = phase;
3195 }
3196 
3197 /*
3198  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3199  */
3200 static void
3202  const LVSavedErrInfo *saved_vacrel)
3203 {
3204  vacrel->blkno = saved_vacrel->blkno;
3205  vacrel->offnum = saved_vacrel->offnum;
3206  vacrel->phase = saved_vacrel->phase;
3207 }
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int autovacuum_work_mem
Definition: autovacuum.c:119
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1720
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
bool track_io_timing
Definition: bufmgr.c:143
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:639
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4924
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2532
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5238
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5158
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:793
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5399
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
@ RBM_NORMAL
Definition: bufmgr.h:45
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
static bool PageIsEmpty(Page page)
Definition: bufpage.h:223
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
#define SizeOfPageHeaderData
Definition: bufpage.h:216
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsNew(Page page)
Definition: bufpage.h:233
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
static XLogRecPtr PageGetLSN(const char *page)
Definition: bufpage.h:386
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
signed int int32
Definition: c.h:497
#define Max(x, y)
Definition: c.h:1001
#define Assert(condition)
Definition: c.h:861
TransactionId MultiXactId
Definition: c.h:665
#define unlikely(x)
Definition: c.h:314
#define lengthof(array)
Definition: c.h:791
unsigned char uint8
Definition: c.h:507
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:941
uint32 TransactionId
Definition: c.h:655
size_t Size
Definition: c.h:608
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3187
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errdetail(const char *fmt,...)
Definition: elog.c:1203
ErrorContextCallback * error_context_stack
Definition: elog.c:94
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define errcontext
Definition: elog.h:196
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define INFO
Definition: elog.h:34
#define ereport(elevel,...)
Definition: elog.h:149
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:244
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:194
bool VacuumCostActive
Definition: globals.c:157
int VacuumCostBalance
Definition: globals.c:156
int maintenance_work_mem
Definition: globals.c:132
struct Latch * MyLatch
Definition: globals.c:62
Oid MyDatabaseId
Definition: globals.c:93
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7564
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7619
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:43
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_LIVE
Definition: heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:130
@ HEAPTUPLE_DEAD
Definition: heapam.h:126
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:272
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:271
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:42
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
int verbose
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
static int pg_cmp_u16(uint16 a, uint16 b)
Definition: int.h:592
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:310
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:275
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:363
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3366
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:372
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3331
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
#define InvalidMultiXactId
Definition: multixact.h:24
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
#define MaxOffsetNumber
Definition: off.h:28
void * arg
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
static char * buf
Definition: pg_test_fsync.c:73
int64 PgStat_Counter
Definition: pgstat.h:120
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
#define qsort(a, b, c, d)
Definition: port.h:447
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4111
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:38
#define PROGRESS_VACUUM_DEAD_TUPLE_BYTES
Definition: progress.h:27
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:33
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:35
#define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS
Definition: progress.h:28
#define PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
Definition: progress.h:26
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:36
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:34
#define PROGRESS_VACUUM_INDEXES_PROCESSED
Definition: progress.h:30
#define PROGRESS_VACUUM_INDEXES_TOTAL
Definition: progress.h:29
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:37
void heap_page_prune_and_freeze(Relation relation, Buffer buffer, GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:350
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2053
#define RelationGetRelid(relation)
Definition: rel.h:505
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:637
#define RelationGetNamespace(relation)
Definition: rel.h:546
@ MAIN_FORKNUM
Definition: relpath.h:58
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:288
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
struct ErrorContextCallback * previous
Definition: elog.h:296
void(* callback)(void *arg)
Definition: elog.h:297
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
bool estimated_count
Definition: genam.h:78
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_newly_deleted
Definition: genam.h:81
BlockNumber pages_free
Definition: genam.h:83
BlockNumber num_pages
Definition: genam.h:77
double num_index_tuples
Definition: genam.h:79
Relation index
Definition: genam.h:46
double num_heap_tuples
Definition: genam.h:52
bool analyze_only
Definition: genam.h:48
BufferAccessStrategy strategy
Definition: genam.h:53
Relation heaprel
Definition: genam.h:47
bool report_progress
Definition: genam.h:49
int message_level
Definition: genam.h:51
bool estimated_count
Definition: genam.h:50
ParallelVacuumState * pvs
Definition: vacuumlazy.c:145
bool verbose
Definition: vacuumlazy.c:175
VacDeadItemsInfo * dead_items_info
Definition: vacuumlazy.c:188
int nindexes
Definition: vacuumlazy.c:141
Buffer next_unskippable_vmbuffer
Definition: vacuumlazy.c:218
OffsetNumber offnum
Definition: vacuumlazy.c:173
TidStore * dead_items
Definition: vacuumlazy.c:187
int64 tuples_deleted
Definition: vacuumlazy.c:207
BlockNumber nonempty_pages
Definition: vacuumlazy.c:196
bool do_rel_truncate
Definition: vacuumlazy.c:157
BlockNumber scanned_pages
Definition: vacuumlazy.c:191
bool aggressive
Definition: vacuumlazy.c:148
GlobalVisState * vistest
Definition: vacuumlazy.c:161
BlockNumber removed_pages
Definition: vacuumlazy.c:192
int num_index_scans
Definition: vacuumlazy.c:205
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:202
double new_live_tuples
Definition: vacuumlazy.c:200
double new_rel_tuples
Definition: vacuumlazy.c:199
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:163
Relation rel
Definition: vacuumlazy.c:139
bool consider_bypass_optimization
Definition: vacuumlazy.c:152
BlockNumber rel_pages
Definition: vacuumlazy.c:190
BlockNumber next_unskippable_block
Definition: vacuumlazy.c:216
int64 recently_dead_tuples
Definition: vacuumlazy.c:211
int64 tuples_frozen
Definition: vacuumlazy.c:208
BlockNumber frozen_pages
Definition: vacuumlazy.c:193
char * dbname
Definition: vacuumlazy.c:168
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:195
BlockNumber current_block
Definition: vacuumlazy.c:215
char * relnamespace
Definition: vacuumlazy.c:169
int64 live_tuples
Definition: vacuumlazy.c:210
int64 lpdead_items
Definition: vacuumlazy.c:209
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:144
bool skippedallvis
Definition: vacuumlazy.c:165
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:194
Relation * indrels
Definition: vacuumlazy.c:140
bool skipwithvm
Definition: vacuumlazy.c:150
bool do_index_cleanup
Definition: vacuumlazy.c:156
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:164
int64 missed_dead_tuples
Definition: vacuumlazy.c:212
BlockNumber blkno
Definition: vacuumlazy.c:172
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:160
bool next_unskippable_allvis
Definition: vacuumlazy.c:217
char * relname
Definition: vacuumlazy.c:170
VacErrPhase phase
Definition: vacuumlazy.c:174
char * indname
Definition: vacuumlazy.c:171
bool do_index_vacuuming
Definition: vacuumlazy.c:155
BlockNumber blkno
Definition: vacuumlazy.c:224
VacErrPhase phase
Definition: vacuumlazy.c:226
OffsetNumber offnum
Definition: vacuumlazy.c:225
int recently_dead_tuples
Definition: heapam.h:235
TransactionId vm_conflict_horizon
Definition: heapam.h:250
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:264
bool all_visible
Definition: heapam.h:248
Form_pg_class rd_rel
Definition: rel.h:111
BlockNumber blkno
Definition: tidstore.h:29
size_t max_bytes
Definition: vacuum.h:287
int64 num_items
Definition: vacuum.h:288
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId OldestXmin
Definition: vacuum.h:267
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278
MultiXactId OldestMxact
Definition: vacuum.h:268
int nworkers
Definition: vacuum.h:239
VacOptValue truncate
Definition: vacuum.h:231
bits32 options
Definition: vacuum.h:219
bool is_wraparound
Definition: vacuum.h:226
int log_min_duration
Definition: vacuum.h:227
VacOptValue index_cleanup
Definition: vacuum.h:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
void TidStoreEndIterate(TidStoreIter *iter)
Definition: tidstore.c:526
TidStoreIterResult * TidStoreIterateNext(TidStoreIter *iter)
Definition: tidstore.c:501
void TidStoreDestroy(TidStore *ts)
Definition: tidstore.c:325
TidStore * TidStoreCreateLocal(size_t max_bytes, bool insert_only)
Definition: tidstore.c:162
int TidStoreGetBlockOffsets(TidStoreIterResult *result, OffsetNumber *offsets, int max_offsets)
Definition: tidstore.c:574
TidStoreIter * TidStoreBeginIterate(TidStore *ts)
Definition: tidstore.c:479
void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
Definition: tidstore.c:353
size_t TidStoreMemoryUsage(TidStore *ts)
Definition: tidstore.c:540
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
Definition: vacuum.c:2516
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2298
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1410
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2341
void vacuum_delay_point(void)
Definition: vacuum.c:2362
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1084
bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1252
bool VacuumFailsafeActive
Definition: vacuum.c:96
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1314
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition: vacuum.c:2537
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:2942
static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:2967
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3083
static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
Definition: vacuumlazy.c:2897
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:82
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3118
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2559
static void lazy_vacuum(LVRelState *vacrel)
Definition: vacuumlazy.c:1869
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2362
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
Definition: vacuumlazy.c:1658
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:71
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2539
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
Definition: vacuumlazy.c:1293
VacErrPhase
Definition: vacuumlazy.c:127
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition: vacuumlazy.c:129
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition: vacuumlazy.c:130
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition: vacuumlazy.c:133
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition: vacuumlazy.c:132
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition: vacuumlazy.c:131
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:128
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:826
#define ParallelVacuumIsActive(vacrel)
Definition: vacuumlazy.c:123
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
Definition: vacuumlazy.c:3201
static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, bool *all_visible_according_to_vm)
Definition: vacuumlazy.c:1096
static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool all_visible_according_to_vm, bool *has_lpdead_items)
Definition: vacuumlazy.c:1416
void heap_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:295
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
Definition: vacuumlazy.c:2430
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
Definition: vacuumlazy.c:1194
static void dead_items_reset(LVRelState *vacrel)
Definition: vacuumlazy.c:2920
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:72
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2309
struct LVSavedErrInfo LVSavedErrInfo
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
Definition: vacuumlazy.c:2479
#define PREFETCH_SIZE
Definition: vacuumlazy.c:117
static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
Definition: vacuumlazy.c:2204
struct LVRelState LVRelState
#define BYPASS_THRESHOLD_PAGES
Definition: vacuumlazy.c:89
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:2832
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:83
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:1994
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
Definition: vacuumlazy.c:3182
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
Definition: vacuumlazy.c:2690
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:111
#define FAILSAFE_EVERY_PAGES
Definition: vacuumlazy.c:95
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:81
static int cmpOffsetNumbers(const void *a, const void *b)
Definition: vacuumlazy.c:1397
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
Definition: vacuumlazy.c:2111
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:104
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int vac_work_mem, int elevel, BufferAccessStrategy bstrategy)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans)
void parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count)
TidStore * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs, VacDeadItemsInfo **dead_items_info_p)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
bool IsInParallelMode(void)
Definition: xact.c:1088
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237