vacuumlazy.c
1/*-------------------------------------------------------------------------
2 *
3 * vacuumlazy.c
4 * Concurrent ("lazy") vacuuming.
5 *
6 * The major space usage for vacuuming is storage for the dead tuple IDs that
7 * are to be removed from indexes. We want to ensure we can vacuum even the
8 * very largest relations with finite memory space usage. To do that, we set
9 * upper bounds on the memory that can be used for keeping track of dead TIDs
10 * at once.
11 *
12 * We are willing to use at most maintenance_work_mem (or perhaps
13 * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
14 * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
15 * the pages that we've pruned). This frees up the memory space dedicated to
16 * store dead TIDs.
17 *
18 * In practice VACUUM will often complete its initial pass over the target
19 * heap relation without ever running out of space to store TIDs. This means
20 * that there only needs to be one call to lazy_vacuum, after the initial pass
21 * completes.
22 *
23 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
25 *
26 *
27 * IDENTIFICATION
28 * src/backend/access/heap/vacuumlazy.c
29 *
30 *-------------------------------------------------------------------------
31 */
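/*
 * A standalone, simplified sketch (not part of vacuumlazy.c) of the bounded
 * dead-TID strategy described in the header comment above: dead item IDs are
 * accumulated in a store of limited size, and whenever the store fills up an
 * index/heap vacuuming pass is performed and the store is emptied.  Real
 * VACUUM uses a TidStore bounded by maintenance_work_mem (or
 * autovacuum_work_mem); the fixed-size counter, the capacities, and the
 * per-page dead-item count below are made-up stand-ins for illustration.
 */
#include <stdio.h>

#define STORE_CAPACITY   1000   /* stand-in for the memory budget */
#define TOTAL_PAGES      500
#define DEAD_PER_PAGE    7      /* pretend every page yields 7 dead items */

int
main(void)
{
    int     store_used = 0;
    int     vacuum_passes = 0;

    for (int page = 0; page < TOTAL_PAGES; page++)
    {
        /* "prune" the page: it contributes DEAD_PER_PAGE dead item IDs */
        if (store_used + DEAD_PER_PAGE > STORE_CAPACITY)
        {
            /* store is full: vacuum indexes and already-pruned heap pages */
            vacuum_passes++;
            store_used = 0;
        }
        store_used += DEAD_PER_PAGE;
    }

    /* one final pass removes whatever is still in the store */
    if (store_used > 0)
        vacuum_passes++;

    printf("index/heap vacuum passes needed: %d\n", vacuum_passes);
    return 0;
}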
32#include "postgres.h"
33
34#include <math.h>
35
36#include "access/genam.h"
37#include "access/heapam.h"
38#include "access/htup_details.h"
39#include "access/multixact.h"
40#include "access/tidstore.h"
41#include "access/transam.h"
43#include "access/xloginsert.h"
44#include "catalog/storage.h"
45#include "commands/dbcommands.h"
46#include "commands/progress.h"
47#include "commands/vacuum.h"
48#include "common/int.h"
49#include "executor/instrument.h"
50#include "miscadmin.h"
51#include "pgstat.h"
54#include "storage/bufmgr.h"
55#include "storage/freespace.h"
56#include "storage/lmgr.h"
57#include "utils/lsyscache.h"
58#include "utils/pg_rusage.h"
59#include "utils/timestamp.h"
60
61
62/*
63 * Space/time tradeoff parameters: do these need to be user-tunable?
64 *
65 * To consider truncating the relation, we want there to be at least
66 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
67 * is less) potentially-freeable pages.
68 */
69#define REL_TRUNCATE_MINIMUM 1000
70#define REL_TRUNCATE_FRACTION 16
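/*
 * Minimal sketch (separate from the real should_attempt_truncation() further
 * below) of the threshold described above: truncation is only worth
 * considering when the number of potentially-freeable pages at the end of
 * the relation reaches the smaller of REL_TRUNCATE_MINIMUM and
 * rel_pages / REL_TRUNCATE_FRACTION.  The real function additionally checks
 * whether truncation is enabled and whether the failsafe has triggered;
 * those checks are omitted here.  demo_truncation_worthwhile() is a made-up
 * name, and uint32_t stands in for BlockNumber.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
demo_truncation_worthwhile(uint32_t rel_pages, uint32_t nonempty_pages)
{
    uint32_t possibly_freeable = rel_pages - nonempty_pages;

    return possibly_freeable > 0 &&
        (possibly_freeable >= 1000 /* REL_TRUNCATE_MINIMUM */ ||
         possibly_freeable >= rel_pages / 16 /* REL_TRUNCATE_FRACTION */);
}

int
main(void)
{
    /* 500 freeable pages out of 100000: below both thresholds, so false */
    printf("%d\n", demo_truncation_worthwhile(100000, 99500));
    /* 1200 freeable pages out of 8000: >= REL_TRUNCATE_MINIMUM, so true */
    printf("%d\n", demo_truncation_worthwhile(8000, 6800));
    return 0;
}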
71
72/*
73 * Timing parameters for truncate locking heuristics.
74 *
75 * These were not exposed as user tunable GUC values because it didn't seem
76 * that the potential for improvement was great enough to merit the cost of
77 * supporting them.
78 */
79#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
80#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
81#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
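/*
 * Hedged sketch of how the three timing constants above fit together during
 * truncation (the authoritative logic lives in lazy_truncate_heap() and its
 * backward-scan helper): the AccessExclusiveLock is retried roughly every
 * WAIT_INTERVAL ms and abandoned after TIMEOUT ms, and once the lock is held
 * the lock queue is polled about every CHECK_INTERVAL ms so that waiters are
 * not starved.  demo_try_exclusive_lock() is an imaginary stand-in for the
 * conditional lock attempt, and no real sleeping is done; elapsed time is
 * simply simulated.
 */
#include <stdbool.h>
#include <stdio.h>

#define CHECK_INTERVAL_MS 20
#define WAIT_INTERVAL_MS  50
#define TIMEOUT_MS        5000

/* Stand-in for a conditional lock attempt; always "busy" in this demo. */
static bool
demo_try_exclusive_lock(void)
{
    return false;
}

int
main(void)
{
    int     elapsed_ms = 0;

    /* Retry the conditional lock every WAIT_INTERVAL_MS, up to TIMEOUT_MS. */
    while (!demo_try_exclusive_lock())
    {
        if (elapsed_ms >= TIMEOUT_MS)
        {
            printf("gave up on truncation after %d ms\n", elapsed_ms);
            return 0;
        }
        /* a real implementation would sleep here; we just count the time */
        elapsed_ms += WAIT_INTERVAL_MS;
    }

    printf("lock acquired; would now poll waiters every %d ms\n",
           CHECK_INTERVAL_MS);
    return 0;
}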
82
83/*
84 * Threshold that controls whether we bypass index vacuuming and heap
85 * vacuuming as an optimization
86 */
87#define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
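/*
 * Simplified sketch of the bypass test that BYPASS_THRESHOLD_PAGES feeds
 * into (the authoritative version is in lazy_vacuum() below): index and heap
 * vacuuming can be skipped when only a tiny fraction of heap pages have
 * LP_DEAD items.  The real test also caps the memory consumed by dead_items;
 * that part is left out here.  demo_should_bypass_index_vacuuming() is a
 * made-up name for illustration only.
 */
#include <stdbool.h>
#include <stdio.h>

static bool
demo_should_bypass_index_vacuuming(unsigned int rel_pages,
                                   unsigned int lpdead_item_pages)
{
    double threshold = (double) rel_pages * 0.02;  /* BYPASS_THRESHOLD_PAGES */

    return rel_pages > 0 && (double) lpdead_item_pages < threshold;
}

int
main(void)
{
    /* 150 pages with LP_DEAD items out of 10000 (1.5%): bypass applies */
    printf("%d\n", demo_should_bypass_index_vacuuming(10000, 150));
    /* 300 pages out of 10000 (3%): index vacuuming goes ahead */
    printf("%d\n", demo_should_bypass_index_vacuuming(10000, 300));
    return 0;
}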
88
89/*
90 * Perform a failsafe check each time we scan another 4GB of pages.
91 * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
92 */
93#define FAILSAFE_EVERY_PAGES \
94 ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
95
96/*
97 * When a table has no indexes, vacuum the FSM after every 8GB, approximately
98 * (it won't be exact because we only vacuum FSM after processing a heap page
99 * that has some removable tuples). When there are indexes, this is ignored,
100 * and we vacuum FSM after each index/heap cleaning pass.
101 */
102#define VACUUM_FSM_EVERY_PAGES \
103 ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
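/*
 * Worked example of the two block-count macros above, assuming the default
 * BLCKSZ of 8192 bytes (BLCKSZ is a build-time option, so the real values
 * can differ): 4GB / 8KB = 524288 (2^19) pages between failsafe checks, and
 * 8GB / 8KB = 1048576 (2^20) pages between FSM vacuums for indexless tables.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const uint64_t blcksz = 8192;   /* assumed default BLCKSZ */

    printf("FAILSAFE_EVERY_PAGES   = %llu\n",
           (unsigned long long) ((uint64_t) 4 * 1024 * 1024 * 1024 / blcksz));
    printf("VACUUM_FSM_EVERY_PAGES = %llu\n",
           (unsigned long long) ((uint64_t) 8 * 1024 * 1024 * 1024 / blcksz));
    return 0;
}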
104
105/*
106 * Before we consider skipping a page that's marked as clean in
107 * visibility map, we must've seen at least this many clean pages.
108 */
109#define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
110
111/*
112 * Size of the prefetch window for lazy vacuum backwards truncation scan.
113 * Needs to be a power of 2.
114 */
115#define PREFETCH_SIZE ((BlockNumber) 32)
116
117/*
118 * Macro to check if we are in a parallel vacuum. If true, we are in the
119 * parallel mode and the DSM segment is initialized.
120 */
121#define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
122
123/* Phases of vacuum during which we report error context. */
124typedef enum
125{
133
134typedef struct LVRelState
135{
136 /* Target heap relation and its indexes */
140
141 /* Buffer access strategy and parallel vacuum state */
144
145 /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
147 /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
149 /* Consider index vacuuming bypass optimization? */
151
152 /* Doing index vacuuming, index cleanup, rel truncation? */
156
157 /* VACUUM operation's cutoffs for freezing and pruning */
160 /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
164
165 /* Error reporting state */
166 char *dbname;
168 char *relname;
169 char *indname; /* Current index name */
170 BlockNumber blkno; /* used only for heap operations */
171 OffsetNumber offnum; /* used only for heap operations */
173 bool verbose; /* VACUUM VERBOSE? */
174
175 /*
176 * dead_items stores TIDs whose index tuples are deleted by index
177 * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
178 * that has been processed by lazy_scan_prune. Also needed by
179 * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
180 * LP_UNUSED during second heap pass.
181 *
182 * Both dead_items and dead_items_info are allocated in shared memory in
183 * parallel vacuum cases.
184 */
185 TidStore *dead_items; /* TIDs whose index tuples we'll delete */
187
188 BlockNumber rel_pages; /* total number of pages */
189 BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
190 BlockNumber removed_pages; /* # pages removed by relation truncation */
191 BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
192
193 /* # pages newly set all-visible in the VM */
195
196 /*
197 * # pages newly set all-visible and all-frozen in the VM. This is a
198 * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
199 * all pages set all-visible, but vm_new_visible_frozen_pages includes
200 * only those which were also set all-frozen.
201 */
203
204 /* # all-visible pages newly set all-frozen in the VM */
206
207 BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
208 BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
209 BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
210
211 /* Statistics output by us, for table */
212 double new_rel_tuples; /* new estimated total # of tuples */
213 double new_live_tuples; /* new estimated total # of live tuples */
214 /* Statistics output by index AMs */
216
217 /* Instrumentation counters */
219 /* Counters that follow are only for scanned_pages */
220 int64 tuples_deleted; /* # deleted from table */
221 int64 tuples_frozen; /* # newly frozen */
222 int64 lpdead_items; /* # deleted from indexes */
223 int64 live_tuples; /* # live tuples remaining */
224 int64 recently_dead_tuples; /* # dead, but not yet removable */
225 int64 missed_dead_tuples; /* # removable, but not removed */
226
227 /* State maintained by heap_vac_scan_next_block() */
228 BlockNumber current_block; /* last block returned */
229 BlockNumber next_unskippable_block; /* next unskippable block */
230 bool next_unskippable_allvis; /* its visibility status */
231 Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
233
234/* Struct for saving and restoring vacuum error information. */
235typedef struct LVSavedErrInfo
236{
241
242
243/* non-export function prototypes */
244static void lazy_scan_heap(LVRelState *vacrel);
245static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
246 bool *all_visible_according_to_vm);
247static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
248static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
249 BlockNumber blkno, Page page,
250 bool sharelock, Buffer vmbuffer);
251static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
252 BlockNumber blkno, Page page,
253 Buffer vmbuffer, bool all_visible_according_to_vm,
254 bool *has_lpdead_items);
255static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
256 BlockNumber blkno, Page page,
257 bool *has_lpdead_items);
258static void lazy_vacuum(LVRelState *vacrel);
259static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
260static void lazy_vacuum_heap_rel(LVRelState *vacrel);
261static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
262 Buffer buffer, OffsetNumber *deadoffsets,
263 int num_offsets, Buffer vmbuffer);
264static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
265static void lazy_cleanup_all_indexes(LVRelState *vacrel);
268 double reltuples,
269 LVRelState *vacrel);
272 double reltuples,
273 bool estimated_count,
274 LVRelState *vacrel);
275static bool should_attempt_truncation(LVRelState *vacrel);
276static void lazy_truncate_heap(LVRelState *vacrel);
278 bool *lock_waiter_detected);
279static void dead_items_alloc(LVRelState *vacrel, int nworkers);
280static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
281 int num_offsets);
282static void dead_items_reset(LVRelState *vacrel);
283static void dead_items_cleanup(LVRelState *vacrel);
284static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
285 TransactionId *visibility_cutoff_xid, bool *all_frozen);
286static void update_relstats_all_indexes(LVRelState *vacrel);
287static void vacuum_error_callback(void *arg);
288static void update_vacuum_error_info(LVRelState *vacrel,
289 LVSavedErrInfo *saved_vacrel,
290 int phase, BlockNumber blkno,
291 OffsetNumber offnum);
292static void restore_vacuum_error_info(LVRelState *vacrel,
293 const LVSavedErrInfo *saved_vacrel);
294
295
296/*
297 * heap_vacuum_rel() -- perform VACUUM for one heap relation
298 *
299 * This routine sets things up for and then calls lazy_scan_heap, where
300 * almost all work actually takes place. Finalizes everything after call
301 * returns by managing relation truncation and updating rel's pg_class
302 * entry. (Also updates pg_class entries for any indexes that need it.)
303 *
304 * At entry, we have already established a transaction and opened
305 * and locked the relation.
306 */
307void
309 BufferAccessStrategy bstrategy)
310{
311 LVRelState *vacrel;
312 bool verbose,
313 instrument,
314 skipwithvm,
315 frozenxid_updated,
316 minmulti_updated;
317 BlockNumber orig_rel_pages,
318 new_rel_pages,
319 new_rel_allvisible;
320 PGRUsage ru0;
321 TimestampTz starttime = 0;
322 PgStat_Counter startreadtime = 0,
323 startwritetime = 0;
324 WalUsage startwalusage = pgWalUsage;
325 BufferUsage startbufferusage = pgBufferUsage;
326 ErrorContextCallback errcallback;
327 char **indnames = NULL;
328
329 verbose = (params->options & VACOPT_VERBOSE) != 0;
330 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
331 params->log_min_duration >= 0));
332 if (instrument)
333 {
334 pg_rusage_init(&ru0);
335 starttime = GetCurrentTimestamp();
336 if (track_io_timing)
337 {
338 startreadtime = pgStatBlockReadTime;
339 startwritetime = pgStatBlockWriteTime;
340 }
341 }
342
344 RelationGetRelid(rel));
345
346 /*
347 * Setup error traceback support for ereport() first. The idea is to set
348 * up an error context callback to display additional information on any
349 * error during a vacuum. During different phases of vacuum, we update
350 * the state so that the error context callback always displays the
351 * current information.
352 *
353 * Copy the names of heap rel into local memory for error reporting
354 * purposes, too. It isn't always safe to assume that we can get the name
355 * of each rel. It's convenient for code in lazy_scan_heap to always use
356 * these temp copies.
357 */
358 vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
362 vacrel->indname = NULL;
364 vacrel->verbose = verbose;
365 errcallback.callback = vacuum_error_callback;
366 errcallback.arg = vacrel;
367 errcallback.previous = error_context_stack;
368 error_context_stack = &errcallback;
369
370 /* Set up high level stuff about rel and its indexes */
371 vacrel->rel = rel;
373 &vacrel->indrels);
374 vacrel->bstrategy = bstrategy;
375 if (instrument && vacrel->nindexes > 0)
376 {
377 /* Copy index names used by instrumentation (not error reporting) */
378 indnames = palloc(sizeof(char *) * vacrel->nindexes);
379 for (int i = 0; i < vacrel->nindexes; i++)
380 indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
381 }
382
383 /*
384 * The index_cleanup param either disables index vacuuming and cleanup or
385 * forces it to go ahead when we would otherwise apply the index bypass
386 * optimization. The default is 'auto', which leaves the final decision
387 * up to lazy_vacuum().
388 *
389 * The truncate param allows the user to avoid attempting relation
390 * truncation, though it can't force truncation to happen.
391 */
394 params->truncate != VACOPTVALUE_AUTO);
395
396 /*
397 * While VacuumFailsafeActive is reset to false before calling this, we
398 * still need to reset it here due to recursive calls.
399 */
400 VacuumFailsafeActive = false;
401 vacrel->consider_bypass_optimization = true;
402 vacrel->do_index_vacuuming = true;
403 vacrel->do_index_cleanup = true;
404 vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
405 if (params->index_cleanup == VACOPTVALUE_DISABLED)
406 {
407 /* Force disable index vacuuming up-front */
408 vacrel->do_index_vacuuming = false;
409 vacrel->do_index_cleanup = false;
410 }
411 else if (params->index_cleanup == VACOPTVALUE_ENABLED)
412 {
413 /* Force index vacuuming. Note that failsafe can still bypass. */
414 vacrel->consider_bypass_optimization = false;
415 }
416 else
417 {
418 /* Default/auto, make all decisions dynamically */
420 }
421
422 /* Initialize page counters explicitly (be tidy) */
423 vacrel->scanned_pages = 0;
424 vacrel->removed_pages = 0;
425 vacrel->new_frozen_tuple_pages = 0;
426 vacrel->lpdead_item_pages = 0;
427 vacrel->missed_dead_pages = 0;
428 vacrel->nonempty_pages = 0;
429 /* dead_items_alloc allocates vacrel->dead_items later on */
430
431 /* Allocate/initialize output statistics state */
432 vacrel->new_rel_tuples = 0;
433 vacrel->new_live_tuples = 0;
434 vacrel->indstats = (IndexBulkDeleteResult **)
435 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
436
437 /* Initialize remaining counters (be tidy) */
438 vacrel->num_index_scans = 0;
439 vacrel->tuples_deleted = 0;
440 vacrel->tuples_frozen = 0;
441 vacrel->lpdead_items = 0;
442 vacrel->live_tuples = 0;
443 vacrel->recently_dead_tuples = 0;
444 vacrel->missed_dead_tuples = 0;
445
446 vacrel->vm_new_visible_pages = 0;
447 vacrel->vm_new_visible_frozen_pages = 0;
448 vacrel->vm_new_frozen_pages = 0;
449
450 /*
451 * Get cutoffs that determine which deleted tuples are considered DEAD,
452 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
453 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
454 * happen in this order to ensure that the OldestXmin cutoff field works
455 * as an upper bound on the XIDs stored in the pages we'll actually scan
456 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
457 *
458 * Next acquire vistest, a related cutoff that's used in pruning. We use
459 * vistest in combination with OldestXmin to ensure that
460 * heap_page_prune_and_freeze() always removes any deleted tuple whose
461 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
462 * whether a tuple should be frozen or removed. (In the future we might
463 * want to teach lazy_scan_prune to recompute vistest from time to time,
464 * to increase the number of dead tuples it can prune away.)
465 */
466 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
467 vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
468 vacrel->vistest = GlobalVisTestFor(rel);
469 /* Initialize state used to track oldest extant XID/MXID */
470 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
471 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
472 vacrel->skippedallvis = false;
473 skipwithvm = true;
475 {
476 /*
477 * Force aggressive mode, and disable skipping blocks using the
478 * visibility map (even those set all-frozen)
479 */
480 vacrel->aggressive = true;
481 skipwithvm = false;
482 }
483
484 vacrel->skipwithvm = skipwithvm;
485
486 if (verbose)
487 {
488 if (vacrel->aggressive)
490 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
491 vacrel->dbname, vacrel->relnamespace,
492 vacrel->relname)));
493 else
495 (errmsg("vacuuming \"%s.%s.%s\"",
496 vacrel->dbname, vacrel->relnamespace,
497 vacrel->relname)));
498 }
499
500 /*
501 * Allocate dead_items memory using dead_items_alloc. This handles
502 * parallel VACUUM initialization as part of allocating shared memory
503 * space used for dead_items. (But do a failsafe precheck first, to
504 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
505 * is already dangerously old.)
506 */
508 dead_items_alloc(vacrel, params->nworkers);
509
510 /*
511 * Call lazy_scan_heap to perform all required heap pruning, index
512 * vacuuming, and heap vacuuming (plus related processing)
513 */
514 lazy_scan_heap(vacrel);
515
516 /*
517 * Free resources managed by dead_items_alloc. This ends parallel mode in
518 * passing when necessary.
519 */
520 dead_items_cleanup(vacrel);
522
523 /*
524 * Update pg_class entries for each of rel's indexes where appropriate.
525 *
526 * Unlike the later update to rel's pg_class entry, this is not critical.
527 * Maintains relpages/reltuples statistics used by the planner only.
528 */
529 if (vacrel->do_index_cleanup)
531
532 /* Done with rel's indexes */
533 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
534
535 /* Optionally truncate rel */
536 if (should_attempt_truncation(vacrel))
537 lazy_truncate_heap(vacrel);
538
539 /* Pop the error context stack */
540 error_context_stack = errcallback.previous;
541
542 /* Report that we are now doing final cleanup */
545
546 /*
547 * Prepare to update rel's pg_class entry.
548 *
549 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
550 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
551 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
552 */
553 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
555 vacrel->cutoffs.relfrozenxid,
556 vacrel->NewRelfrozenXid));
557 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
559 vacrel->cutoffs.relminmxid,
560 vacrel->NewRelminMxid));
561 if (vacrel->skippedallvis)
562 {
563 /*
564 * Must keep original relfrozenxid in a non-aggressive VACUUM that
565 * chose to skip an all-visible page range. The state that tracks new
566 * values will have missed unfrozen XIDs from the pages we skipped.
567 */
568 Assert(!vacrel->aggressive);
571 }
572
573 /*
574 * For safety, clamp relallvisible to be not more than what we're setting
575 * pg_class.relpages to
576 */
577 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
578 visibilitymap_count(rel, &new_rel_allvisible, NULL);
579 if (new_rel_allvisible > new_rel_pages)
580 new_rel_allvisible = new_rel_pages;
581
582 /*
583 * Now actually update rel's pg_class entry.
584 *
585 * In principle new_live_tuples could be -1 indicating that we (still)
586 * don't know the tuple count. In practice that can't happen, since we
587 * scan every page that isn't skipped using the visibility map.
588 */
589 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
590 new_rel_allvisible, vacrel->nindexes > 0,
591 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
592 &frozenxid_updated, &minmulti_updated, false);
593
594 /*
595 * Report results to the cumulative stats system, too.
596 *
597 * Deliberately avoid telling the stats system about LP_DEAD items that
598 * remain in the table due to VACUUM bypassing index and heap vacuuming.
599 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
600 * It seems like a good idea to err on the side of not vacuuming again too
601 * soon in cases where the failsafe prevented significant amounts of heap
602 * vacuuming.
603 */
605 rel->rd_rel->relisshared,
606 Max(vacrel->new_live_tuples, 0),
607 vacrel->recently_dead_tuples +
608 vacrel->missed_dead_tuples);
610
611 if (instrument)
612 {
614
615 if (verbose || params->log_min_duration == 0 ||
616 TimestampDifferenceExceeds(starttime, endtime,
617 params->log_min_duration))
618 {
619 long secs_dur;
620 int usecs_dur;
621 WalUsage walusage;
622 BufferUsage bufferusage;
624 char *msgfmt;
625 int32 diff;
626 double read_rate = 0,
627 write_rate = 0;
628 int64 total_blks_hit;
629 int64 total_blks_read;
630 int64 total_blks_dirtied;
631
632 TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
633 memset(&walusage, 0, sizeof(WalUsage));
634 WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
635 memset(&bufferusage, 0, sizeof(BufferUsage));
636 BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
637
638 total_blks_hit = bufferusage.shared_blks_hit +
639 bufferusage.local_blks_hit;
640 total_blks_read = bufferusage.shared_blks_read +
641 bufferusage.local_blks_read;
642 total_blks_dirtied = bufferusage.shared_blks_dirtied +
643 bufferusage.local_blks_dirtied;
644
646 if (verbose)
647 {
648 /*
649 * Aggressiveness already reported earlier, in dedicated
650 * VACUUM VERBOSE ereport
651 */
652 Assert(!params->is_wraparound);
653 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
654 }
655 else if (params->is_wraparound)
656 {
657 /*
658 * While it's possible for a VACUUM to be both is_wraparound
659 * and !aggressive, that's just a corner-case -- is_wraparound
660 * implies aggressive. Produce distinct output for the corner
661 * case all the same, just in case.
662 */
663 if (vacrel->aggressive)
664 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
665 else
666 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
667 }
668 else
669 {
670 if (vacrel->aggressive)
671 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
672 else
673 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
674 }
675 appendStringInfo(&buf, msgfmt,
676 vacrel->dbname,
677 vacrel->relnamespace,
678 vacrel->relname,
679 vacrel->num_index_scans);
680 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
681 vacrel->removed_pages,
682 new_rel_pages,
683 vacrel->scanned_pages,
684 orig_rel_pages == 0 ? 100.0 :
685 100.0 * vacrel->scanned_pages / orig_rel_pages);
687 _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
688 (long long) vacrel->tuples_deleted,
689 (long long) vacrel->new_rel_tuples,
690 (long long) vacrel->recently_dead_tuples);
691 if (vacrel->missed_dead_tuples > 0)
693 _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
694 (long long) vacrel->missed_dead_tuples,
695 vacrel->missed_dead_pages);
696 diff = (int32) (ReadNextTransactionId() -
697 vacrel->cutoffs.OldestXmin);
699 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
700 vacrel->cutoffs.OldestXmin, diff);
701 if (frozenxid_updated)
702 {
703 diff = (int32) (vacrel->NewRelfrozenXid -
704 vacrel->cutoffs.relfrozenxid);
706 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
707 vacrel->NewRelfrozenXid, diff);
708 }
709 if (minmulti_updated)
710 {
711 diff = (int32) (vacrel->NewRelminMxid -
712 vacrel->cutoffs.relminmxid);
714 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
715 vacrel->NewRelminMxid, diff);
716 }
717 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
719 orig_rel_pages == 0 ? 100.0 :
720 100.0 * vacrel->new_frozen_tuple_pages /
721 orig_rel_pages,
722 (long long) vacrel->tuples_frozen);
723
725 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
726 vacrel->vm_new_visible_pages,
728 vacrel->vm_new_frozen_pages,
729 vacrel->vm_new_frozen_pages);
730 if (vacrel->do_index_vacuuming)
731 {
732 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
733 appendStringInfoString(&buf, _("index scan not needed: "));
734 else
735 appendStringInfoString(&buf, _("index scan needed: "));
736
737 msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
738 }
739 else
740 {
742 appendStringInfoString(&buf, _("index scan bypassed: "));
743 else
744 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
745
746 msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
747 }
748 appendStringInfo(&buf, msgfmt,
749 vacrel->lpdead_item_pages,
750 orig_rel_pages == 0 ? 100.0 :
751 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
752 (long long) vacrel->lpdead_items);
753 for (int i = 0; i < vacrel->nindexes; i++)
754 {
755 IndexBulkDeleteResult *istat = vacrel->indstats[i];
756
757 if (!istat)
758 continue;
759
761 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
762 indnames[i],
763 istat->num_pages,
764 istat->pages_newly_deleted,
765 istat->pages_deleted,
766 istat->pages_free);
767 }
768 if (track_io_timing)
769 {
770 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
771 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
772
773 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
774 read_ms, write_ms);
775 }
776 if (secs_dur > 0 || usecs_dur > 0)
777 {
778 read_rate = (double) BLCKSZ * total_blks_read /
779 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
780 write_rate = (double) BLCKSZ * total_blks_dirtied /
781 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
782 }
783 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
784 read_rate, write_rate);
786 _("buffer usage: %lld hits, %lld reads, %lld dirtied\n"),
787 (long long) total_blks_hit,
788 (long long) total_blks_read,
789 (long long) total_blks_dirtied);
791 _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
792 (long long) walusage.wal_records,
793 (long long) walusage.wal_fpi,
794 (unsigned long long) walusage.wal_bytes);
795 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
796
798 (errmsg_internal("%s", buf.data)));
799 pfree(buf.data);
800 }
801 }
802
803 /* Cleanup index statistics and index names */
804 for (int i = 0; i < vacrel->nindexes; i++)
805 {
806 if (vacrel->indstats[i])
807 pfree(vacrel->indstats[i]);
808
809 if (instrument)
810 pfree(indnames[i]);
811 }
812}
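/*
 * Worked example of the read/write rate arithmetic used by the logging code
 * in heap_vacuum_rel() above.  The input numbers are invented: with the
 * default 8192-byte BLCKSZ, reading 40960 blocks over 10.5 seconds comes out
 * to 8192 * 40960 / (1024 * 1024) / 10.5 = 320 MB / 10.5 s, about 30.5 MB/s.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    const double blcksz = 8192;       /* assumed default BLCKSZ */
    int64_t     total_blks_read = 40960;
    long        secs_dur = 10;
    int         usecs_dur = 500000;   /* i.e. 10.5 s elapsed */
    double      read_rate = 0;

    if (secs_dur > 0 || usecs_dur > 0)
        read_rate = blcksz * total_blks_read /
            (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);

    printf("avg read rate: %.3f MB/s\n", read_rate);
    return 0;
}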
813
814/*
815 * lazy_scan_heap() -- workhorse function for VACUUM
816 *
817 * This routine prunes each page in the heap, and considers the need to
818 * freeze remaining tuples with storage (not including pages that can be
819 * skipped using the visibility map). Also performs related maintenance
820 * of the FSM and visibility map. These steps all take place during an
821 * initial pass over the target heap relation.
822 *
823 * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
824 * consists of deleting index tuples that point to LP_DEAD items left in
825 * heap pages following pruning. Earlier initial pass over the heap will
826 * have collected the TIDs whose index tuples need to be removed.
827 *
828 * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
829 * largely consists of marking LP_DEAD items (from vacrel->dead_items)
830 * as LP_UNUSED. This has to happen in a second, final pass over the
831 * heap, to preserve a basic invariant that all index AMs rely on: no
832 * extant index tuple can ever be allowed to contain a TID that points to
833 * an LP_UNUSED line pointer in the heap. We must disallow premature
834 * recycling of line pointers to avoid index scans that get confused
835 * about which TID points to which tuple immediately after recycling.
836 * (Actually, this isn't a concern when the target heap relation happens to
837 * have no indexes, which allows us to safely apply the one-pass strategy
838 * as an optimization).
839 *
840 * In practice we often have enough space to fit all TIDs, and so won't
841 * need to call lazy_vacuum more than once, after our initial pass over
842 * the heap has totally finished. Otherwise things are slightly more
843 * complicated: our "initial pass" over the heap applies only to those
844 * pages that were pruned before we needed to call lazy_vacuum, and our
845 * "final pass" over the heap only vacuums these same heap pages.
846 * However, we process indexes in full every time lazy_vacuum is called,
847 * which makes index processing very inefficient when memory is in short
848 * supply.
849 */
850static void
852{
853 BlockNumber rel_pages = vacrel->rel_pages,
854 blkno,
855 next_fsm_block_to_vacuum = 0;
856 bool all_visible_according_to_vm;
857
858 Buffer vmbuffer = InvalidBuffer;
859 const int initprog_index[] = {
863 };
864 int64 initprog_val[3];
865
866 /* Report that we're scanning the heap, advertising total # of blocks */
867 initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
868 initprog_val[1] = rel_pages;
869 initprog_val[2] = vacrel->dead_items_info->max_bytes;
870 pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
871
872 /* Initialize for the first heap_vac_scan_next_block() call */
875 vacrel->next_unskippable_allvis = false;
877
878 while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
879 {
880 Buffer buf;
881 Page page;
882 bool has_lpdead_items;
883 bool got_cleanup_lock = false;
884
885 vacrel->scanned_pages++;
886
887 /* Report as block scanned, update error traceback information */
890 blkno, InvalidOffsetNumber);
891
893
894 /*
895 * Regularly check if wraparound failsafe should trigger.
896 *
897 * There is a similar check inside lazy_vacuum_all_indexes(), but
898 * relfrozenxid might start to look dangerously old before we reach
899 * that point. This check also provides failsafe coverage for the
900 * one-pass strategy, and the two-pass strategy with the index_cleanup
901 * param set to 'off'.
902 */
903 if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
905
906 /*
907 * Consider if we definitely have enough space to process TIDs on page
908 * already. If we are close to overrunning the available space for
909 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
910 * this page.
911 */
913 {
914 /*
915 * Before beginning index vacuuming, we release any pin we may
916 * hold on the visibility map page. This isn't necessary for
917 * correctness, but we do it anyway to avoid holding the pin
918 * across a lengthy, unrelated operation.
919 */
920 if (BufferIsValid(vmbuffer))
921 {
922 ReleaseBuffer(vmbuffer);
923 vmbuffer = InvalidBuffer;
924 }
925
926 /* Perform a round of index and heap vacuuming */
927 vacrel->consider_bypass_optimization = false;
928 lazy_vacuum(vacrel);
929
930 /*
931 * Vacuum the Free Space Map to make newly-freed space visible on
932 * upper-level FSM pages. Note we have not yet processed blkno.
933 */
934 FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
935 blkno);
936 next_fsm_block_to_vacuum = blkno;
937
938 /* Report that we are once again scanning the heap */
941 }
942
943 /*
944 * Pin the visibility map page in case we need to mark the page
945 * all-visible. In most cases this will be very cheap, because we'll
946 * already have the correct page pinned anyway.
947 */
948 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
949
951 vacrel->bstrategy);
952 page = BufferGetPage(buf);
953
954 /*
955 * We need a buffer cleanup lock to prune HOT chains and defragment
956 * the page in lazy_scan_prune. But when it's not possible to acquire
957 * a cleanup lock right away, we may be able to settle for reduced
958 * processing using lazy_scan_noprune.
959 */
960 got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
961
962 if (!got_cleanup_lock)
964
965 /* Check for new or empty pages before lazy_scan_[no]prune call */
966 if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
967 vmbuffer))
968 {
969 /* Processed as new/empty page (lock and pin released) */
970 continue;
971 }
972
973 /*
974 * If we didn't get the cleanup lock, we can still collect LP_DEAD
975 * items in the dead_items area for later vacuuming, count live and
976 * recently dead tuples for vacuum logging, and determine if this
977 * block could later be truncated. If we encounter any xid/mxids that
978 * require advancing the relfrozenxid/relminmxid, we'll have to wait
979 * for a cleanup lock and call lazy_scan_prune().
980 */
981 if (!got_cleanup_lock &&
982 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
983 {
984 /*
985 * lazy_scan_noprune could not do all required processing. Wait
986 * for a cleanup lock, and call lazy_scan_prune in the usual way.
987 */
988 Assert(vacrel->aggressive);
991 got_cleanup_lock = true;
992 }
993
994 /*
995 * If we have a cleanup lock, we must now prune, freeze, and count
996 * tuples. We may have acquired the cleanup lock originally, or we may
997 * have gone back and acquired it after lazy_scan_noprune() returned
998 * false. Either way, the page hasn't been processed yet.
999 *
1000 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1001 * recently_dead_tuples and live tuples for vacuum logging, determine
1002 * if the block can later be truncated, and accumulate the details of
1003 * remaining LP_DEAD line pointers on the page into dead_items. These
1004 * dead items include those pruned by lazy_scan_prune() as well as
1005 * line pointers previously marked LP_DEAD.
1006 */
1007 if (got_cleanup_lock)
1008 lazy_scan_prune(vacrel, buf, blkno, page,
1009 vmbuffer, all_visible_according_to_vm,
1010 &has_lpdead_items);
1011
1012 /*
1013 * Now drop the buffer lock and, potentially, update the FSM.
1014 *
1015 * Our goal is to update the freespace map the last time we touch the
1016 * page. If we'll process a block in the second pass, we may free up
1017 * additional space on the page, so it is better to update the FSM
1018 * after the second pass. If the relation has no indexes, or if index
1019 * vacuuming is disabled, there will be no second heap pass; if this
1020 * particular page has no dead items, the second heap pass will not
1021 * touch this page. So, in those cases, update the FSM now.
1022 *
1023 * Note: In corner cases, it's possible to miss updating the FSM
1024 * entirely. If index vacuuming is currently enabled, we'll skip the
1025 * FSM update now. But if failsafe mode is later activated, or there
1026 * are so few dead tuples that index vacuuming is bypassed, there will
1027 * also be no opportunity to update the FSM later, because we'll never
1028 * revisit this page. Since updating the FSM is desirable but not
1029 * absolutely required, that's OK.
1030 */
1031 if (vacrel->nindexes == 0
1032 || !vacrel->do_index_vacuuming
1033 || !has_lpdead_items)
1034 {
1035 Size freespace = PageGetHeapFreeSpace(page);
1036
1038 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1039
1040 /*
1041 * Periodically perform FSM vacuuming to make newly-freed space
1042 * visible on upper FSM pages. This is done after vacuuming if the
1043 * table has indexes. There will only be newly-freed space if we
1044 * held the cleanup lock and lazy_scan_prune() was called.
1045 */
1046 if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
1047 blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1048 {
1049 FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1050 blkno);
1051 next_fsm_block_to_vacuum = blkno;
1052 }
1053 }
1054 else
1056 }
1057
1058 vacrel->blkno = InvalidBlockNumber;
1059 if (BufferIsValid(vmbuffer))
1060 ReleaseBuffer(vmbuffer);
1061
1062 /* report that everything is now scanned */
1064
1065 /* now we can compute the new value for pg_class.reltuples */
1066 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1067 vacrel->scanned_pages,
1068 vacrel->live_tuples);
1069
1070 /*
1071 * Also compute the total number of surviving heap entries. In the
1072 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1073 */
1074 vacrel->new_rel_tuples =
1075 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1076 vacrel->missed_dead_tuples;
1077
1078 /*
1079 * Do index vacuuming (call each index's ambulkdelete routine), then do
1080 * related heap vacuuming
1081 */
1082 if (vacrel->dead_items_info->num_items > 0)
1083 lazy_vacuum(vacrel);
1084
1085 /*
1086 * Vacuum the remainder of the Free Space Map. We must do this whether or
1087 * not there were indexes, and whether or not we bypassed index vacuuming.
1088 */
1089 if (blkno > next_fsm_block_to_vacuum)
1090 FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1091
1092 /* report all blocks vacuumed */
1094
1095 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1096 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1098}
1099
1100/*
1101 * heap_vac_scan_next_block() -- get next block for vacuum to process
1102 *
1103 * lazy_scan_heap() calls here every time it needs to get the next block to
1104 * prune and vacuum. The function uses the visibility map, vacuum options,
1105 * and various thresholds to skip blocks which do not need to be processed and
1106 * sets blkno to the next block to process.
1107 *
1108 * The block number and visibility status of the next block to process are set
1109 * in *blkno and *all_visible_according_to_vm. The return value is false if
1110 * there are no further blocks to process.
1111 *
1112 * vacrel is an in/out parameter here. Vacuum options and information about
1113 * the relation are read. vacrel->skippedallvis is set if we skip a block
1114 * that's all-visible but not all-frozen, to ensure that we don't update
1115 * relfrozenxid in that case. vacrel also holds information about the next
1116 * unskippable block, as bookkeeping for this function.
1117 */
1118static bool
1120 bool *all_visible_according_to_vm)
1121{
1122 BlockNumber next_block;
1123
1124 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1125 next_block = vacrel->current_block + 1;
1126
1127 /* Have we reached the end of the relation? */
1128 if (next_block >= vacrel->rel_pages)
1129 {
1131 {
1134 }
1135 *blkno = vacrel->rel_pages;
1136 return false;
1137 }
1138
1139 /*
1140 * We must be in one of the three following states:
1141 */
1142 if (next_block > vacrel->next_unskippable_block ||
1144 {
1145 /*
1146 * 1. We have just processed an unskippable block (or we're at the
1147 * beginning of the scan). Find the next unskippable block using the
1148 * visibility map.
1149 */
1150 bool skipsallvis;
1151
1152 find_next_unskippable_block(vacrel, &skipsallvis);
1153
1154 /*
1155 * We now know the next block that we must process. It can be the
1156 * next block after the one we just processed, or something further
1157 * ahead. If it's further ahead, we can jump to it, but we choose to
1158 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1159 * pages. Since we're reading sequentially, the OS should be doing
1160 * readahead for us, so there's no gain in skipping a page now and
1161 * then. Skipping such a range might even discourage sequential
1162 * detection.
1163 *
1164 * This test also enables more frequent relfrozenxid advancement
1165 * during non-aggressive VACUUMs. If the range has any all-visible
1166 * pages then skipping makes updating relfrozenxid unsafe, which is a
1167 * real downside.
1168 */
1169 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1170 {
1171 next_block = vacrel->next_unskippable_block;
1172 if (skipsallvis)
1173 vacrel->skippedallvis = true;
1174 }
1175 }
1176
1177 /* Now we must be in one of the two remaining states: */
1178 if (next_block < vacrel->next_unskippable_block)
1179 {
1180 /*
1181 * 2. We are processing a range of blocks that we could have skipped
1182 * but chose not to. We know that they are all-visible in the VM,
1183 * otherwise they would've been unskippable.
1184 */
1185 *blkno = vacrel->current_block = next_block;
1186 *all_visible_according_to_vm = true;
1187 return true;
1188 }
1189 else
1190 {
1191 /*
1192 * 3. We reached the next unskippable block. Process it. On next
1193 * iteration, we will be back in state 1.
1194 */
1195 Assert(next_block == vacrel->next_unskippable_block);
1196
1197 *blkno = vacrel->current_block = next_block;
1198 *all_visible_according_to_vm = vacrel->next_unskippable_allvis;
1199 return true;
1200 }
1201}
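/*
 * Standalone sketch of the skipping policy implemented by
 * heap_vac_scan_next_block() and find_next_unskippable_block() above: an
 * all-visible run is only jumped over when it is at least
 * SKIP_PAGES_THRESHOLD pages long; shorter runs are read anyway, since OS
 * readahead makes that nearly free.  A plain bool array stands in for the
 * visibility map, and aggressive mode, all-frozen bits, and the last-page
 * rule are ignored for brevity.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_SKIP_PAGES_THRESHOLD 32
#define DEMO_REL_PAGES            100

int
main(void)
{
    bool    all_visible[DEMO_REL_PAGES] = {false};
    int     scanned = 0;

    /* mark pages 10..79 all-visible: a 70-page skippable run */
    for (int i = 10; i < 80; i++)
        all_visible[i] = true;

    for (int blkno = 0; blkno < DEMO_REL_PAGES; blkno++)
    {
        if (all_visible[blkno])
        {
            /* measure the length of the all-visible run starting here */
            int     run = 0;

            while (blkno + run < DEMO_REL_PAGES && all_visible[blkno + run])
                run++;
            if (run >= DEMO_SKIP_PAGES_THRESHOLD)
            {
                blkno += run - 1;   /* jump; the loop increment lands past it */
                continue;
            }
        }
        scanned++;          /* page would be read and pruned */
    }

    printf("pages scanned: %d of %d\n", scanned, DEMO_REL_PAGES);
    return 0;
}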
1202
1203/*
1204 * Find the next unskippable block in a vacuum scan using the visibility map.
1205 * The next unskippable block and its visibility information is updated in
1206 * vacrel.
1207 *
1208 * Note: our opinion of which blocks can be skipped can go stale immediately.
1209 * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1210 * was concurrently cleared, though. All that matters is that caller scan all
1211 * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1212 * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1213 * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1214 * to skip such a range is actually made, making everything safe.)
1215 */
1216static void
1217find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1218{
1219 BlockNumber rel_pages = vacrel->rel_pages;
1220 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1221 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1222 bool next_unskippable_allvis;
1223
1224 *skipsallvis = false;
1225
1226 for (;;)
1227 {
1228 uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1229 next_unskippable_block,
1230 &next_unskippable_vmbuffer);
1231
1232 next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1233
1234 /*
1235 * A block is unskippable if it is not all visible according to the
1236 * visibility map.
1237 */
1238 if (!next_unskippable_allvis)
1239 {
1240 Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1241 break;
1242 }
1243
1244 /*
1245 * Caller must scan the last page to determine whether it has tuples
1246 * (caller must have the opportunity to set vacrel->nonempty_pages).
1247 * This rule avoids having lazy_truncate_heap() take access-exclusive
1248 * lock on rel to attempt a truncation that fails anyway, just because
1249 * there are tuples on the last page (it is likely that there will be
1250 * tuples on other nearby pages as well, but those can be skipped).
1251 *
1252 * Implement this by always treating the last block as unsafe to skip.
1253 */
1254 if (next_unskippable_block == rel_pages - 1)
1255 break;
1256
1257 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1258 if (!vacrel->skipwithvm)
1259 break;
1260
1261 /*
1262 * Aggressive VACUUM caller can't skip pages just because they are
1263 * all-visible. They may still skip all-frozen pages, which can't
1264 * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1265 */
1266 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1267 {
1268 if (vacrel->aggressive)
1269 break;
1270
1271 /*
1272 * All-visible block is safe to skip in non-aggressive case. But
1273 * remember that the final range contains such a block for later.
1274 */
1275 *skipsallvis = true;
1276 }
1277
1278 next_unskippable_block++;
1279 }
1280
1281 /* write the local variables back to vacrel */
1282 vacrel->next_unskippable_block = next_unskippable_block;
1283 vacrel->next_unskippable_allvis = next_unskippable_allvis;
1284 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1285}
1286
1287/*
1288 * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1289 *
1290 * Must call here to handle both new and empty pages before calling
1291 * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1292 * with new or empty pages.
1293 *
1294 * It's necessary to consider new pages as a special case, since the rules for
1295 * maintaining the visibility map and FSM with empty pages are a little
1296 * different (though new pages can be truncated away during rel truncation).
1297 *
1298 * Empty pages are not really a special case -- they're just heap pages that
1299 * have no allocated tuples (including even LP_UNUSED items). You might
1300 * wonder why we need to handle them here all the same. It's only necessary
1301 * because of a corner-case involving a hard crash during heap relation
1302 * extension. If we ever make relation-extension crash safe, then it should
1303 * no longer be necessary to deal with empty pages here (or new pages, for
1304 * that matter).
1305 *
1306 * Caller must hold at least a shared lock. We might need to escalate the
1307 * lock in that case, so the type of lock caller holds needs to be specified
1308 * using 'sharelock' argument.
1309 *
1310 * Returns false in common case where caller should go on to call
1311 * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1312 * that lazy_scan_heap is done processing the page, releasing lock on caller's
1313 * behalf.
1314 */
1315static bool
1317 Page page, bool sharelock, Buffer vmbuffer)
1318{
1319 Size freespace;
1320
1321 if (PageIsNew(page))
1322 {
1323 /*
1324 * All-zeroes pages can be left over if either a backend extends the
1325 * relation by a single page, but crashes before the newly initialized
1326 * page has been written out, or when bulk-extending the relation
1327 * (which creates a number of empty pages at the tail end of the
1328 * relation), and then enters them into the FSM.
1329 *
1330 * Note we do not enter the page into the visibilitymap. That has the
1331 * downside that we repeatedly visit this page in subsequent vacuums,
1332 * but otherwise we'll never discover the space on a promoted standby.
1333 * The harm of repeated checking normally ought not to be too bad. The
1334 * space should usually get used at some point; otherwise there
1335 * wouldn't be any regular vacuums.
1336 *
1337 * Make sure these pages are in the FSM, to ensure they can be reused.
1338 * Do that by testing if there's any space recorded for the page. If
1339 * not, enter it. We do so after releasing the lock on the heap page,
1340 * the FSM is approximate, after all.
1341 */
1343
1344 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1345 {
1346 freespace = BLCKSZ - SizeOfPageHeaderData;
1347
1348 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1349 }
1350
1351 return true;
1352 }
1353
1354 if (PageIsEmpty(page))
1355 {
1356 /*
1357 * It seems likely that caller will always be able to get a cleanup
1358 * lock on an empty page. But don't take any chances -- escalate to
1359 * an exclusive lock (still don't need a cleanup lock, though).
1360 */
1361 if (sharelock)
1362 {
1365
1366 if (!PageIsEmpty(page))
1367 {
1368 /* page isn't new or empty -- keep lock and pin for now */
1369 return false;
1370 }
1371 }
1372 else
1373 {
1374 /* Already have a full cleanup lock (which is more than enough) */
1375 }
1376
1377 /*
1378 * Unlike new pages, empty pages are always set all-visible and
1379 * all-frozen.
1380 */
1381 if (!PageIsAllVisible(page))
1382 {
1383 uint8 old_vmbits;
1384
1386
1387 /* mark buffer dirty before writing a WAL record */
1389
1390 /*
1391 * It's possible that another backend has extended the heap,
1392 * initialized the page, and then failed to WAL-log the page due
1393 * to an ERROR. Since heap extension is not WAL-logged, recovery
1394 * might try to replay our record setting the page all-visible and
1395 * find that the page isn't initialized, which will cause a PANIC.
1396 * To prevent that, check whether the page has been previously
1397 * WAL-logged, and if not, do that now.
1398 */
1399 if (RelationNeedsWAL(vacrel->rel) &&
1401 log_newpage_buffer(buf, true);
1402
1403 PageSetAllVisible(page);
1404 old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
1406 vmbuffer, InvalidTransactionId,
1410
1411 /*
1412 * If the page wasn't already set all-visible and/or all-frozen in
1413 * the VM, count it as newly set for logging.
1414 */
1415 if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1416 {
1417 vacrel->vm_new_visible_pages++;
1419 }
1420 else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1421 vacrel->vm_new_frozen_pages++;
1422 }
1423
1424 freespace = PageGetHeapFreeSpace(page);
1426 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1427 return true;
1428 }
1429
1430 /* page isn't new or empty -- keep lock and pin */
1431 return false;
1432}
1433
1434/* qsort comparator for sorting OffsetNumbers */
1435static int
1436cmpOffsetNumbers(const void *a, const void *b)
1437{
1438 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1439}
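/*
 * Standalone illustration of the pattern cmpOffsetNumbers() above supports:
 * lazy_scan_prune() qsort()s the page's dead offsets with it before handing
 * them to dead_items_add(), which expects sorted input.  uint16_t stands in
 * for OffsetNumber here, and the subtraction mirrors what pg_cmp_u16() does
 * (safe because two uint16 values always fit in int).
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int
demo_cmp_u16(const void *a, const void *b)
{
    return (int) *(const uint16_t *) a - (int) *(const uint16_t *) b;
}

int
main(void)
{
    uint16_t deadoffsets[] = {17, 3, 42, 8, 3};
    size_t   n = sizeof(deadoffsets) / sizeof(deadoffsets[0]);

    qsort(deadoffsets, n, sizeof(uint16_t), demo_cmp_u16);

    for (size_t i = 0; i < n; i++)
        printf("%u ", (unsigned) deadoffsets[i]);
    printf("\n");           /* prints: 3 3 8 17 42 */
    return 0;
}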
1440
1441/*
1442 * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1443 *
1444 * Caller must hold pin and buffer cleanup lock on the buffer.
1445 *
1446 * vmbuffer is the buffer containing the VM block with visibility information
1447 * for the heap block, blkno. all_visible_according_to_vm is the saved
1448 * visibility status of the heap block looked up earlier by the caller. We
1449 * won't rely entirely on this status, as it may be out of date.
1450 *
1451 * *has_lpdead_items is set to true or false depending on whether, upon return
1452 * from this function, any LP_DEAD items are still present on the page.
1453 */
1454static void
1456 Buffer buf,
1457 BlockNumber blkno,
1458 Page page,
1459 Buffer vmbuffer,
1460 bool all_visible_according_to_vm,
1461 bool *has_lpdead_items)
1462{
1463 Relation rel = vacrel->rel;
1464 PruneFreezeResult presult;
1465 int prune_options = 0;
1466
1467 Assert(BufferGetBlockNumber(buf) == blkno);
1468
1469 /*
1470 * Prune all HOT-update chains and potentially freeze tuples on this page.
1471 *
1472 * If the relation has no indexes, we can immediately mark would-be dead
1473 * items LP_UNUSED.
1474 *
1475 * The number of tuples removed from the page is returned in
1476 * presult.ndeleted. It should not be confused with presult.lpdead_items;
1477 * presult.lpdead_items's final value can be thought of as the number of
1478 * tuples that were deleted from indexes.
1479 *
1480 * We will update the VM after collecting LP_DEAD items and freezing
1481 * tuples. Pruning will have determined whether or not the page is
1482 * all-visible.
1483 */
1484 prune_options = HEAP_PAGE_PRUNE_FREEZE;
1485 if (vacrel->nindexes == 0)
1486 prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
1487
1488 heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
1489 &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
1490 &vacrel->offnum,
1491 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
1492
1495
1496 if (presult.nfrozen > 0)
1497 {
1498 /*
1499 * We don't increment the new_frozen_tuple_pages instrumentation
1500 * counter when nfrozen == 0, since it only counts pages with newly
1501 * frozen tuples (don't confuse that with pages newly set all-frozen
1502 * in VM).
1503 */
1504 vacrel->new_frozen_tuple_pages++;
1505 }
1506
1507 /*
1508 * VACUUM will call heap_page_is_all_visible() during the second pass over
1509 * the heap to determine all_visible and all_frozen for the page -- this
1510 * is a specialized version of the logic from this function. Now that
1511 * we've finished pruning and freezing, make sure that we're in total
1512 * agreement with heap_page_is_all_visible() using an assertion.
1513 */
1514#ifdef USE_ASSERT_CHECKING
1515 /* Note that all_frozen value does not matter when !all_visible */
1516 if (presult.all_visible)
1517 {
1518 TransactionId debug_cutoff;
1519 bool debug_all_frozen;
1520
1521 Assert(presult.lpdead_items == 0);
1522
1523 if (!heap_page_is_all_visible(vacrel, buf,
1524 &debug_cutoff, &debug_all_frozen))
1525 Assert(false);
1526
1527 Assert(presult.all_frozen == debug_all_frozen);
1528
1529 Assert(!TransactionIdIsValid(debug_cutoff) ||
1530 debug_cutoff == presult.vm_conflict_horizon);
1531 }
1532#endif
1533
1534 /*
1535 * Now save details of the LP_DEAD items from the page in vacrel
1536 */
1537 if (presult.lpdead_items > 0)
1538 {
1539 vacrel->lpdead_item_pages++;
1540
1541 /*
1542 * deadoffsets are collected incrementally in
1543 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
1544 * with an indeterminate order, but dead_items_add requires them to be
1545 * sorted.
1546 */
1547 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
1549
1550 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
1551 }
1552
1553 /* Finally, add page-local counts to whole-VACUUM counts */
1554 vacrel->tuples_deleted += presult.ndeleted;
1555 vacrel->tuples_frozen += presult.nfrozen;
1556 vacrel->lpdead_items += presult.lpdead_items;
1557 vacrel->live_tuples += presult.live_tuples;
1558 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
1559
1560 /* Can't truncate this page */
1561 if (presult.hastup)
1562 vacrel->nonempty_pages = blkno + 1;
1563
1564 /* Did we find LP_DEAD items? */
1565 *has_lpdead_items = (presult.lpdead_items > 0);
1566
1567 Assert(!presult.all_visible || !(*has_lpdead_items));
1568
1569 /*
1570 * Handle setting visibility map bit based on information from the VM (as
1571 * of last heap_vac_scan_next_block() call), and from all_visible and
1572 * all_frozen variables
1573 */
1574 if (!all_visible_according_to_vm && presult.all_visible)
1575 {
1576 uint8 old_vmbits;
1578
1579 if (presult.all_frozen)
1580 {
1582 flags |= VISIBILITYMAP_ALL_FROZEN;
1583 }
1584
1585 /*
1586 * It should never be the case that the visibility map page is set
1587 * while the page-level bit is clear, but the reverse is allowed (if
1588 * checksums are not enabled). Regardless, set both bits so that we
1589 * get back in sync.
1590 *
1591 * NB: If the heap page is all-visible but the VM bit is not set, we
1592 * don't need to dirty the heap page. However, if checksums are
1593 * enabled, we do need to make sure that the heap page is dirtied
1594 * before passing it to visibilitymap_set(), because it may be logged.
1595 * Given that this situation should only happen in rare cases after a
1596 * crash, it is not worth optimizing.
1597 */
1598 PageSetAllVisible(page);
1600 old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
1602 vmbuffer, presult.vm_conflict_horizon,
1603 flags);
1604
1605 /*
1606 * If the page wasn't already set all-visible and/or all-frozen in the
1607 * VM, count it as newly set for logging.
1608 */
1609 if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1610 {
1611 vacrel->vm_new_visible_pages++;
1612 if (presult.all_frozen)
1614 }
1615 else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
1616 presult.all_frozen)
1617 vacrel->vm_new_frozen_pages++;
1618 }
1619
1620 /*
1621 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1622 * page-level bit is clear. However, it's possible that the bit got
1623 * cleared after heap_vac_scan_next_block() was called, so we must recheck
1624 * with buffer lock before concluding that the VM is corrupt.
1625 */
1626 else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1627 visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1628 {
1629 elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1630 vacrel->relname, blkno);
1631 visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1633 }
1634
1635 /*
1636 * It's possible for the value returned by
1637 * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1638 * wrong for us to see tuples that appear to not be visible to everyone
1639 * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
1640 * never moves backwards, but GetOldestNonRemovableTransactionId() is
1641 * conservative and sometimes returns a value that's unnecessarily small,
1642 * so if we see that contradiction it just means that the tuples that we
1643 * think are not visible to everyone yet actually are, and the
1644 * PD_ALL_VISIBLE flag is correct.
1645 *
1646 * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
1647 * however.
1648 */
1649 else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
1650 {
1651 elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1652 vacrel->relname, blkno);
1653 PageClearAllVisible(page);
1654 MarkBufferDirty(buf);
1655 visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1656 VISIBILITYMAP_VALID_BITS);
1657 }
1658
1659 /*
1660 * If the all-visible page is all-frozen but not marked as such yet, mark
1661 * it as all-frozen. Note that all_frozen is only valid if all_visible is
1662 * true, so we must check both all_visible and all_frozen.
1663 */
1664 else if (all_visible_according_to_vm && presult.all_visible &&
1665 presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1666 {
1667 uint8 old_vmbits;
1668
1669 /*
1670 * Avoid relying on all_visible_according_to_vm as a proxy for the
1671 * page-level PD_ALL_VISIBLE bit being set, since it might have become
1672 * stale -- even when all_visible is set
1673 */
1674 if (!PageIsAllVisible(page))
1675 {
1676 PageSetAllVisible(page);
1677 MarkBufferDirty(buf);
1678 }
1679
1680 /*
1681 * Set the page all-frozen (and all-visible) in the VM.
1682 *
1683 * We can pass InvalidTransactionId as our cutoff_xid, since a
1684 * snapshotConflictHorizon sufficient to make everything safe for REDO
1685 * was logged when the page's tuples were frozen.
1686 */
1687 Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
1688 old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
1689 InvalidXLogRecPtr,
1690 vmbuffer, InvalidTransactionId,
1691 VISIBILITYMAP_ALL_VISIBLE |
1692 VISIBILITYMAP_ALL_FROZEN);
1693
1694 /*
1695 * The page was likely already set all-visible in the VM. However,
1696 * there is a small chance that it was modified sometime between
1697 * setting all_visible_according_to_vm and checking the visibility
1698 * during pruning. Check the return value of old_vmbits anyway to
1699 * ensure the visibility map counters used for logging are accurate.
1700 */
1701 if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1702 {
1703 vacrel->vm_new_visible_pages++;
1704 vacrel->vm_new_visible_frozen_pages++;
1705 }
1706
1707 /*
1708 * We already checked that the page was not set all-frozen in the VM
1709 * above, so we don't need to test the value of old_vmbits.
1710 */
1711 else
1712 vacrel->vm_new_frozen_pages++;
1713 }
1714}
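
The visibility-map bookkeeping above distinguishes pages that just became all-visible from pages that were already all-visible and only gained the all-frozen bit. The standalone sketch below illustrates that counter logic outside the server; the macro values mirror visibilitymap.h, but the struct and function names are invented for the example only.

#include <stdint.h>
#include <stdio.h>

#define VM_ALL_VISIBLE 0x01     /* illustrative copy of VISIBILITYMAP_ALL_VISIBLE */
#define VM_ALL_FROZEN  0x02     /* illustrative copy of VISIBILITYMAP_ALL_FROZEN */

struct vm_counters
{
    long new_visible_pages;         /* pages that just became all-visible */
    long new_visible_frozen_pages;  /* ... and were frozen in the same call */
    long new_frozen_pages;          /* already-visible pages that became frozen */
};

/* old_bits: VM bits before the update; page_all_frozen: what we just set */
static void
count_vm_change(struct vm_counters *c, uint8_t old_bits, int page_all_frozen)
{
    if ((old_bits & VM_ALL_VISIBLE) == 0)
    {
        c->new_visible_pages++;
        if (page_all_frozen)
            c->new_visible_frozen_pages++;
    }
    else if ((old_bits & VM_ALL_FROZEN) == 0 && page_all_frozen)
        c->new_frozen_pages++;
}

int
main(void)
{
    struct vm_counters c = {0, 0, 0};

    count_vm_change(&c, 0, 1);              /* neither bit was set, page frozen */
    count_vm_change(&c, VM_ALL_VISIBLE, 1); /* only all-visible was set */
    printf("%ld %ld %ld\n", c.new_visible_pages,
           c.new_visible_frozen_pages, c.new_frozen_pages); /* prints: 1 1 1 */
    return 0;
}
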
1715
1716/*
1717 * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1718 *
1719 * Caller need only hold a pin and share lock on the buffer, unlike
1720 * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1721 * performed here, it's quite possible that an earlier opportunistic pruning
1722 * operation left LP_DEAD items behind. We'll at least collect any such items
1723 * in dead_items for removal from indexes.
1724 *
1725 * For aggressive VACUUM callers, we may return false to indicate that a full
1726 * cleanup lock is required for processing by lazy_scan_prune. This is only
1727 * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1728 * one or more tuples on the page. We always return true for non-aggressive
1729 * callers.
1730 *
1731 * If this function returns true, *has_lpdead_items gets set to true or false
1732 * depending on whether, upon return from this function, any LP_DEAD items are
1733 * present on the page. If this function returns false, *has_lpdead_items
1734 * is not updated.
1735 */
1736 static bool
1737 lazy_scan_noprune(LVRelState *vacrel,
1738 Buffer buf,
1739 BlockNumber blkno,
1740 Page page,
1741 bool *has_lpdead_items)
1742{
1743 OffsetNumber offnum,
1744 maxoff;
1745 int lpdead_items,
1746 live_tuples,
1747 recently_dead_tuples,
1748 missed_dead_tuples;
1749 bool hastup;
1750 HeapTupleHeader tupleheader;
1751 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1752 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1753 OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1754
1755 Assert(BufferGetBlockNumber(buf) == blkno);
1756
1757 hastup = false; /* for now */
1758
1759 lpdead_items = 0;
1760 live_tuples = 0;
1761 recently_dead_tuples = 0;
1762 missed_dead_tuples = 0;
1763
1764 maxoff = PageGetMaxOffsetNumber(page);
1765 for (offnum = FirstOffsetNumber;
1766 offnum <= maxoff;
1767 offnum = OffsetNumberNext(offnum))
1768 {
1769 ItemId itemid;
1770 HeapTupleData tuple;
1771
1772 vacrel->offnum = offnum;
1773 itemid = PageGetItemId(page, offnum);
1774
1775 if (!ItemIdIsUsed(itemid))
1776 continue;
1777
1778 if (ItemIdIsRedirected(itemid))
1779 {
1780 hastup = true;
1781 continue;
1782 }
1783
1784 if (ItemIdIsDead(itemid))
1785 {
1786 /*
1787 * Deliberately don't set hastup=true here. See same point in
1788 * lazy_scan_prune for an explanation.
1789 */
1790 deadoffsets[lpdead_items++] = offnum;
1791 continue;
1792 }
1793
1794 hastup = true; /* page prevents rel truncation */
1795 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
1796 if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
1797 &NoFreezePageRelfrozenXid,
1798 &NoFreezePageRelminMxid))
1799 {
1800 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
1801 if (vacrel->aggressive)
1802 {
1803 /*
1804 * Aggressive VACUUMs must always be able to advance rel's
1805 * relfrozenxid to a value >= FreezeLimit (and be able to
1806 * advance rel's relminmxid to a value >= MultiXactCutoff).
1807 * The ongoing aggressive VACUUM won't be able to do that
1808 * unless it can freeze an XID (or MXID) from this tuple now.
1809 *
1810 * The only safe option is to have caller perform processing
1811 * of this page using lazy_scan_prune. Caller might have to
1812 * wait a while for a cleanup lock, but it can't be helped.
1813 */
1814 vacrel->offnum = InvalidOffsetNumber;
1815 return false;
1816 }
1817
1818 /*
1819 * Non-aggressive VACUUMs are under no obligation to advance
1820 * relfrozenxid (even by one XID). We can be much laxer here.
1821 *
1822 * Currently we always just accept an older final relfrozenxid
1823 * and/or relminmxid value. We never make caller wait or work a
1824 * little harder, even when it likely makes sense to do so.
1825 */
1826 }
1827
1828 ItemPointerSet(&(tuple.t_self), blkno, offnum);
1829 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1830 tuple.t_len = ItemIdGetLength(itemid);
1831 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
1832
1833 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1834 buf))
1835 {
1836 case HEAPTUPLE_DELETE_IN_PROGRESS:
1837 case HEAPTUPLE_LIVE:
1838
1839 /*
1840 * Count both cases as live, just like lazy_scan_prune
1841 */
1842 live_tuples++;
1843
1844 break;
1845 case HEAPTUPLE_DEAD:
1846
1847 /*
1848 * There is some useful work for pruning to do, that won't be
1849 * done due to failure to get a cleanup lock.
1850 */
1851 missed_dead_tuples++;
1852 break;
1853 case HEAPTUPLE_RECENTLY_DEAD:
1854
1855 /*
1856 * Count in recently_dead_tuples, just like lazy_scan_prune
1857 */
1858 recently_dead_tuples++;
1859 break;
1860 case HEAPTUPLE_INSERT_IN_PROGRESS:
1861
1862 /*
1863 * Do not count these rows as live, just like lazy_scan_prune
1864 */
1865 break;
1866 default:
1867 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1868 break;
1869 }
1870 }
1871
1872 vacrel->offnum = InvalidOffsetNumber;
1873
1874 /*
1875 * By here we know for sure that caller can put off freezing and pruning
1876 * this particular page until the next VACUUM. Remember its details now.
1877 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
1878 */
1879 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
1880 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
1881
1882 /* Save any LP_DEAD items found on the page in dead_items */
1883 if (vacrel->nindexes == 0)
1884 {
1885 /* Using one-pass strategy (since table has no indexes) */
1886 if (lpdead_items > 0)
1887 {
1888 /*
1889 * Perfunctory handling for the corner case where a single pass
1890 * strategy VACUUM cannot get a cleanup lock, and it turns out
1891 * that there is one or more LP_DEAD items: just count the LP_DEAD
1892 * items as missed_dead_tuples instead. (This is a bit dishonest,
1893 * but it beats having to maintain specialized heap vacuuming code
1894 * forever, for vanishingly little benefit.)
1895 */
1896 hastup = true;
1897 missed_dead_tuples += lpdead_items;
1898 }
1899 }
1900 else if (lpdead_items > 0)
1901 {
1902 /*
1903 * Page has LP_DEAD items, and so any references/TIDs that remain in
1904 * indexes will be deleted during index vacuuming (and then marked
1905 * LP_UNUSED in the heap)
1906 */
1907 vacrel->lpdead_item_pages++;
1908
1909 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
1910
1911 vacrel->lpdead_items += lpdead_items;
1912 }
1913
1914 /*
1915 * Finally, add relevant page-local counts to whole-VACUUM counts
1916 */
1917 vacrel->live_tuples += live_tuples;
1918 vacrel->recently_dead_tuples += recently_dead_tuples;
1919 vacrel->missed_dead_tuples += missed_dead_tuples;
1920 if (missed_dead_tuples > 0)
1921 vacrel->missed_dead_pages++;
1922
1923 /* Can't truncate this page */
1924 if (hastup)
1925 vacrel->nonempty_pages = blkno + 1;
1926
1927 /* Did we find LP_DEAD items? */
1928 *has_lpdead_items = (lpdead_items > 0);
1929
1930 /* Caller won't need to call lazy_scan_prune with same page */
1931 return true;
1932}
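
The switch in lazy_scan_noprune() above classifies each tuple the same way lazy_scan_prune() would, it just tallies instead of pruning. A minimal standalone sketch of that tallying follows; the enum and struct here are illustrative stand-ins, not the real HTSV_Result or LVRelState.

#include <stdio.h>

/* Illustrative stand-in for HTSV_Result; the real enum lives in heapam.h. */
typedef enum
{
    TUPLE_DEAD,
    TUPLE_RECENTLY_DEAD,
    TUPLE_LIVE,
    TUPLE_INSERT_IN_PROGRESS,
    TUPLE_DELETE_IN_PROGRESS
} tuple_status;

struct noprune_tally
{
    int live_tuples;
    int recently_dead_tuples;
    int missed_dead_tuples;     /* would have been pruned with a cleanup lock */
};

static void
tally_tuple(struct noprune_tally *t, tuple_status status)
{
    switch (status)
    {
        case TUPLE_DELETE_IN_PROGRESS:
        case TUPLE_LIVE:
            t->live_tuples++;           /* both counted as live */
            break;
        case TUPLE_DEAD:
            t->missed_dead_tuples++;    /* pruning work left for a later VACUUM */
            break;
        case TUPLE_RECENTLY_DEAD:
            t->recently_dead_tuples++;
            break;
        case TUPLE_INSERT_IN_PROGRESS:
            break;                      /* not counted as live */
    }
}

int
main(void)
{
    tuple_status page[] = {TUPLE_LIVE, TUPLE_DEAD, TUPLE_RECENTLY_DEAD,
                           TUPLE_DELETE_IN_PROGRESS, TUPLE_INSERT_IN_PROGRESS};
    struct noprune_tally t = {0, 0, 0};

    for (int i = 0; i < 5; i++)
        tally_tuple(&t, page[i]);
    printf("live=%d recently_dead=%d missed_dead=%d\n",
           t.live_tuples, t.recently_dead_tuples, t.missed_dead_tuples);
    return 0;   /* prints: live=2 recently_dead=1 missed_dead=1 */
}
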
1933
1934/*
1935 * Main entry point for index vacuuming and heap vacuuming.
1936 *
1937 * Removes items collected in dead_items from table's indexes, then marks the
1938 * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
1939 * for full details.
1940 *
1941 * Also empties dead_items, freeing up space for later TIDs.
1942 *
1943 * We may choose to bypass index vacuuming at this point, though only when the
1944 * ongoing VACUUM operation will definitely only have one index scan/round of
1945 * index vacuuming.
1946 */
1947 static void
1948 lazy_vacuum(LVRelState *vacrel)
1949 {
1950 bool bypass;
1951
1952 /* Should not end up here with no indexes */
1953 Assert(vacrel->nindexes > 0);
1954 Assert(vacrel->lpdead_item_pages > 0);
1955
1956 if (!vacrel->do_index_vacuuming)
1957 {
1958 Assert(!vacrel->do_index_cleanup);
1959 dead_items_reset(vacrel);
1960 return;
1961 }
1962
1963 /*
1964 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
1965 *
1966 * We currently only do this in cases where the number of LP_DEAD items
1967 * for the entire VACUUM operation is close to zero. This avoids sharp
1968 * discontinuities in the duration and overhead of successive VACUUM
1969 * operations that run against the same table with a fixed workload.
1970 * Ideally, successive VACUUM operations will behave as if there are
1971 * exactly zero LP_DEAD items in cases where there are close to zero.
1972 *
1973 * This is likely to be helpful with a table that is continually affected
1974 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
1975 * have small aberrations that lead to just a few heap pages retaining
1976 * only one or two LP_DEAD items. This is pretty common; even when the
1977 * DBA goes out of their way to make UPDATEs use HOT, it is practically
1978 * impossible to predict whether HOT will be applied in 100% of cases.
1979 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
1980 * HOT through careful tuning.
1981 */
1982 bypass = false;
1983 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
1984 {
1985 BlockNumber threshold;
1986
1987 Assert(vacrel->num_index_scans == 0);
1988 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
1989 Assert(vacrel->do_index_vacuuming);
1990 Assert(vacrel->do_index_cleanup);
1991
1992 /*
1993 * This crossover point at which we'll start to do index vacuuming is
1994 * expressed as a percentage of the total number of heap pages in the
1995 * table that are known to have at least one LP_DEAD item. This is
1996 * much more important than the total number of LP_DEAD items, since
1997 * it's a proxy for the number of heap pages whose visibility map bits
1998 * cannot be set on account of bypassing index and heap vacuuming.
1999 *
2000 * We apply one further precautionary test: the space currently used
2001 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2002 * not exceed 32MB. This limits the risk that we will bypass index
2003 * vacuuming again and again until eventually there is a VACUUM whose
2004 * dead_items space is not CPU cache resident.
2005 *
2006 * We don't take any special steps to remember the LP_DEAD items (such
2007 * as counting them in our final update to the stats system) when the
2008 * optimization is applied. Though the accounting used in analyze.c's
2009 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2010 * rows in its own stats report, that's okay. The discrepancy should
2011 * be negligible. If this optimization is ever expanded to cover more
2012 * cases then this may need to be reconsidered.
2013 */
2014 threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2015 bypass = (vacrel->lpdead_item_pages < threshold &&
2016 (TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
2017 }
2018
2019 if (bypass)
2020 {
2021 /*
2022 * There are almost zero TIDs. Behave as if there were precisely
2023 * zero: bypass index vacuuming, but do index cleanup.
2024 *
2025 * We expect that the ongoing VACUUM operation will finish very
2026 * quickly, so there is no point in considering speeding up as a
2027 * failsafe against wraparound failure. (Index cleanup is expected to
2028 * finish very quickly in cases where there were no ambulkdelete()
2029 * calls.)
2030 */
2031 vacrel->do_index_vacuuming = false;
2032 }
2033 else if (lazy_vacuum_all_indexes(vacrel))
2034 {
2035 /*
2036 * We successfully completed a round of index vacuuming. Do related
2037 * heap vacuuming now.
2038 */
2039 lazy_vacuum_heap_rel(vacrel);
2040 }
2041 else
2042 {
2043 /*
2044 * Failsafe case.
2045 *
2046 * We attempted index vacuuming, but didn't finish a full round/full
2047 * index scan. This happens when relfrozenxid or relminmxid is too
2048 * far in the past.
2049 *
2050 * From this point on the VACUUM operation will do no further index
2051 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2052 * back here again.
2053 */
2054 Assert(VacuumFailsafeActive);
2055 }
2056
2057 /*
2058 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2059 * vacuum)
2060 */
2061 dead_items_reset(vacrel);
2062}
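
The bypass heuristic above boils down to two tests: LP_DEAD items appear on fewer than BYPASS_THRESHOLD_PAGES (2%) of the table's pages, and the dead-TID store is still under 32MB. A small self-contained sketch of that decision follows; the function and constant names are invented for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BYPASS_THRESHOLD_PAGES 0.02                        /* 2% of rel_pages */
#define BYPASS_MAX_DEAD_ITEM_BYTES (32L * 1024L * 1024L)   /* 32MB TID store cap */

/*
 * Decide whether a single-scan VACUUM may skip index vacuuming entirely.
 * rel_pages: table size in blocks; lpdead_item_pages: blocks with >= 1
 * LP_DEAD item; dead_item_bytes: memory currently used by the dead-TID store.
 */
static bool
bypass_index_vacuuming(int64_t rel_pages, int64_t lpdead_item_pages,
                       int64_t dead_item_bytes)
{
    double threshold = (double) rel_pages * BYPASS_THRESHOLD_PAGES;

    return lpdead_item_pages < threshold &&
        dead_item_bytes < BYPASS_MAX_DEAD_ITEM_BYTES;
}

int
main(void)
{
    /* 1,000,000-block table with LP_DEAD items on 150 pages: bypass */
    printf("%d\n", bypass_index_vacuuming(1000000, 150, 1 << 20));
    /* Same table with LP_DEAD items on 50,000 pages: do index vacuuming */
    printf("%d\n", bypass_index_vacuuming(1000000, 50000, 1 << 20));
    return 0;
}
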
2063
2064/*
2065 * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2066 *
2067 * Returns true in the common case when all indexes were successfully
2068 * vacuumed. Returns false in rare cases where we determined that the ongoing
2069 * VACUUM operation is at risk of taking too long to finish, leading to
2070 * wraparound failure.
2071 */
2072 static bool
2073 lazy_vacuum_all_indexes(LVRelState *vacrel)
2074 {
2075 bool allindexes = true;
2076 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2077 const int progress_start_index[] = {
2078 PROGRESS_VACUUM_PHASE,
2079 PROGRESS_VACUUM_INDEXES_TOTAL
2080 };
2081 const int progress_end_index[] = {
2082 PROGRESS_VACUUM_INDEXES_TOTAL,
2083 PROGRESS_VACUUM_INDEXES_PROCESSED,
2084 PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2085 };
2086 int64 progress_start_val[2];
2087 int64 progress_end_val[3];
2088
2089 Assert(vacrel->nindexes > 0);
2090 Assert(vacrel->do_index_vacuuming);
2091 Assert(vacrel->do_index_cleanup);
2092
2093 /* Precheck for XID wraparound emergencies */
2094 if (lazy_check_wraparound_failsafe(vacrel))
2095 {
2096 /* Wraparound emergency -- don't even start an index scan */
2097 return false;
2098 }
2099
2100 /*
2101 * Report that we are now vacuuming indexes and the number of indexes to
2102 * vacuum.
2103 */
2104 progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2105 progress_start_val[1] = vacrel->nindexes;
2106 pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2107
2108 if (!ParallelVacuumIsActive(vacrel))
2109 {
2110 for (int idx = 0; idx < vacrel->nindexes; idx++)
2111 {
2112 Relation indrel = vacrel->indrels[idx];
2113 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2114
2115 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2116 old_live_tuples,
2117 vacrel);
2118
2119 /* Report the number of indexes vacuumed */
2120 pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2121 idx + 1);
2122
2123 if (lazy_check_wraparound_failsafe(vacrel))
2124 {
2125 /* Wraparound emergency -- end current index scan */
2126 allindexes = false;
2127 break;
2128 }
2129 }
2130 }
2131 else
2132 {
2133 /* Outsource everything to parallel variant */
2134 parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2135 vacrel->num_index_scans);
2136
2137 /*
2138 * Do a postcheck to consider applying wraparound failsafe now. Note
2139 * that parallel VACUUM only gets the precheck and this postcheck.
2140 */
2141 if (lazy_check_wraparound_failsafe(vacrel))
2142 allindexes = false;
2143 }
2144
2145 /*
2146 * We delete all LP_DEAD items from the first heap pass in all indexes on
2147 * each call here (except calls where we choose to do the failsafe). This
2148 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2149 * of the failsafe triggering, which prevents the next call from taking
2150 * place).
2151 */
2152 Assert(vacrel->num_index_scans > 0 ||
2153 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2154 Assert(allindexes || VacuumFailsafeActive);
2155
2156 /*
2157 * Increase and report the number of index scans. Also, we reset
2158 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2159 *
2160 * We deliberately include the case where we started a round of bulk
2161 * deletes that we weren't able to finish due to the failsafe triggering.
2162 */
2163 vacrel->num_index_scans++;
2164 progress_end_val[0] = 0;
2165 progress_end_val[1] = 0;
2166 progress_end_val[2] = vacrel->num_index_scans;
2167 pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2168
2169 return allindexes;
2170}
2171
2172/*
2173 * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2174 *
2175 * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2176 * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2177 *
2178 * We may also be able to truncate the line pointer array of the heap pages we
2179 * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2180 * array, it can be reclaimed as free space. These LP_UNUSED items usually
2181 * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2182 * each page to LP_UNUSED, and then consider if it's possible to truncate the
2183 * page's line pointer array).
2184 *
2185 * Note: the reason for doing this as a second pass is we cannot remove the
2186 * tuples until we've removed their index entries, and we want to process
2187 * index entry removal in batches as large as possible.
2188 */
2189 static void
2190 lazy_vacuum_heap_rel(LVRelState *vacrel)
2191 {
2192 BlockNumber vacuumed_pages = 0;
2193 Buffer vmbuffer = InvalidBuffer;
2194 LVSavedErrInfo saved_err_info;
2195 TidStoreIter *iter;
2196 TidStoreIterResult *iter_result;
2197
2198 Assert(vacrel->do_index_vacuuming);
2199 Assert(vacrel->do_index_cleanup);
2200 Assert(vacrel->num_index_scans > 0);
2201
2202 /* Report that we are now vacuuming the heap */
2203 pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2204 PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2205
2206 /* Update error traceback information */
2207 update_vacuum_error_info(vacrel, &saved_err_info,
2208 VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2209 InvalidBlockNumber, InvalidOffsetNumber);
2210
2211 iter = TidStoreBeginIterate(vacrel->dead_items);
2212 while ((iter_result = TidStoreIterateNext(iter)) != NULL)
2213 {
2214 BlockNumber blkno;
2215 Buffer buf;
2216 Page page;
2217 Size freespace;
2218 OffsetNumber offsets[MaxOffsetNumber];
2219 int num_offsets;
2220
2222
2223 blkno = iter_result->blkno;
2224 vacrel->blkno = blkno;
2225
2226 num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2227 Assert(num_offsets <= lengthof(offsets));
2228
2229 /*
2230 * Pin the visibility map page in case we need to mark the page
2231 * all-visible. In most cases this will be very cheap, because we'll
2232 * already have the correct page pinned anyway.
2233 */
2234 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2235
2236 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2237 buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2238 vacrel->bstrategy);
2239 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2240 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2241 num_offsets, vmbuffer);
2242
2243 /* Now that we've vacuumed the page, record its available space */
2244 page = BufferGetPage(buf);
2245 freespace = PageGetHeapFreeSpace(page);
2246
2247 UnlockReleaseBuffer(buf);
2248 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2249 vacuumed_pages++;
2250 }
2251 TidStoreEndIterate(iter);
2252
2253 vacrel->blkno = InvalidBlockNumber;
2254 if (BufferIsValid(vmbuffer))
2255 ReleaseBuffer(vmbuffer);
2256
2257 /*
2258 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2259 * the second heap pass. No more, no less.
2260 */
2261 Assert(vacrel->num_index_scans > 1 ||
2262 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2263 vacuumed_pages == vacrel->lpdead_item_pages));
2264
2265 ereport(DEBUG2,
2266 (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2267 vacrel->relname, (long long) vacrel->dead_items_info->num_items,
2268 vacuumed_pages)));
2269
2270 /* Revert to the previous phase information for error traceback */
2271 restore_vacuum_error_info(vacrel, &saved_err_info);
2272}
2273
2274/*
2275 * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2276 * vacrel->dead_items store.
2277 *
2278 * Caller must have an exclusive buffer lock on the buffer (though a full
2279 * cleanup lock is also acceptable). vmbuffer must be valid and already have
2280 * a pin on blkno's visibility map page.
2281 */
2282 static void
2283 lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2284 OffsetNumber *deadoffsets, int num_offsets,
2285 Buffer vmbuffer)
2286{
2287 Page page = BufferGetPage(buffer);
2288 OffsetNumber unused[MaxHeapTuplesPerPage];
2289 int nunused = 0;
2290 TransactionId visibility_cutoff_xid;
2291 bool all_frozen;
2292 LVSavedErrInfo saved_err_info;
2293
2294 Assert(vacrel->do_index_vacuuming);
2295
2296 pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2297
2298 /* Update error traceback information */
2299 update_vacuum_error_info(vacrel, &saved_err_info,
2300 VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2301 InvalidOffsetNumber);
2302
2303 START_CRIT_SECTION();
2304
2305 for (int i = 0; i < num_offsets; i++)
2306 {
2307 ItemId itemid;
2308 OffsetNumber toff = deadoffsets[i];
2309
2310 itemid = PageGetItemId(page, toff);
2311
2312 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2313 ItemIdSetUnused(itemid);
2314 unused[nunused++] = toff;
2315 }
2316
2317 Assert(nunused > 0);
2318
2319 /* Attempt to truncate line pointer array now */
2320 PageTruncateLinePointerArray(page);
2321
2322 /*
2323 * Mark buffer dirty before we write WAL.
2324 */
2325 MarkBufferDirty(buffer);
2326
2327 /* XLOG stuff */
2328 if (RelationNeedsWAL(vacrel->rel))
2329 {
2330 log_heap_prune_and_freeze(vacrel->rel, buffer,
2331 InvalidTransactionId,
2332 false, /* no cleanup lock required */
2333 PRUNE_VACUUM_CLEANUP,
2334 NULL, 0, /* frozen */
2335 NULL, 0, /* redirected */
2336 NULL, 0, /* dead */
2337 unused, nunused);
2338 }
2339
2340 /*
2341 * End critical section, so we safely can do visibility tests (which
2342 * possibly need to perform IO and allocate memory!). If we crash now the
2343 * page (including the corresponding vm bit) might not be marked all
2344 * visible, but that's fine. A later vacuum will fix that.
2345 */
2346 END_CRIT_SECTION();
2347
2348 /*
2349 * Now that we have removed the LP_DEAD items from the page, once again
2350 * check if the page has become all-visible. The page is already marked
2351 * dirty, exclusively locked, and, if needed, a full page image has been
2352 * emitted.
2353 */
2354 Assert(!PageIsAllVisible(page));
2355 if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2356 &all_frozen))
2357 {
2358 uint8 old_vmbits;
2359 uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2360
2361 if (all_frozen)
2362 {
2363 Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2364 flags |= VISIBILITYMAP_ALL_FROZEN;
2365 }
2366
2367 PageSetAllVisible(page);
2368 old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer,
2369 InvalidXLogRecPtr,
2370 vmbuffer, visibility_cutoff_xid,
2371 flags);
2372
2373 /*
2374 * If the page wasn't already set all-visible and/or all-frozen in the
2375 * VM, count it as newly set for logging.
2376 */
2377 if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2378 {
2379 vacrel->vm_new_visible_pages++;
2380 if (all_frozen)
2381 vacrel->vm_new_visible_frozen_pages++;
2382 }
2383
2384 else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2385 all_frozen)
2386 vacrel->vm_new_frozen_pages++;
2387 }
2388
2389 /* Revert to the previous phase information for error traceback */
2390 restore_vacuum_error_info(vacrel, &saved_err_info);
2391}
2392
2393/*
2394 * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2395 * relfrozenxid and/or relminmxid that is dangerously far in the past.
2396 * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2397 * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2398 *
2399 * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2400 * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2401 * that it started out with.
2402 *
2403 * Returns true when failsafe has been triggered.
2404 */
2405 static bool
2406 lazy_check_wraparound_failsafe(LVRelState *vacrel)
2407 {
2408 /* Don't warn more than once per VACUUM */
2409 if (VacuumFailsafeActive)
2410 return true;
2411
2412 if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2413 {
2414 const int progress_index[] = {
2415 PROGRESS_VACUUM_INDEXES_TOTAL,
2416 PROGRESS_VACUUM_INDEXES_PROCESSED
2417 };
2418 int64 progress_val[2] = {0, 0};
2419
2420 VacuumFailsafeActive = true;
2421
2422 /*
2423 * Abandon use of a buffer access strategy to allow use of all of
2424 * shared buffers. We assume the caller who allocated the memory for
2425 * the BufferAccessStrategy will free it.
2426 */
2427 vacrel->bstrategy = NULL;
2428
2429 /* Disable index vacuuming, index cleanup, and heap rel truncation */
2430 vacrel->do_index_vacuuming = false;
2431 vacrel->do_index_cleanup = false;
2432 vacrel->do_rel_truncate = false;
2433
2434 /* Reset the progress counters */
2435 pgstat_progress_update_multi_param(2, progress_index, progress_val);
2436
2437 ereport(WARNING,
2438 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2439 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2440 vacrel->num_index_scans),
2441 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2442 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2443 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2444
2445 /* Stop applying cost limits from this point on */
2446 VacuumCostActive = false;
2447 VacuumCostBalance = 0;
2448
2449 return true;
2450 }
2451
2452 return false;
2453}
2454
2455/*
2456 * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2457 */
2458 static void
2459 lazy_cleanup_all_indexes(LVRelState *vacrel)
2460 {
2461 double reltuples = vacrel->new_rel_tuples;
2462 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2463 const int progress_start_index[] = {
2464 PROGRESS_VACUUM_PHASE,
2465 PROGRESS_VACUUM_INDEXES_TOTAL
2466 };
2467 const int progress_end_index[] = {
2468 PROGRESS_VACUUM_INDEXES_TOTAL,
2469 PROGRESS_VACUUM_INDEXES_PROCESSED
2470 };
2471 int64 progress_start_val[2];
2472 int64 progress_end_val[2] = {0, 0};
2473
2474 Assert(vacrel->do_index_cleanup);
2475 Assert(vacrel->nindexes > 0);
2476
2477 /*
2478 * Report that we are now cleaning up indexes and the number of indexes to
2479 * cleanup.
2480 */
2481 progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2482 progress_start_val[1] = vacrel->nindexes;
2483 pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2484
2485 if (!ParallelVacuumIsActive(vacrel))
2486 {
2487 for (int idx = 0; idx < vacrel->nindexes; idx++)
2488 {
2489 Relation indrel = vacrel->indrels[idx];
2490 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2491
2492 vacrel->indstats[idx] =
2493 lazy_cleanup_one_index(indrel, istat, reltuples,
2494 estimated_count, vacrel);
2495
2496 /* Report the number of indexes cleaned up */
2497 pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2498 idx + 1);
2499 }
2500 }
2501 else
2502 {
2503 /* Outsource everything to parallel variant */
2504 parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2505 vacrel->num_index_scans,
2506 estimated_count);
2507 }
2508
2509 /* Reset the progress counters */
2510 pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2511}
2512
2513/*
2514 * lazy_vacuum_one_index() -- vacuum index relation.
2515 *
2516 * Delete all the index tuples containing a TID collected in
2517 * vacrel->dead_items. Also update running statistics. Exact
2518 * details depend on index AM's ambulkdelete routine.
2519 *
2520 * reltuples is the number of heap tuples to be passed to the
2521 * bulkdelete callback. It's always assumed to be estimated.
2522 * See indexam.sgml for more info.
2523 *
2524 * Returns bulk delete stats derived from input stats
2525 */
2526 static IndexBulkDeleteResult *
2527 lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2528 double reltuples, LVRelState *vacrel)
2529{
2530 IndexVacuumInfo ivinfo;
2531 LVSavedErrInfo saved_err_info;
2532
2533 ivinfo.index = indrel;
2534 ivinfo.heaprel = vacrel->rel;
2535 ivinfo.analyze_only = false;
2536 ivinfo.report_progress = false;
2537 ivinfo.estimated_count = true;
2538 ivinfo.message_level = DEBUG2;
2539 ivinfo.num_heap_tuples = reltuples;
2540 ivinfo.strategy = vacrel->bstrategy;
2541
2542 /*
2543 * Update error traceback information.
2544 *
2545 * The index name is saved during this phase and restored immediately
2546 * after this phase. See vacuum_error_callback.
2547 */
2548 Assert(vacrel->indname == NULL);
2549 vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2550 update_vacuum_error_info(vacrel, &saved_err_info,
2551 VACUUM_ERRCB_PHASE_VACUUM_INDEX,
2552 InvalidBlockNumber, InvalidOffsetNumber);
2553
2554 /* Do bulk deletion */
2555 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
2556 vacrel->dead_items_info);
2557
2558 /* Revert to the previous phase information for error traceback */
2559 restore_vacuum_error_info(vacrel, &saved_err_info);
2560 pfree(vacrel->indname);
2561 vacrel->indname = NULL;
2562
2563 return istat;
2564}
2565
2566/*
2567 * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2568 *
2569 * Calls index AM's amvacuumcleanup routine. reltuples is the number
2570 * of heap tuples and estimated_count is true if reltuples is an
2571 * estimated value. See indexam.sgml for more info.
2572 *
2573 * Returns bulk delete stats derived from input stats
2574 */
2575 static IndexBulkDeleteResult *
2576 lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2577 double reltuples, bool estimated_count,
2578 LVRelState *vacrel)
2579{
2580 IndexVacuumInfo ivinfo;
2581 LVSavedErrInfo saved_err_info;
2582
2583 ivinfo.index = indrel;
2584 ivinfo.heaprel = vacrel->rel;
2585 ivinfo.analyze_only = false;
2586 ivinfo.report_progress = false;
2587 ivinfo.estimated_count = estimated_count;
2588 ivinfo.message_level = DEBUG2;
2589
2590 ivinfo.num_heap_tuples = reltuples;
2591 ivinfo.strategy = vacrel->bstrategy;
2592
2593 /*
2594 * Update error traceback information.
2595 *
2596 * The index name is saved during this phase and restored immediately
2597 * after this phase. See vacuum_error_callback.
2598 */
2599 Assert(vacrel->indname == NULL);
2600 vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2601 update_vacuum_error_info(vacrel, &saved_err_info,
2602 VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
2603 InvalidBlockNumber, InvalidOffsetNumber);
2604
2605 istat = vac_cleanup_one_index(&ivinfo, istat);
2606
2607 /* Revert to the previous phase information for error traceback */
2608 restore_vacuum_error_info(vacrel, &saved_err_info);
2609 pfree(vacrel->indname);
2610 vacrel->indname = NULL;
2611
2612 return istat;
2613}
2614
2615/*
2616 * should_attempt_truncation - should we attempt to truncate the heap?
2617 *
2618 * Don't even think about it unless we have a shot at releasing a goodly
2619 * number of pages. Otherwise, the time taken isn't worth it, mainly because
2620 * an AccessExclusive lock must be replayed on any hot standby, where it can
2621 * be particularly disruptive.
2622 *
2623 * Also don't attempt it if wraparound failsafe is in effect. The entire
2624 * system might be refusing to allocate new XIDs at this point. The system
2625 * definitely won't return to normal unless and until VACUUM actually advances
2626 * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2627 * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2628 * truncate the table under these circumstances, an XID exhaustion error might
2629 * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2630 * There is very little chance of truncation working out when the failsafe is
2631 * in effect in any case. lazy_scan_prune makes the optimistic assumption
2632 * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2633 * we're called.
2634 */
2635 static bool
2636 should_attempt_truncation(LVRelState *vacrel)
2637 {
2638 BlockNumber possibly_freeable;
2639
2640 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
2641 return false;
2642
2643 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2644 if (possibly_freeable > 0 &&
2645 (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2646 possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2647 return true;
2648
2649 return false;
2650}
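
The test above can be read as: truncation is worth attempting only when the number of apparently freeable trailing pages is nonzero and reaches either REL_TRUNCATE_MINIMUM or rel_pages / REL_TRUNCATE_FRACTION, and the failsafe is not active. A standalone sketch, with an invented function name:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REL_TRUNCATE_MINIMUM 1000
#define REL_TRUNCATE_FRACTION 16

/* Worth trying to truncate only if enough trailing pages look freeable. */
static bool
worth_truncating(uint32_t rel_pages, uint32_t nonempty_pages, bool failsafe_active)
{
    uint32_t possibly_freeable = rel_pages - nonempty_pages;

    if (failsafe_active)
        return false;
    return possibly_freeable > 0 &&
        (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
         possibly_freeable >= rel_pages / REL_TRUNCATE_FRACTION);
}

int
main(void)
{
    printf("%d\n", worth_truncating(100, 95, false));        /* 5 < 100/16: no  */
    printf("%d\n", worth_truncating(100, 90, false));        /* 10 >= 100/16: yes */
    printf("%d\n", worth_truncating(500000, 400000, false)); /* >= 1000: yes    */
    return 0;
}
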
2651
2652/*
2653 * lazy_truncate_heap - try to truncate off any empty pages at the end
2654 */
2655 static void
2656 lazy_truncate_heap(LVRelState *vacrel)
2657 {
2658 BlockNumber orig_rel_pages = vacrel->rel_pages;
2659 BlockNumber new_rel_pages;
2660 bool lock_waiter_detected;
2661 int lock_retry;
2662
2663 /* Report that we are now truncating */
2664 pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2665 PROGRESS_VACUUM_PHASE_TRUNCATE);
2666
2667 /* Update error traceback information one last time */
2668 update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
2669 vacrel->nonempty_pages, InvalidOffsetNumber);
2670
2671 /*
2672 * Loop until no more truncating can be done.
2673 */
2674 do
2675 {
2676 /*
2677 * We need full exclusive lock on the relation in order to do
2678 * truncation. If we can't get it, give up rather than waiting --- we
2679 * don't want to block other backends, and we don't want to deadlock
2680 * (which is quite possible considering we already hold a lower-grade
2681 * lock).
2682 */
2683 lock_waiter_detected = false;
2684 lock_retry = 0;
2685 while (true)
2686 {
2687 if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
2688 break;
2689
2690 /*
2691 * Check for interrupts while trying to (re-)acquire the exclusive
2692 * lock.
2693 */
2694 CHECK_FOR_INTERRUPTS();
2695
2696 if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2697 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
2698 {
2699 /*
2700 * We failed to establish the lock in the specified number of
2701 * retries. This means we give up truncating.
2702 */
2703 ereport(vacrel->verbose ? INFO : DEBUG2,
2704 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2705 vacrel->relname)));
2706 return;
2707 }
2708
2709 (void) WaitLatch(MyLatch,
2710 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
2711 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
2712 WAIT_EVENT_VACUUM_TRUNCATE);
2713 ResetLatch(MyLatch);
2714 }
2715
2716 /*
2717 * Now that we have exclusive lock, look to see if the rel has grown
2718 * whilst we were vacuuming with non-exclusive lock. If so, give up;
2719 * the newly added pages presumably contain non-deletable tuples.
2720 */
2721 new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2722 if (new_rel_pages != orig_rel_pages)
2723 {
2724 /*
2725 * Note: we intentionally don't update vacrel->rel_pages with the
2726 * new rel size here. If we did, it would amount to assuming that
2727 * the new pages are empty, which is unlikely. Leaving the numbers
2728 * alone amounts to assuming that the new pages have the same
2729 * tuple density as existing ones, which is less unlikely.
2730 */
2732 return;
2733 }
2734
2735 /*
2736 * Scan backwards from the end to verify that the end pages actually
2737 * contain no tuples. This is *necessary*, not optional, because
2738 * other backends could have added tuples to these pages whilst we
2739 * were vacuuming.
2740 */
2741 new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2742 vacrel->blkno = new_rel_pages;
2743
2744 if (new_rel_pages >= orig_rel_pages)
2745 {
2746 /* can't do anything after all */
2747 UnlockRelation(vacrel->rel, AccessExclusiveLock);
2748 return;
2749 }
2750
2751 /*
2752 * Okay to truncate.
2753 */
2754 RelationTruncate(vacrel->rel, new_rel_pages);
2755
2756 /*
2757 * We can release the exclusive lock as soon as we have truncated.
2758 * Other backends can't safely access the relation until they have
2759 * processed the smgr invalidation that smgrtruncate sent out ... but
2760 * that should happen as part of standard invalidation processing once
2761 * they acquire lock on the relation.
2762 */
2763 UnlockRelation(vacrel->rel, AccessExclusiveLock);
2764
2765 /*
2766 * Update statistics. Here, it *is* correct to adjust rel_pages
2767 * without also touching reltuples, since the tuple count wasn't
2768 * changed by the truncation.
2769 */
2770 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2771 vacrel->rel_pages = new_rel_pages;
2772
2773 ereport(vacrel->verbose ? INFO : DEBUG2,
2774 (errmsg("table \"%s\": truncated %u to %u pages",
2775 vacrel->relname,
2776 orig_rel_pages, new_rel_pages)));
2777 orig_rel_pages = new_rel_pages;
2778 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2779}
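
The retry loop above makes one conditional lock attempt per VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL of waiting and gives up once VACUUM_TRUNCATE_LOCK_TIMEOUT has been exceeded. The arithmetic, shown standalone:

#include <stdio.h>

#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50   /* ms between lock attempts */
#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000       /* ms before giving up      */

int
main(void)
{
    int max_retries = VACUUM_TRUNCATE_LOCK_TIMEOUT /
        VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL;

    /* One lock attempt per 50ms wait, so up to 100 attempts (about five
     * seconds) before the truncation pass is abandoned. */
    printf("at most %d lock attempts over ~%d ms\n",
           max_retries, max_retries * VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL);
    return 0;
}
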
2780
2781/*
2782 * Rescan end pages to verify that they are (still) empty of tuples.
2783 *
2784 * Returns number of nondeletable pages (last nonempty page + 1).
2785 */
2786static BlockNumber
2787count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2788{
2789 BlockNumber blkno;
2790 BlockNumber prefetchedUntil;
2791 instr_time starttime;
2792
2793 /* Initialize the starttime if we check for conflicting lock requests */
2794 INSTR_TIME_SET_CURRENT(starttime);
2795
2796 /*
2797 * Start checking blocks at what we believe relation end to be and move
2798 * backwards. (Strange coding of loop control is needed because blkno is
2799 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2800 * in forward direction, so that OS-level readahead can kick in.
2801 */
2802 blkno = vacrel->rel_pages;
2803 StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
2804 "prefetch size must be power of 2");
2805 prefetchedUntil = InvalidBlockNumber;
2806 while (blkno > vacrel->nonempty_pages)
2807 {
2808 Buffer buf;
2809 Page page;
2810 OffsetNumber offnum,
2811 maxoff;
2812 bool hastup;
2813
2814 /*
2815 * Check if another process requests a lock on our relation. We are
2816 * holding an AccessExclusiveLock here, so they will be waiting. We
2817 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2818 * only check if that interval has elapsed once every 32 blocks to
2819 * keep the number of system calls and actual shared lock table
2820 * lookups to a minimum.
2821 */
2822 if ((blkno % 32) == 0)
2823 {
2824 instr_time currenttime;
2825 instr_time elapsed;
2826
2827 INSTR_TIME_SET_CURRENT(currenttime);
2828 elapsed = currenttime;
2829 INSTR_TIME_SUBTRACT(elapsed, starttime);
2830 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2831 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
2832 {
2833 if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
2834 {
2835 ereport(vacrel->verbose ? INFO : DEBUG2,
2836 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
2837 vacrel->relname)));
2838
2839 *lock_waiter_detected = true;
2840 return blkno;
2841 }
2842 starttime = currenttime;
2843 }
2844 }
2845
2846 /*
2847 * We don't insert a vacuum delay point here, because we have an
2848 * exclusive lock on the table which we want to hold for as short a
2849 * time as possible. We still need to check for interrupts however.
2850 */
2851 CHECK_FOR_INTERRUPTS();
2852
2853 blkno--;
2854
2855 /* If we haven't prefetched this lot yet, do so now. */
2856 if (prefetchedUntil > blkno)
2857 {
2858 BlockNumber prefetchStart;
2859 BlockNumber pblkno;
2860
2861 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2862 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2863 {
2864 PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
2865 CHECK_FOR_INTERRUPTS();
2866 }
2867 prefetchedUntil = prefetchStart;
2868 }
2869
2870 buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2871 vacrel->bstrategy);
2872
2873 /* In this phase we only need shared access to the buffer */
2874 LockBuffer(buf, BUFFER_LOCK_SHARE);
2875
2876 page = BufferGetPage(buf);
2877
2878 if (PageIsNew(page) || PageIsEmpty(page))
2879 {
2880 UnlockReleaseBuffer(buf);
2881 continue;
2882 }
2883
2884 hastup = false;
2885 maxoff = PageGetMaxOffsetNumber(page);
2886 for (offnum = FirstOffsetNumber;
2887 offnum <= maxoff;
2888 offnum = OffsetNumberNext(offnum))
2889 {
2890 ItemId itemid;
2891
2892 itemid = PageGetItemId(page, offnum);
2893
2894 /*
2895 * Note: any non-unused item should be taken as a reason to keep
2896 * this page. Even an LP_DEAD item makes truncation unsafe, since
2897 * we must not have cleaned out its index entries.
2898 */
2899 if (ItemIdIsUsed(itemid))
2900 {
2901 hastup = true;
2902 break; /* can stop scanning */
2903 }
2904 } /* scan along page */
2905
2906 UnlockReleaseBuffer(buf);
2907
2908 /* Done scanning if we found a tuple here */
2909 if (hastup)
2910 return blkno + 1;
2911 }
2912
2913 /*
2914 * If we fall out of the loop, all the previously-thought-to-be-empty
2915 * pages still are; we need not bother to look at the last known-nonempty
2916 * page.
2917 */
2918 return vacrel->nonempty_pages;
2919}
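
The backwards scan above prefetches in forward-reading chunks by rounding the current block down to a PREFETCH_SIZE boundary; because the size is a power of two, a bit mask suffices. A standalone sketch of that alignment; the PREFETCH_SIZE value is copied here only for illustration.

#include <stdint.h>
#include <stdio.h>

/* Illustrative copy of vacuumlazy.c's PREFETCH_SIZE; must be a power of two. */
#define PREFETCH_SIZE ((uint32_t) 32)

/*
 * Round blkno down to the nearest PREFETCH_SIZE boundary; the prefetch pass
 * then reads forward from that start block up to blkno so OS readahead helps.
 */
static uint32_t
prefetch_start(uint32_t blkno)
{
    return blkno & ~(PREFETCH_SIZE - 1);
}

int
main(void)
{
    printf("%u\n", prefetch_start(1000)); /* 992: covers blocks 992..1000 */
    printf("%u\n", prefetch_start(31));   /* 0: first chunk of the relation */
    return 0;
}
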
2920
2921/*
2922 * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
2923 * shared memory). Sets both in vacrel for caller.
2924 *
2925 * Also handles parallel initialization as part of allocating dead_items in
2926 * DSM when required.
2927 */
2928static void
2929dead_items_alloc(LVRelState *vacrel, int nworkers)
2930{
2931 VacDeadItemsInfo *dead_items_info;
2932 int vac_work_mem = AmAutoVacuumWorkerProcess() &&
2933 autovacuum_work_mem != -1 ?
2934 autovacuum_work_mem : maintenance_work_mem;
2935
2936 /*
2937 * Initialize state for a parallel vacuum. As of now, only one worker can
2938 * be used for an index, so we invoke parallelism only if there are at
2939 * least two indexes on a table.
2940 */
2941 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
2942 {
2943 /*
2944 * Since parallel workers cannot access data in temporary tables, we
2945 * can't perform parallel vacuum on them.
2946 */
2947 if (RelationUsesLocalBuffers(vacrel->rel))
2948 {
2949 /*
2950 * Give warning only if the user explicitly tries to perform a
2951 * parallel vacuum on the temporary table.
2952 */
2953 if (nworkers > 0)
2954 ereport(WARNING,
2955 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
2956 vacrel->relname)));
2957 }
2958 else
2959 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
2960 vacrel->nindexes, nworkers,
2961 vac_work_mem,
2962 vacrel->verbose ? INFO : DEBUG2,
2963 vacrel->bstrategy);
2964
2965 /*
2966 * If parallel mode started, dead_items and dead_items_info spaces are
2967 * allocated in DSM.
2968 */
2969 if (ParallelVacuumIsActive(vacrel))
2970 {
2971 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
2972 &vacrel->dead_items_info);
2973 return;
2974 }
2975 }
2976
2977 /*
2978 * Serial VACUUM case. Allocate both dead_items and dead_items_info
2979 * locally.
2980 */
2981
2982 dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
2983 dead_items_info->max_bytes = vac_work_mem * 1024L;
2984 dead_items_info->num_items = 0;
2985 vacrel->dead_items_info = dead_items_info;
2986
2987 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
2988}
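
dead_items_alloc() above sizes the dead-TID store from autovacuum_work_mem when running in an autovacuum worker with that GUC set, and from maintenance_work_mem otherwise, converting kilobytes into the byte budget stored in max_bytes. A standalone sketch of that selection, with invented parameter names:

#include <stdint.h>
#include <stdio.h>

/*
 * Pick the memory budget for the dead-TID store. Both GUCs are expressed in
 * kilobytes, hence the final multiplication by 1024 to get max_bytes.
 */
static int64_t
dead_items_max_bytes(int is_autovacuum_worker, int autovacuum_work_mem_kb,
                     int maintenance_work_mem_kb)
{
    int work_mem_kb = (is_autovacuum_worker && autovacuum_work_mem_kb != -1) ?
        autovacuum_work_mem_kb : maintenance_work_mem_kb;

    return (int64_t) work_mem_kb * 1024;
}

int
main(void)
{
    /* autovacuum_work_mem unset (-1): fall back to maintenance_work_mem */
    printf("%lld\n", (long long) dead_items_max_bytes(1, -1, 65536));
    /* explicit autovacuum_work_mem of 131072kB (128MB) wins for autovacuum */
    printf("%lld\n", (long long) dead_items_max_bytes(1, 131072, 65536));
    return 0;
}
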
2989
2990/*
2991 * Add the given block number and offset numbers to dead_items.
2992 */
2993 static void
2994 dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
2995 int num_offsets)
2996{
2997 const int prog_index[2] = {
2998 PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
2999 PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3000 };
3001 int64 prog_val[2];
3002
3003 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3004 vacrel->dead_items_info->num_items += num_offsets;
3005
3006 /* update the progress information */
3007 prog_val[0] = vacrel->dead_items_info->num_items;
3008 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3009 pgstat_progress_update_multi_param(2, prog_index, prog_val);
3010}
3011
3012/*
3013 * Forget all collected dead items.
3014 */
3015 static void
3016 dead_items_reset(LVRelState *vacrel)
3017 {
3018 if (ParallelVacuumIsActive(vacrel))
3019 {
3020 parallel_vacuum_reset_dead_items(vacrel->pvs);
3021 return;
3022 }
3023
3024 /* Recreate the tidstore with the same max_bytes limitation */
3025 TidStoreDestroy(vacrel->dead_items);
3026 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3027
3028 /* Reset the counter */
3029 vacrel->dead_items_info->num_items = 0;
3030}
3031
3032/*
3033 * Perform cleanup for resources allocated in dead_items_alloc
3034 */
3035 static void
3036 dead_items_cleanup(LVRelState *vacrel)
3037 {
3038 if (!ParallelVacuumIsActive(vacrel))
3039 {
3040 /* Don't bother with pfree here */
3041 return;
3042 }
3043
3044 /* End parallel mode */
3045 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3046 vacrel->pvs = NULL;
3047}
3048
3049/*
3050 * Check if every tuple in the given page is visible to all current and future
3051 * transactions. Also return the visibility_cutoff_xid which is the highest
3052 * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3053 * on this page is frozen.
3054 *
3055 * This is a stripped down version of lazy_scan_prune(). If you change
3056 * anything here, make sure that everything stays in sync. Note that an
3057 * assertion calls us to verify that everybody still agrees. Be sure to avoid
3058 * introducing new side-effects here.
3059 */
3060 static bool
3061 heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3062 TransactionId *visibility_cutoff_xid,
3063 bool *all_frozen)
3064{
3065 Page page = BufferGetPage(buf);
3066 BlockNumber blockno = BufferGetBlockNumber(buf);
3067 OffsetNumber offnum,
3068 maxoff;
3069 bool all_visible = true;
3070
3071 *visibility_cutoff_xid = InvalidTransactionId;
3072 *all_frozen = true;
3073
3074 maxoff = PageGetMaxOffsetNumber(page);
3075 for (offnum = FirstOffsetNumber;
3076 offnum <= maxoff && all_visible;
3077 offnum = OffsetNumberNext(offnum))
3078 {
3079 ItemId itemid;
3080 HeapTupleData tuple;
3081
3082 /*
3083 * Set the offset number so that we can display it along with any
3084 * error that occurred while processing this tuple.
3085 */
3086 vacrel->offnum = offnum;
3087 itemid = PageGetItemId(page, offnum);
3088
3089 /* Unused or redirect line pointers are of no interest */
3090 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3091 continue;
3092
3093 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3094
3095 /*
3096 * Dead line pointers can have index pointers pointing to them. So
3097 * they can't be treated as visible
3098 */
3099 if (ItemIdIsDead(itemid))
3100 {
3101 all_visible = false;
3102 *all_frozen = false;
3103 break;
3104 }
3105
3106 Assert(ItemIdIsNormal(itemid));
3107
3108 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3109 tuple.t_len = ItemIdGetLength(itemid);
3110 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3111
3112 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3113 buf))
3114 {
3115 case HEAPTUPLE_LIVE:
3116 {
3117 TransactionId xmin;
3118
3119 /* Check comments in lazy_scan_prune. */
3120 if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3121 {
3122 all_visible = false;
3123 *all_frozen = false;
3124 break;
3125 }
3126
3127 /*
3128 * The inserter definitely committed. But is it old enough
3129 * that everyone sees it as committed?
3130 */
3131 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3132 if (!TransactionIdPrecedes(xmin,
3133 vacrel->cutoffs.OldestXmin))
3134 {
3135 all_visible = false;
3136 *all_frozen = false;
3137 break;
3138 }
3139
3140 /* Track newest xmin on page. */
3141 if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3142 TransactionIdIsNormal(xmin))
3143 *visibility_cutoff_xid = xmin;
3144
3145 /* Check whether this tuple is already frozen or not */
3146 if (all_visible && *all_frozen &&
3147 heap_tuple_needs_eventual_freeze(tuple.t_data))
3148 *all_frozen = false;
3149 }
3150 break;
3151
3152 case HEAPTUPLE_DEAD:
3153 case HEAPTUPLE_RECENTLY_DEAD:
3154 case HEAPTUPLE_INSERT_IN_PROGRESS:
3155 case HEAPTUPLE_DELETE_IN_PROGRESS:
3156 {
3157 all_visible = false;
3158 *all_frozen = false;
3159 break;
3160 }
3161 default:
3162 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3163 break;
3164 }
3165 } /* scan along page */
3166
3167 /* Clear the offset information once we have processed the given page. */
3168 vacrel->offnum = InvalidOffsetNumber;
3169
3170 return all_visible;
3171}
3172
3173/*
3174 * Update index statistics in pg_class if the statistics are accurate.
3175 */
3176 static void
3177 update_relstats_all_indexes(LVRelState *vacrel)
3178 {
3179 Relation *indrels = vacrel->indrels;
3180 int nindexes = vacrel->nindexes;
3181 IndexBulkDeleteResult **indstats = vacrel->indstats;
3182
3183 Assert(vacrel->do_index_cleanup);
3184
3185 for (int idx = 0; idx < nindexes; idx++)
3186 {
3187 Relation indrel = indrels[idx];
3188 IndexBulkDeleteResult *istat = indstats[idx];
3189
3190 if (istat == NULL || istat->estimated_count)
3191 continue;
3192
3193 /* Update index statistics */
3194 vac_update_relstats(indrel,
3195 istat->num_pages,
3196 istat->num_index_tuples,
3197 0,
3198 false,
3199 InvalidTransactionId,
3200 InvalidMultiXactId,
3201 NULL, NULL, false);
3202 }
3203}
3204
3205/*
3206 * Error context callback for errors occurring during vacuum. The error
3207 * context messages for index phases should match the messages set in parallel
3208 * vacuum. If you change this function for those phases, change
3209 * parallel_vacuum_error_callback() as well.
3210 */
3211 static void
3212 vacuum_error_callback(void *arg)
3213 {
3214 LVRelState *errinfo = arg;
3215
3216 switch (errinfo->phase)
3217 {
3218 case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3219 if (BlockNumberIsValid(errinfo->blkno))
3220 {
3221 if (OffsetNumberIsValid(errinfo->offnum))
3222 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3223 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3224 else
3225 errcontext("while scanning block %u of relation \"%s.%s\"",
3226 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3227 }
3228 else
3229 errcontext("while scanning relation \"%s.%s\"",
3230 errinfo->relnamespace, errinfo->relname);
3231 break;
3232
3233 case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3234 if (BlockNumberIsValid(errinfo->blkno))
3235 {
3236 if (OffsetNumberIsValid(errinfo->offnum))
3237 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3238 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3239 else
3240 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3241 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3242 }
3243 else
3244 errcontext("while vacuuming relation \"%s.%s\"",
3245 errinfo->relnamespace, errinfo->relname);
3246 break;
3247
3248 case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3249 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3250 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3251 break;
3252
3253 case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3254 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3255 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3256 break;
3257
3258 case VACUUM_ERRCB_PHASE_TRUNCATE:
3259 if (BlockNumberIsValid(errinfo->blkno))
3260 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3261 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3262 break;
3263
3264 case VACUUM_ERRCB_PHASE_UNKNOWN:
3265 default:
3266 return; /* do nothing; the errinfo may not be
3267 * initialized */
3268 }
3269}
3270
3271/*
3272 * Updates the information required for vacuum error callback. This also saves
3273 * the current information which can be later restored via restore_vacuum_error_info.
3274 */
3275 static void
3276 update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3277 int phase, BlockNumber blkno, OffsetNumber offnum)
3278{
3279 if (saved_vacrel)
3280 {
3281 saved_vacrel->offnum = vacrel->offnum;
3282 saved_vacrel->blkno = vacrel->blkno;
3283 saved_vacrel->phase = vacrel->phase;
3284 }
3285
3286 vacrel->blkno = blkno;
3287 vacrel->offnum = offnum;
3288 vacrel->phase = phase;
3289}
3290
3291/*
3292 * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3293 */
3294 static void
3295 restore_vacuum_error_info(LVRelState *vacrel,
3296 const LVSavedErrInfo *saved_vacrel)
3297{
3298 vacrel->blkno = saved_vacrel->blkno;
3299 vacrel->offnum = saved_vacrel->offnum;
3300 vacrel->phase = saved_vacrel->phase;
3301}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
int autovacuum_work_mem
Definition: autovacuum.c:119
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1720
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
bool track_io_timing
Definition: bufmgr.c:143
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition: bufmgr.c:639
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4924
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2532
void LockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5238
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5158
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:793
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5399
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
@ RBM_NORMAL
Definition: bufmgr.h:45
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:980
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:824
static bool PageIsEmpty(Page page)
Definition: bufpage.h:223
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
#define SizeOfPageHeaderData
Definition: bufpage.h:216
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsNew(Page page)
Definition: bufpage.h:233
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
static XLogRecPtr PageGetLSN(const char *page)
Definition: bufpage.h:386
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
uint8_t uint8
Definition: c.h:483
#define Max(x, y)
Definition: c.h:952
#define Assert(condition)
Definition: c.h:812
int64_t int64
Definition: c.h:482
TransactionId MultiXactId
Definition: c.h:616
int32_t int32
Definition: c.h:481
#define unlikely(x)
Definition: c.h:330
#define lengthof(array)
Definition: c.h:742
#define StaticAssertStmt(condition, errmessage)
Definition: c.h:892
uint32 TransactionId
Definition: c.h:606
size_t Size
Definition: c.h:559
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3187
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errdetail(const char *fmt,...)
Definition: elog.c:1203
ErrorContextCallback * error_context_stack
Definition: elog.c:94
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define errcontext
Definition: elog.h:196
#define WARNING
Definition: elog.h:36
#define DEBUG2
Definition: elog.h:29
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define INFO
Definition: elog.h:34
#define ereport(elevel,...)
Definition: elog.h:149
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition: freespace.c:377
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition: freespace.c:244
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition: freespace.c:194
bool VacuumCostActive
Definition: globals.c:157
int VacuumCostBalance
Definition: globals.c:156
int maintenance_work_mem
Definition: globals.c:132
struct Latch * MyLatch
Definition: globals.c:62
Oid MyDatabaseId
Definition: globals.c:93
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition: heapam.c:7651
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7706
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:43
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_LIVE
Definition: heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:130
@ HEAPTUPLE_DEAD
Definition: heapam.h:126
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:272
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:271
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:42
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
int verbose
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MICROSEC(t)
Definition: instr_time.h:194
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
static int pg_cmp_u16(uint16 a, uint16 b)
Definition: int.h:640
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int i
Definition: isn.c:72
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
void ResetLatch(Latch *latch)
Definition: latch.c:724
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:517
#define WL_TIMEOUT
Definition: latch.h:130
#define WL_EXIT_ON_PM_DEATH
Definition: latch.h:132
#define WL_LATCH_SET
Definition: latch.h:127
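A minimal sketch of the latch-based sleep pattern suggested by the entries above: wait up to a caller-supplied number of milliseconds, wake early if the process latch is set, and exit if the postmaster dies. The WAIT_EVENT_VACUUM_TRUNCATE identifier used for wait_event_info is an assumption, not something listed in this index.

#include "postgres.h"
#include "miscadmin.h"
#include "storage/latch.h"
#include "utils/wait_event.h"

static void
sleep_on_latch_sketch(long timeout_ms)
{
    (void) WaitLatch(MyLatch,
                     WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                     timeout_ms,
                     WAIT_EVENT_VACUUM_TRUNCATE);
    ResetLatch(MyLatch);
    CHECK_FOR_INTERRUPTS();
}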
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:309
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:274
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition: lmgr.c:362
#define NoLock
Definition: lockdefs.h:34
#define AccessExclusiveLock
Definition: lockdefs.h:43
#define RowExclusiveLock
Definition: lockdefs.h:38
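A minimal sketch of the non-blocking locking calls indexed above: take AccessExclusiveLock only if it is immediately available, and give it back if other backends end up queued behind it. Assumes an already-open Relation.

#include "postgres.h"
#include "storage/lmgr.h"
#include "storage/lockdefs.h"

static bool
try_exclusive_lock_sketch(Relation rel)
{
    if (!ConditionalLockRelation(rel, AccessExclusiveLock))
        return false;           /* a conflicting lock is already held */

    if (LockHasWaitersRelation(rel, AccessExclusiveLock))
    {
        /* Someone is now waiting behind us; don't hold them up. */
        UnlockRelation(rel, AccessExclusiveLock);
        return false;
    }

    return true;                /* caller now owns AccessExclusiveLock */
}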
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3366
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:381
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3331
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
#define InvalidMultiXactId
Definition: multixact.h:24
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
#define MaxOffsetNumber
Definition: off.h:28
void * arg
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:121
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
#define qsort(a, b, c, d)
Definition: port.h:447
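A minimal sketch of the sorting helpers indexed above: order an array of OffsetNumbers with the qsort wrapper and a pg_cmp_u16-based comparator (the same shape as cmpOffsetNumbers further down this index). The function names here are placeholders.

#include "postgres.h"
#include "common/int.h"
#include "storage/off.h"

static int
offset_cmp_sketch(const void *a, const void *b)
{
    return pg_cmp_u16(*(const OffsetNumber *) a,
                      *(const OffsetNumber *) b);
}

static void
sort_offsets_sketch(OffsetNumber *offsets, int noffsets)
{
    qsort(offsets, noffsets, sizeof(OffsetNumber), offset_cmp_sketch);
}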
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4107
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:38
#define PROGRESS_VACUUM_DEAD_TUPLE_BYTES
Definition: progress.h:27
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition: progress.h:33
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition: progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition: progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition: progress.h:35
#define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS
Definition: progress.h:28
#define PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
Definition: progress.h:26
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition: progress.h:23
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition: progress.h:36
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition: progress.h:34
#define PROGRESS_VACUUM_INDEXES_PROCESSED
Definition: progress.h:30
#define PROGRESS_VACUUM_INDEXES_TOTAL
Definition: progress.h:29
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition: progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition: progress.h:37
void heap_page_prune_and_freeze(Relation relation, Buffer buffer, GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:350
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2053
#define RelationGetRelid(relation)
Definition: rel.h:505
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationNeedsWAL(relation)
Definition: rel.h:628
#define RelationUsesLocalBuffers(relation)
Definition: rel.h:637
#define RelationGetNamespace(relation)
Definition: rel.h:546
@ MAIN_FORKNUM
Definition: relpath.h:58
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:288
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:94
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:179
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
struct ErrorContextCallback * previous
Definition: elog.h:296
void(* callback)(void *arg)
Definition: elog.h:297
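A minimal sketch of the error-context-callback pattern implied by the ErrorContextCallback and error_context_stack entries indexed above: push a callback, run code that may report errors, then pop it. The callback and its argument here are placeholders.

#include "postgres.h"

static void
sketch_error_callback(void *arg)
{
    errcontext("while processing \"%s\"", (const char *) arg);
}

static void
with_error_context_sketch(char *relname)
{
    ErrorContextCallback errcallback;

    errcallback.callback = sketch_error_callback;
    errcallback.arg = relname;
    errcallback.previous = error_context_stack;
    error_context_stack = &errcallback;

    /* ... work that may elog()/ereport(); the context line gets appended ... */

    /* Pop the error context stack before returning. */
    error_context_stack = errcallback.previous;
}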
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
bool estimated_count
Definition: genam.h:80
BlockNumber pages_deleted
Definition: genam.h:84
BlockNumber pages_newly_deleted
Definition: genam.h:83
BlockNumber pages_free
Definition: genam.h:85
BlockNumber num_pages
Definition: genam.h:79
double num_index_tuples
Definition: genam.h:81
Relation index
Definition: genam.h:48
double num_heap_tuples
Definition: genam.h:54
bool analyze_only
Definition: genam.h:50
BufferAccessStrategy strategy
Definition: genam.h:55
Relation heaprel
Definition: genam.h:49
bool report_progress
Definition: genam.h:51
int message_level
Definition: genam.h:53
bool estimated_count
Definition: genam.h:52
ParallelVacuumState * pvs
Definition: vacuumlazy.c:143
bool verbose
Definition: vacuumlazy.c:173
VacDeadItemsInfo * dead_items_info
Definition: vacuumlazy.c:186
BlockNumber vm_new_frozen_pages
Definition: vacuumlazy.c:205
int nindexes
Definition: vacuumlazy.c:139
Buffer next_unskippable_vmbuffer
Definition: vacuumlazy.c:231
OffsetNumber offnum
Definition: vacuumlazy.c:171
TidStore * dead_items
Definition: vacuumlazy.c:185
int64 tuples_deleted
Definition: vacuumlazy.c:220
BlockNumber nonempty_pages
Definition: vacuumlazy.c:209
bool do_rel_truncate
Definition: vacuumlazy.c:155
BlockNumber scanned_pages
Definition: vacuumlazy.c:189
bool aggressive
Definition: vacuumlazy.c:146
BlockNumber new_frozen_tuple_pages
Definition: vacuumlazy.c:191
GlobalVisState * vistest
Definition: vacuumlazy.c:159
BlockNumber removed_pages
Definition: vacuumlazy.c:190
int num_index_scans
Definition: vacuumlazy.c:218
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:215
double new_live_tuples
Definition: vacuumlazy.c:213
double new_rel_tuples
Definition: vacuumlazy.c:212
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:161
Relation rel
Definition: vacuumlazy.c:137
bool consider_bypass_optimization
Definition: vacuumlazy.c:150
BlockNumber rel_pages
Definition: vacuumlazy.c:188
BlockNumber next_unskippable_block
Definition: vacuumlazy.c:229
int64 recently_dead_tuples
Definition: vacuumlazy.c:224
int64 tuples_frozen
Definition: vacuumlazy.c:221
char * dbname
Definition: vacuumlazy.c:166
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:208
BlockNumber current_block
Definition: vacuumlazy.c:228
char * relnamespace
Definition: vacuumlazy.c:167
int64 live_tuples
Definition: vacuumlazy.c:223
int64 lpdead_items
Definition: vacuumlazy.c:222
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:142
bool skippedallvis
Definition: vacuumlazy.c:163
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:207
Relation * indrels
Definition: vacuumlazy.c:138
bool skipwithvm
Definition: vacuumlazy.c:148
bool do_index_cleanup
Definition: vacuumlazy.c:154
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:162
int64 missed_dead_tuples
Definition: vacuumlazy.c:225
BlockNumber blkno
Definition: vacuumlazy.c:170
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:158
bool next_unskippable_allvis
Definition: vacuumlazy.c:230
BlockNumber vm_new_visible_pages
Definition: vacuumlazy.c:194
char * relname
Definition: vacuumlazy.c:168
VacErrPhase phase
Definition: vacuumlazy.c:172
char * indname
Definition: vacuumlazy.c:169
BlockNumber vm_new_visible_frozen_pages
Definition: vacuumlazy.c:202
bool do_index_vacuuming
Definition: vacuumlazy.c:153
BlockNumber blkno
Definition: vacuumlazy.c:237
VacErrPhase phase
Definition: vacuumlazy.c:239
OffsetNumber offnum
Definition: vacuumlazy.c:238
int recently_dead_tuples
Definition: heapam.h:235
TransactionId vm_conflict_horizon
Definition: heapam.h:250
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:264
bool all_visible
Definition: heapam.h:248
Form_pg_class rd_rel
Definition: rel.h:111
BlockNumber blkno
Definition: tidstore.h:29
size_t max_bytes
Definition: vacuum.h:287
int64 num_items
Definition: vacuum.h:288
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId OldestXmin
Definition: vacuum.h:267
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278
MultiXactId OldestMxact
Definition: vacuum.h:268
int nworkers
Definition: vacuum.h:239
VacOptValue truncate
Definition: vacuum.h:231
bits32 options
Definition: vacuum.h:219
bool is_wraparound
Definition: vacuum.h:226
int log_min_duration
Definition: vacuum.h:227
VacOptValue index_cleanup
Definition: vacuum.h:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
TidStoreIter * TidStoreBeginIterate(TidStore *ts)
Definition: tidstore.c:479
void TidStoreEndIterate(TidStoreIter *iter)
Definition: tidstore.c:526
TidStoreIterResult * TidStoreIterateNext(TidStoreIter *iter)
Definition: tidstore.c:501
TidStore * TidStoreCreateLocal(size_t max_bytes, bool insert_only)
Definition: tidstore.c:162
void TidStoreDestroy(TidStore *ts)
Definition: tidstore.c:325
int TidStoreGetBlockOffsets(TidStoreIterResult *result, OffsetNumber *offsets, int max_offsets)
Definition: tidstore.c:574
void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
Definition: tidstore.c:353
size_t TidStoreMemoryUsage(TidStore *ts)
Definition: tidstore.c:540
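A minimal sketch of the TID-store API indexed above: create a local store with a memory budget, record the dead item offsets found on a heap block, and later iterate over the stored blocks. The inputs are placeholders supplied by the caller.

#include "postgres.h"
#include "access/tidstore.h"

static void
tidstore_sketch(size_t max_bytes,
                BlockNumber blkno, OffsetNumber *deadoffsets, int ndead)
{
    TidStore   *ts = TidStoreCreateLocal(max_bytes, true /* insert_only */ );
    TidStoreIter *iter;
    TidStoreIterResult *result;

    /* Remember the dead item IDs found on this block. */
    TidStoreSetBlockOffsets(ts, blkno, deadoffsets, ndead);
    elog(DEBUG2, "TID store uses %zu bytes", TidStoreMemoryUsage(ts));

    /* Walk the store block by block. */
    iter = TidStoreBeginIterate(ts);
    while ((result = TidStoreIterateNext(iter)) != NULL)
    {
        OffsetNumber offsets[MaxOffsetNumber];
        int          noffsets;

        noffsets = TidStoreGetBlockOffsets(result, offsets, lengthof(offsets));
        elog(DEBUG2, "block %u has %d stored offsets",
             result->blkno, noffsets);
    }
    TidStoreEndIterate(iter);

    TidStoreDestroy(ts);
}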
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2298
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1410
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition: vacuum.c:2537
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2341
void vacuum_delay_point(void)
Definition: vacuum.c:2362
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1084
bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1252
bool VacuumFailsafeActive
Definition: vacuum.c:95
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition: vacuum.c:1314
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
Definition: vacuum.c:2516
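A minimal sketch of the per-index helpers indexed above, looping over an array of already-populated IndexVacuumInfo structs; how the ivinfo array is filled in is elided, and the names here are placeholders.

#include "postgres.h"
#include "access/genam.h"
#include "access/tidstore.h"
#include "commands/vacuum.h"

static void
index_pass_sketch(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult **istats,
                  int nindexes, TidStore *dead_items,
                  VacDeadItemsInfo *dead_items_info)
{
    for (int idx = 0; idx < nindexes; idx++)
    {
        /* Remove the collected dead TIDs from this index ... */
        istats[idx] = vac_bulkdel_one_index(&ivinfo[idx], istats[idx],
                                            dead_items, dead_items_info);

        /* ... then let the index AM do its post-vacuum cleanup. */
        istats[idx] = vac_cleanup_one_index(&ivinfo[idx], istats[idx]);

        /* Honor cost-based vacuum delay between indexes. */
        vacuum_delay_point();
    }
}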
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3036
static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen)
Definition: vacuumlazy.c:3061
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3177
static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
Definition: vacuumlazy.c:2994
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition: vacuumlazy.c:80
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3212
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2656
static void lazy_vacuum(LVRelState *vacrel)
Definition: vacuumlazy.c:1948
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2459
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
Definition: vacuumlazy.c:1737
#define REL_TRUNCATE_MINIMUM
Definition: vacuumlazy.c:69
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2636
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
Definition: vacuumlazy.c:1316
VacErrPhase
Definition: vacuumlazy.c:125
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition: vacuumlazy.c:127
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition: vacuumlazy.c:128
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition: vacuumlazy.c:131
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition: vacuumlazy.c:130
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition: vacuumlazy.c:129
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:126
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:851
#define ParallelVacuumIsActive(vacrel)
Definition: vacuumlazy.c:121
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
Definition: vacuumlazy.c:3295
static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, bool *all_visible_according_to_vm)
Definition: vacuumlazy.c:1119
static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool all_visible_according_to_vm, bool *has_lpdead_items)
Definition: vacuumlazy.c:1455
void heap_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:308
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
Definition: vacuumlazy.c:2527
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
Definition: vacuumlazy.c:1217
static void dead_items_reset(LVRelState *vacrel)
Definition: vacuumlazy.c:3016
#define REL_TRUNCATE_FRACTION
Definition: vacuumlazy.c:70
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2406
struct LVSavedErrInfo LVSavedErrInfo
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
Definition: vacuumlazy.c:2576
#define PREFETCH_SIZE
Definition: vacuumlazy.c:115
static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
Definition: vacuumlazy.c:2283
struct LVRelState LVRelState
#define BYPASS_THRESHOLD_PAGES
Definition: vacuumlazy.c:87
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:2929
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition: vacuumlazy.c:81
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:2073
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
Definition: vacuumlazy.c:3276
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
Definition: vacuumlazy.c:2787
#define SKIP_PAGES_THRESHOLD
Definition: vacuumlazy.c:109
#define FAILSAFE_EVERY_PAGES
Definition: vacuumlazy.c:93
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition: vacuumlazy.c:79
static int cmpOffsetNumbers(const void *a, const void *b)
Definition: vacuumlazy.c:1436
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
Definition: vacuumlazy.c:2190
#define VACUUM_FSM_EVERY_PAGES
Definition: vacuumlazy.c:102
TidStore * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs, VacDeadItemsInfo **dead_items_info_p)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int vac_work_mem, int elevel, BufferAccessStrategy bstrategy)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans)
void parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
uint8 visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
#define VM_ALL_FROZEN(r, b, v)
Definition: visibilitymap.h:26
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
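A minimal sketch of reading a page's visibility-map bits with the calls indexed above; the caller supplies the Relation and is responsible for releasing the vmbuffer pin afterwards.

#include "postgres.h"
#include "access/visibilitymap.h"

static bool
page_is_all_frozen_sketch(Relation rel, BlockNumber blkno, Buffer *vmbuffer)
{
    uint8 vmstatus = visibilitymap_get_status(rel, blkno, vmbuffer);

    if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
        return false;           /* page is not even all-visible */

    return (vmstatus & VISIBILITYMAP_ALL_FROZEN) != 0;
}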
bool IsInParallelMode(void)
Definition: xact.c:1088
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)
Definition: xloginsert.c:1237