PostgreSQL Source Code git master
Loading...
Searching...
No Matches
vacuumlazy.c File Reference
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/tidstore.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/storage.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/latch.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/lsyscache.h"
#include "utils/pg_rusage.h"
#include "utils/timestamp.h"
#include "utils/wait_event.h"
Include dependency graph for vacuumlazy.c:

Go to the source code of this file.

Data Structures

struct  LVRelState
 
struct  LVSavedErrInfo
 

Macros

#define REL_TRUNCATE_MINIMUM   1000
 
#define REL_TRUNCATE_FRACTION   16
 
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */
 
#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */
 
#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)
 
#define PREFETCH_SIZE   ((BlockNumber) 32)
 
#define ParallelVacuumIsActive(vacrel)   ((vacrel)->pvs != NULL)
 
#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2
 
#define EAGER_SCAN_REGION_SIZE   4096
 

Typedefs

typedef struct LVRelState LVRelState
 
typedef struct LVSavedErrInfo LVSavedErrInfo
 

Enumerations

enum  VacErrPhase {
  VACUUM_ERRCB_PHASE_UNKNOWN , VACUUM_ERRCB_PHASE_SCAN_HEAP , VACUUM_ERRCB_PHASE_VACUUM_INDEX , VACUUM_ERRCB_PHASE_VACUUM_HEAP ,
  VACUUM_ERRCB_PHASE_INDEX_CLEANUP , VACUUM_ERRCB_PHASE_TRUNCATE
}
 

Functions

static void lazy_scan_heap (LVRelState *vacrel)
 
static void heap_vacuum_eager_scan_setup (LVRelState *vacrel, const VacuumParams params)
 
static BlockNumber heap_vac_scan_next_block (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 
static void find_next_unskippable_block (LVRelState *vacrel, bool *skipsallvis)
 
static bool lazy_scan_new_or_empty (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
 
static int lazy_scan_prune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool *has_lpdead_items, bool *vm_page_frozen)
 
static bool lazy_scan_noprune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
 
static void lazy_vacuum (LVRelState *vacrel)
 
static bool lazy_vacuum_all_indexes (LVRelState *vacrel)
 
static void lazy_vacuum_heap_rel (LVRelState *vacrel)
 
static void lazy_vacuum_heap_page (LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
 
static bool lazy_check_wraparound_failsafe (LVRelState *vacrel)
 
static void lazy_cleanup_all_indexes (LVRelState *vacrel)
 
static IndexBulkDeleteResult * lazy_vacuum_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
 
static IndexBulkDeleteResult * lazy_cleanup_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
 
static bool should_attempt_truncation (LVRelState *vacrel)
 
static void lazy_truncate_heap (LVRelState *vacrel)
 
static BlockNumber count_nondeletable_pages (LVRelState *vacrel, bool *lock_waiter_detected)
 
static void dead_items_alloc (LVRelState *vacrel, int nworkers)
 
static void dead_items_add (LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
 
static void dead_items_reset (LVRelState *vacrel)
 
static void dead_items_cleanup (LVRelState *vacrel)
 
static bool heap_page_would_be_all_visible (Relation rel, Buffer buf, GlobalVisState *vistest, bool allow_update_vistest, OffsetNumber *deadoffsets, int ndeadoffsets, bool *all_frozen, TransactionId *newest_live_xid, OffsetNumber *logging_offnum)
 
static void update_relstats_all_indexes (LVRelState *vacrel)
 
static void vacuum_error_callback (void *arg)
 
static void update_vacuum_error_info (LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
 
static void restore_vacuum_error_info (LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
static int cmpOffsetNumbers (const void *a, const void *b)
 
static BlockNumber vacuum_reap_lp_read_stream_next (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 

Macro Definition Documentation

◆ BYPASS_THRESHOLD_PAGES

#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */

Definition at line 186 of file vacuumlazy.c.

◆ EAGER_SCAN_REGION_SIZE

#define EAGER_SCAN_REGION_SIZE   4096

Definition at line 249 of file vacuumlazy.c.

◆ FAILSAFE_EVERY_PAGES

#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 192 of file vacuumlazy.c.

223{
231
232/*
233 * An eager scan of a page that is set all-frozen in the VM is considered
234 * "successful". To spread out freezing overhead across multiple normal
235 * vacuums, we limit the number of successful eager page freezes. The maximum
236 * number of eager page freezes is calculated as a ratio of the all-visible
237 * but not all-frozen pages at the beginning of the vacuum.
238 */
239#define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
240
241/*
242 * On the assumption that different regions of the table tend to have
243 * similarly aged data, once vacuum fails to freeze
244 * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
245 * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
246 * to another region of the table with potentially older data.
247 */
248#define EAGER_SCAN_REGION_SIZE 4096
249
250typedef struct LVRelState
251{
252 /* Target heap relation and its indexes */
255 int nindexes;
256
257 /* Buffer access strategy and parallel vacuum state */
260
261 /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
262 bool aggressive;
263 /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
264 bool skipwithvm;
265 /* Consider index vacuuming bypass optimization? */
267
268 /* Doing index vacuuming, index cleanup, rel truncation? */
270 bool do_index_cleanup;
271 bool do_rel_truncate;
272
273 /* VACUUM operation's cutoffs for freezing and pruning */
274 struct VacuumCutoffs cutoffs;
276 /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
279 bool skippedallvis;
280
281 /* Error reporting state */
282 char *dbname;
283 char *relnamespace;
284 char *relname;
285 char *indname; /* Current index name */
286 BlockNumber blkno; /* used only for heap operations */
287 OffsetNumber offnum; /* used only for heap operations */
289 bool verbose; /* VACUUM VERBOSE? */
290
291 /*
292 * dead_items stores TIDs whose index tuples are deleted by index
293 * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
294 * that has been processed by lazy_scan_prune. Also needed by
295 * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
296 * LP_UNUSED during second heap pass.
297 *
298 * Both dead_items and dead_items_info are allocated in shared memory in
299 * parallel vacuum cases.
300 */
301 TidStore *dead_items; /* TIDs whose index tuples we'll delete */
303
304 BlockNumber rel_pages; /* total number of pages */
305 BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
306
307 /*
308 * Count of all-visible blocks eagerly scanned (for logging only). This
309 * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
310 */
312
313 BlockNumber removed_pages; /* # pages removed by relation truncation */
314 BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
315
316 /* # pages newly set all-visible in the VM */
318
319 /*
320 * # pages newly set all-visible and all-frozen in the VM. This is a
321 * subset of new_all_visible_pages. That is, new_all_visible_pages
322 * includes all pages set all-visible, but
323 * new_all_visible_all_frozen_pages includes only those which were also
324 * set all-frozen.
325 */
327
328 /* # all-visible pages newly set all-frozen in the VM */
330
331 BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
332 BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
333 BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
334
335 /* Statistics output by us, for table */
336 double new_rel_tuples; /* new estimated total # of tuples */
337 double new_live_tuples; /* new estimated total # of live tuples */
338 /* Statistics output by index AMs */
340
341 /* Instrumentation counters */
342 int num_index_scans;
345
346 /*
347 * Total number of planned and actually launched parallel workers for
348 * index vacuuming and index cleanup.
349 */
351
352 /* Counters that follow are only for scanned_pages */
353 int64 tuples_deleted; /* # deleted from table */
354 int64 tuples_frozen; /* # newly frozen */
355 int64 lpdead_items; /* # deleted from indexes */
356 int64 live_tuples; /* # live tuples remaining */
357 int64 recently_dead_tuples; /* # dead, but not yet removable */
358 int64 missed_dead_tuples; /* # removable, but not removed */
359
360 /* State maintained by heap_vac_scan_next_block() */
361 BlockNumber current_block; /* last block returned */
362 BlockNumber next_unskippable_block; /* next unskippable block */
363 bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
364 Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
365
366 /* State related to managing eager scanning of all-visible pages */
367
368 /*
369 * A normal vacuum that has failed to freeze too many eagerly scanned
370 * blocks in a region suspends eager scanning.
371 * next_eager_scan_region_start is the block number of the first block
372 * eligible for resumed eager scanning.
373 *
374 * When eager scanning is permanently disabled, either initially
375 * (including for aggressive vacuum) or due to hitting the success cap,
376 * this is set to InvalidBlockNumber.
377 */
379
380 /*
381 * The remaining number of blocks a normal vacuum will consider eager
382 * scanning when it is successful. When eager scanning is enabled, this is
383 * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
384 * all-visible but not all-frozen pages. For each eager freeze success,
385 * this is decremented. Once it hits 0, eager scanning is permanently
386 * disabled. It is initialized to 0 if eager scanning starts out disabled
387 * (including for aggressive vacuum).
388 */
390
391 /*
392 * The maximum number of blocks which may be eagerly scanned and not
393 * frozen before eager scanning is temporarily suspended. This is
394 * configurable both globally, via the
395 * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
396 * storage parameter of the same name. It is calculated as
397 * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
398 * It is 0 when eager scanning is disabled.
399 */
401
402 /*
403 * The number of eagerly scanned blocks vacuum failed to freeze (due to
404 * age) in the current eager scan region. Vacuum resets it to
405 * eager_scan_max_fails_per_region each time it enters a new region of the
406 * relation. If eager_scan_remaining_fails hits 0, eager scanning is
407 * suspended until the next region. It is also 0 if eager scanning has
408 * been permanently disabled.
409 */
411} LVRelState;
412
413
414/* Struct for saving and restoring vacuum error information. */
415typedef struct LVSavedErrInfo
416{
421
422
423/* non-export function prototypes */
424static void lazy_scan_heap(LVRelState *vacrel);
426 const VacuumParams params);
428 void *callback_private_data,
429 void *per_buffer_data);
432 BlockNumber blkno, Page page,
433 bool sharelock, Buffer vmbuffer);
435 BlockNumber blkno, Page page,
436 Buffer vmbuffer,
437 bool *has_lpdead_items, bool *vm_page_frozen);
439 BlockNumber blkno, Page page,
440 bool *has_lpdead_items);
441static void lazy_vacuum(LVRelState *vacrel);
445 Buffer buffer, OffsetNumber *deadoffsets,
446 int num_offsets, Buffer vmbuffer);
451 double reltuples,
455 double reltuples,
456 bool estimated_count,
462static void dead_items_alloc(LVRelState *vacrel, int nworkers);
463static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
464 int num_offsets);
467
469 GlobalVisState *vistest,
471 OffsetNumber *deadoffsets,
472 int ndeadoffsets,
473 bool *all_frozen,
474 TransactionId *newest_live_xid,
477static void vacuum_error_callback(void *arg);
480 int phase, BlockNumber blkno,
481 OffsetNumber offnum);
484
485
486
487/*
488 * Helper to set up the eager scanning state for vacuuming a single relation.
489 * Initializes the eager scan management related members of the LVRelState.
490 *
491 * Caller provides whether or not an aggressive vacuum is required due to
492 * vacuum options or for relfrozenxid/relminmxid advancement.
493 */
494static void
496{
500 float first_region_ratio;
502
503 /*
504 * Initialize eager scan management fields to their disabled values.
505 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
506 * of tables without sufficiently old tuples disable eager scanning.
507 */
508 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
509 vacrel->eager_scan_max_fails_per_region = 0;
510 vacrel->eager_scan_remaining_fails = 0;
511 vacrel->eager_scan_remaining_successes = 0;
512
513 /* If eager scanning is explicitly disabled, just return. */
514 if (params.max_eager_freeze_failure_rate == 0)
515 return;
516
517 /*
518 * The caller will have determined whether or not an aggressive vacuum is
519 * required by either the vacuum parameters or the relative age of the
520 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
521 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
522 * so scans of all-visible pages are not considered eager.
523 */
524 if (vacrel->aggressive)
525 return;
526
527 /*
528 * Aggressively vacuuming a small relation shouldn't take long, so it
529 * isn't worth amortizing. We use two times the region size as the size
530 * cutoff because the eager scan start block is a random spot somewhere in
531 * the first region, making the second region the first to be eager
532 * scanned normally.
533 */
534 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
535 return;
536
537 /*
538 * We only want to enable eager scanning if we are likely to be able to
539 * freeze some of the pages in the relation.
540 *
541 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
542 * are technically freezable, but we won't freeze them unless the criteria
543 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
544 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
545 *
546 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
547 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
548 * enable eager scanning.
549 */
550 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
551 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
552 vacrel->cutoffs.FreezeLimit))
554
556 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
557 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
558 vacrel->cutoffs.MultiXactCutoff))
560
562 return;
563
564 /* We have met the criteria to eagerly scan some pages. */
565
566 /*
567 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
568 * all-visible but not all-frozen blocks in the relation.
569 */
571
572 vacrel->eager_scan_remaining_successes =
575
576 /* If every all-visible page is frozen, eager scanning is disabled. */
577 if (vacrel->eager_scan_remaining_successes == 0)
578 return;
579
580 /*
581 * Now calculate the bounds of the first eager scan region. Its end block
582 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
583 * blocks. This affects the bounds of all subsequent regions and avoids
584 * eager scanning and failing to freeze the same blocks each vacuum of the
585 * relation.
586 */
588
589 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
590
593
594 vacrel->eager_scan_max_fails_per_region =
597
598 /*
599 * The first region will be smaller than subsequent regions. As such,
600 * adjust the eager freeze failures tolerated for this region.
601 */
602 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
604
605 vacrel->eager_scan_remaining_fails =
606 vacrel->eager_scan_max_fails_per_region *
608}
609
610/*
611 * heap_vacuum_rel() -- perform VACUUM for one heap relation
612 *
613 * This routine sets things up for and then calls lazy_scan_heap, where
614 * almost all work actually takes place. Finalizes everything after call
615 * returns by managing relation truncation and updating rel's pg_class
616 * entry. (Also updates pg_class entries for any indexes that need it.)
617 *
618 * At entry, we have already established a transaction and opened
619 * and locked the relation.
620 */
621void
622heap_vacuum_rel(Relation rel, const VacuumParams params,
623 BufferAccessStrategy bstrategy)
624{
626 bool verbose,
627 instrument,
628 skipwithvm,
636 TimestampTz starttime = 0;
638 startwritetime = 0;
641 ErrorContextCallback errcallback;
642 char **indnames = NULL;
644
645 verbose = (params.options & VACOPT_VERBOSE) != 0;
646 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
647 params.log_vacuum_min_duration >= 0));
648 if (instrument)
649 {
651 if (track_io_timing)
652 {
655 }
656 }
657
658 /* Used for instrumentation and stats report */
659 starttime = GetCurrentTimestamp();
660
662 RelationGetRelid(rel));
665 params.is_wraparound
668 else
671
672 /*
673 * Setup error traceback support for ereport() first. The idea is to set
674 * up an error context callback to display additional information on any
675 * error during a vacuum. During different phases of vacuum, we update
676 * the state so that the error context callback always display current
677 * information.
678 *
679 * Copy the names of heap rel into local memory for error reporting
680 * purposes, too. It isn't always safe to assume that we can get the name
681 * of each rel. It's convenient for code in lazy_scan_heap to always use
682 * these temp copies.
683 */
686 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
687 vacrel->relname = pstrdup(RelationGetRelationName(rel));
688 vacrel->indname = NULL;
690 vacrel->verbose = verbose;
691 errcallback.callback = vacuum_error_callback;
692 errcallback.arg = vacrel;
693 errcallback.previous = error_context_stack;
694 error_context_stack = &errcallback;
695
696 /* Set up high level stuff about rel and its indexes */
697 vacrel->rel = rel;
699 &vacrel->indrels);
700 vacrel->bstrategy = bstrategy;
701 if (instrument && vacrel->nindexes > 0)
702 {
703 /* Copy index names used by instrumentation (not error reporting) */
704 indnames = palloc_array(char *, vacrel->nindexes);
705 for (int i = 0; i < vacrel->nindexes; i++)
707 }
708
709 /*
710 * The index_cleanup param either disables index vacuuming and cleanup or
711 * forces it to go ahead when we would otherwise apply the index bypass
712 * optimization. The default is 'auto', which leaves the final decision
713 * up to lazy_vacuum().
714 *
715 * The truncate param allows user to avoid attempting relation truncation,
716 * though it can't force truncation to happen.
717 */
720 params.truncate != VACOPTVALUE_AUTO);
721
722 /*
723 * While VacuumFailSafeActive is reset to false before calling this, we
724 * still need to reset it here due to recursive calls.
725 */
726 VacuumFailsafeActive = false;
727 vacrel->consider_bypass_optimization = true;
728 vacrel->do_index_vacuuming = true;
729 vacrel->do_index_cleanup = true;
730 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
732 {
733 /* Force disable index vacuuming up-front */
734 vacrel->do_index_vacuuming = false;
735 vacrel->do_index_cleanup = false;
736 }
737 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
738 {
739 /* Force index vacuuming. Note that failsafe can still bypass. */
740 vacrel->consider_bypass_optimization = false;
741 }
742 else
743 {
744 /* Default/auto, make all decisions dynamically */
746 }
747
748 /* Initialize page counters explicitly (be tidy) */
749 vacrel->scanned_pages = 0;
750 vacrel->eager_scanned_pages = 0;
751 vacrel->removed_pages = 0;
752 vacrel->new_frozen_tuple_pages = 0;
753 vacrel->lpdead_item_pages = 0;
754 vacrel->missed_dead_pages = 0;
755 vacrel->nonempty_pages = 0;
756 /* dead_items_alloc allocates vacrel->dead_items later on */
757
758 /* Allocate/initialize output statistics state */
759 vacrel->new_rel_tuples = 0;
760 vacrel->new_live_tuples = 0;
761 vacrel->indstats = (IndexBulkDeleteResult **)
762 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
763
764 /* Initialize remaining counters (be tidy) */
765 vacrel->num_index_scans = 0;
766 vacrel->num_dead_items_resets = 0;
767 vacrel->total_dead_items_bytes = 0;
768 vacrel->tuples_deleted = 0;
769 vacrel->tuples_frozen = 0;
770 vacrel->lpdead_items = 0;
771 vacrel->live_tuples = 0;
772 vacrel->recently_dead_tuples = 0;
773 vacrel->missed_dead_tuples = 0;
774
775 vacrel->new_all_visible_pages = 0;
776 vacrel->new_all_visible_all_frozen_pages = 0;
777 vacrel->new_all_frozen_pages = 0;
778
779 vacrel->worker_usage.vacuum.nlaunched = 0;
780 vacrel->worker_usage.vacuum.nplanned = 0;
781 vacrel->worker_usage.cleanup.nlaunched = 0;
782 vacrel->worker_usage.cleanup.nplanned = 0;
783
784 /*
785 * Get cutoffs that determine which deleted tuples are considered DEAD,
786 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
787 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
788 * happen in this order to ensure that the OldestXmin cutoff field works
789 * as an upper bound on the XIDs stored in the pages we'll actually scan
790 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
791 *
792 * Next acquire vistest, a related cutoff that's used in pruning. We use
793 * vistest in combination with OldestXmin to ensure that
794 * heap_page_prune_and_freeze() always removes any deleted tuple whose
795 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
796 * whether a tuple should be frozen or removed. (In the future we might
797 * want to teach lazy_scan_prune to recompute vistest from time to time,
798 * to increase the number of dead tuples it can prune away.)
799 */
800 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
802 vacrel->vistest = GlobalVisTestFor(rel);
803
804 /* Initialize state used to track oldest extant XID/MXID */
805 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
806 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
807
808 /*
809 * Initialize state related to tracking all-visible page skipping. This is
810 * very important to determine whether or not it is safe to advance the
811 * relfrozenxid/relminmxid.
812 */
813 vacrel->skippedallvis = false;
814 skipwithvm = true;
816 {
817 /*
818 * Force aggressive mode, and disable skipping blocks using the
819 * visibility map (even those set all-frozen)
820 */
821 vacrel->aggressive = true;
822 skipwithvm = false;
823 }
824
825 vacrel->skipwithvm = skipwithvm;
826
827 /*
828 * Set up eager scan tracking state. This must happen after determining
829 * whether or not the vacuum must be aggressive, because only normal
830 * vacuums use the eager scan algorithm.
831 */
833
834 /* Report the vacuum mode: 'normal' or 'aggressive' */
836 vacrel->aggressive
839
840 if (verbose)
841 {
842 if (vacrel->aggressive)
844 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
845 vacrel->dbname, vacrel->relnamespace,
846 vacrel->relname)));
847 else
849 (errmsg("vacuuming \"%s.%s.%s\"",
850 vacrel->dbname, vacrel->relnamespace,
851 vacrel->relname)));
852 }
853
854 /*
855 * Allocate dead_items memory using dead_items_alloc. This handles
856 * parallel VACUUM initialization as part of allocating shared memory
857 * space used for dead_items. (But do a failsafe precheck first, to
858 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
859 * is already dangerously old.)
860 */
863
864 /*
865 * Call lazy_scan_heap to perform all required heap pruning, index
866 * vacuuming, and heap vacuuming (plus related processing)
867 */
869
870 /*
871 * Save dead items max_bytes and update the memory usage statistics before
872 * cleanup, they are freed in parallel vacuum cases during
873 * dead_items_cleanup().
874 */
875 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
876 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
877
878 /*
879 * Free resources managed by dead_items_alloc. This ends parallel mode in
880 * passing when necessary.
881 */
884
885 /*
886 * Update pg_class entries for each of rel's indexes where appropriate.
887 *
888 * Unlike the later update to rel's pg_class entry, this is not critical.
889 * Maintains relpages/reltuples statistics used by the planner only.
890 */
891 if (vacrel->do_index_cleanup)
893
894 /* Done with rel's indexes */
895 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
896
897 /* Optionally truncate rel */
900
901 /* Pop the error context stack */
902 error_context_stack = errcallback.previous;
903
904 /* Report that we are now doing final cleanup */
907
908 /*
909 * Prepare to update rel's pg_class entry.
910 *
911 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
912 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
913 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
914 */
915 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
916 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
917 vacrel->cutoffs.relfrozenxid,
918 vacrel->NewRelfrozenXid));
919 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
920 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
921 vacrel->cutoffs.relminmxid,
922 vacrel->NewRelminMxid));
923 if (vacrel->skippedallvis)
924 {
925 /*
926 * Must keep original relfrozenxid in a non-aggressive VACUUM that
927 * chose to skip an all-visible page range. The state that tracks new
928 * values will have missed unfrozen XIDs from the pages we skipped.
929 */
930 Assert(!vacrel->aggressive);
931 vacrel->NewRelfrozenXid = InvalidTransactionId;
932 vacrel->NewRelminMxid = InvalidMultiXactId;
933 }
934
935 /*
936 * For safety, clamp relallvisible to be not more than what we're setting
937 * pg_class.relpages to
938 */
939 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
943
944 /*
945 * An all-frozen block _must_ be all-visible. As such, clamp the count of
946 * all-frozen blocks to the count of all-visible blocks. This matches the
947 * clamping of relallvisible above.
948 */
951
952 /*
953 * Now actually update rel's pg_class entry.
954 *
955 * In principle new_live_tuples could be -1 indicating that we (still)
956 * don't know the tuple count. In practice that can't happen, since we
957 * scan every page that isn't skipped using the visibility map.
958 */
959 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
961 vacrel->nindexes > 0,
962 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
964
965 /*
966 * Report results to the cumulative stats system, too.
967 *
968 * Deliberately avoid telling the stats system about LP_DEAD items that
969 * remain in the table due to VACUUM bypassing index and heap vacuuming.
970 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
971 * It seems like a good idea to err on the side of not vacuuming again too
972 * soon in cases where the failsafe prevented significant amounts of heap
973 * vacuuming.
974 */
976 Max(vacrel->new_live_tuples, 0),
977 vacrel->recently_dead_tuples +
978 vacrel->missed_dead_tuples,
979 starttime);
981
982 if (instrument)
983 {
985
986 if (verbose || params.log_vacuum_min_duration == 0 ||
989 {
990 long secs_dur;
991 int usecs_dur;
992 WalUsage walusage;
993 BufferUsage bufferusage;
995 char *msgfmt;
996 int32 diff;
997 double read_rate = 0,
998 write_rate = 0;
1002
1004 memset(&walusage, 0, sizeof(WalUsage));
1006 memset(&bufferusage, 0, sizeof(BufferUsage));
1008
1009 total_blks_hit = bufferusage.shared_blks_hit +
1010 bufferusage.local_blks_hit;
1011 total_blks_read = bufferusage.shared_blks_read +
1012 bufferusage.local_blks_read;
1014 bufferusage.local_blks_dirtied;
1015
1017 if (verbose)
1018 {
1019 /*
1020 * Aggressiveness already reported earlier, in dedicated
1021 * VACUUM VERBOSE ereport
1022 */
1023 Assert(!params.is_wraparound);
1024 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1025 }
1026 else if (params.is_wraparound)
1027 {
1028 /*
1029 * While it's possible for a VACUUM to be both is_wraparound
1030 * and !aggressive, that's just a corner-case -- is_wraparound
1031 * implies aggressive. Produce distinct output for the corner
1032 * case all the same, just in case.
1033 */
1034 if (vacrel->aggressive)
1035 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1036 else
1037 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1038 }
1039 else
1040 {
1041 if (vacrel->aggressive)
1042 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1043 else
1044 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1045 }
1047 vacrel->dbname,
1048 vacrel->relnamespace,
1049 vacrel->relname,
1050 vacrel->num_index_scans);
1051 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1052 vacrel->removed_pages,
1054 vacrel->scanned_pages,
1055 orig_rel_pages == 0 ? 100.0 :
1056 100.0 * vacrel->scanned_pages /
1058 vacrel->eager_scanned_pages);
1060 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1061 vacrel->tuples_deleted,
1062 (int64) vacrel->new_rel_tuples,
1063 vacrel->recently_dead_tuples);
1064 if (vacrel->missed_dead_tuples > 0)
1066 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1067 vacrel->missed_dead_tuples,
1068 vacrel->missed_dead_pages);
1070 vacrel->cutoffs.OldestXmin);
1072 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1073 vacrel->cutoffs.OldestXmin, diff);
1075 {
1076 diff = (int32) (vacrel->NewRelfrozenXid -
1077 vacrel->cutoffs.relfrozenxid);
1079 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1080 vacrel->NewRelfrozenXid, diff);
1081 }
1082 if (minmulti_updated)
1083 {
1084 diff = (int32) (vacrel->NewRelminMxid -
1085 vacrel->cutoffs.relminmxid);
1087 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1088 vacrel->NewRelminMxid, diff);
1089 }
1090 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1091 vacrel->new_frozen_tuple_pages,
1092 orig_rel_pages == 0 ? 100.0 :
1093 100.0 * vacrel->new_frozen_tuple_pages /
1095 vacrel->tuples_frozen);
1096
1098 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1099 vacrel->new_all_visible_pages,
1100 vacrel->new_all_visible_all_frozen_pages +
1101 vacrel->new_all_frozen_pages,
1102 vacrel->new_all_frozen_pages);
1103 if (vacrel->do_index_vacuuming)
1104 {
1105 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1106 appendStringInfoString(&buf, _("index scan not needed: "));
1107 else
1108 appendStringInfoString(&buf, _("index scan needed: "));
1109
1110 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1111 }
1112 else
1113 {
1115 appendStringInfoString(&buf, _("index scan bypassed: "));
1116 else
1117 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1118
1119 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1120 }
1122 vacrel->lpdead_item_pages,
1123 orig_rel_pages == 0 ? 100.0 :
1124 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1125 vacrel->lpdead_items);
1126
1127 if (vacrel->worker_usage.vacuum.nplanned > 0)
1129 _("parallel workers: index vacuum: %d planned, %d launched in total\n"),
1130 vacrel->worker_usage.vacuum.nplanned,
1131 vacrel->worker_usage.vacuum.nlaunched);
1132
1133 if (vacrel->worker_usage.cleanup.nplanned > 0)
1135 _("parallel workers: index cleanup: %d planned, %d launched\n"),
1136 vacrel->worker_usage.cleanup.nplanned,
1137 vacrel->worker_usage.cleanup.nlaunched);
1138
1139 for (int i = 0; i < vacrel->nindexes; i++)
1140 {
1141 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1142
1143 if (!istat)
1144 continue;
1145
1147 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1148 indnames[i],
1149 istat->num_pages,
1150 istat->pages_newly_deleted,
1151 istat->pages_deleted,
1152 istat->pages_free);
1153 }
1155 {
1156 /*
1157 * We bypass the changecount mechanism because this value is
1158 * only updated by the calling process. We also rely on the
1159 * above call to pgstat_progress_end_command() to not clear
1160 * the st_progress_param array.
1161 */
1162 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1164 }
1165 if (track_io_timing)
1166 {
1167 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1168 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1169
1170 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1171 read_ms, write_ms);
1172 }
1173 if (secs_dur > 0 || usecs_dur > 0)
1174 {
1176 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1178 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1179 }
1180 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1183 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1188 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1189 walusage.wal_records,
1190 walusage.wal_fpi,
1191 walusage.wal_bytes,
1192 walusage.wal_fpi_bytes,
1193 walusage.wal_buffers_full);
1194
1195 /*
1196 * Report the dead items memory usage.
1197 *
1198 * The num_dead_items_resets counter increases when we reset the
1199 * collected dead items, so the counter is non-zero if at least
1200 * one dead items are collected, even if index vacuuming is
1201 * disabled.
1202 */
1204 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1205 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1206 vacrel->num_dead_items_resets),
1207 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1208 vacrel->num_dead_items_resets,
1209 (double) dead_items_max_bytes / (1024 * 1024));
1210 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1211
1212 ereport(verbose ? INFO : LOG,
1213 (errmsg_internal("%s", buf.data)));
1214 pfree(buf.data);
1215 }
1216 }
1217
1218 /* Cleanup index statistics and index names */
1219 for (int i = 0; i < vacrel->nindexes; i++)
1220 {
1221 if (vacrel->indstats[i])
1222 pfree(vacrel->indstats[i]);
1223
1224 if (instrument)
1225 pfree(indnames[i]);
1226 }
1227}
1228
1229/*
 1230 * lazy_scan_heap() -- workhorse function for VACUUM
 1231 *
 1232 * This routine prunes each page in the heap, and considers the need to
 1233 * freeze remaining tuples with storage (not including pages that can be
 1234 * skipped using the visibility map). Also performs related maintenance
 1235 * of the FSM and visibility map. These steps all take place during an
 1236 * initial pass over the target heap relation.
 1237 *
 1238 * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
 1239 * consists of deleting index tuples that point to LP_DEAD items left in
 1240 * heap pages following pruning. Earlier initial pass over the heap will
 1241 * have collected the TIDs whose index tuples need to be removed.
 1242 *
 1243 * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
 1244 * largely consists of marking LP_DEAD items (from vacrel->dead_items)
 1245 * as LP_UNUSED. This has to happen in a second, final pass over the
 1246 * heap, to preserve a basic invariant that all index AMs rely on: no
 1247 * extant index tuple can ever be allowed to contain a TID that points to
 1248 * an LP_UNUSED line pointer in the heap. We must disallow premature
 1249 * recycling of line pointers to avoid index scans that get confused
 1250 * about which TID points to which tuple immediately after recycling.
 1251 * (Actually, this isn't a concern when target heap relation happens to
 1252 * have no indexes, which allows us to safely apply the one-pass strategy
 1253 * as an optimization).
 1254 *
 1255 * In practice we often have enough space to fit all TIDs, and so won't
 1256 * need to call lazy_vacuum more than once, after our initial pass over
 1257 * the heap has totally finished. Otherwise things are slightly more
 1258 * complicated: our "initial pass" over the heap applies only to those
 1259 * pages that were pruned before we needed to call lazy_vacuum, and our
 1260 * "final pass" over the heap only vacuums these same heap pages.
 1261 * However, we process indexes in full every time lazy_vacuum is called,
 1262 * which makes index processing very inefficient when memory is in short
 1263 * supply.
 1264 */
/*
 * NOTE(review): this listing is a rendered extraction that drops many
 * original source lines (the embedded line numbers jump; e.g. 1266, the
 * function's signature line, and 1271-1272 are absent).  Do not edit
 * from this copy alone -- verify against upstream vacuumlazy.c.
 */
 1265static void
 1267 {
 1268 ReadStream *stream;
 1269 BlockNumber rel_pages = vacrel->rel_pages,
 1270 blkno = 0,
 1273 vacrel->eager_scan_remaining_successes; /* for logging */
 1274 Buffer vmbuffer = InvalidBuffer;
 1275 const int initprog_index[] = {
 1279 };
 1281
 1282 /* Report that we're scanning the heap, advertising total # of blocks */
 1284 initprog_val[1] = rel_pages;
 1285 initprog_val[2] = vacrel->dead_items_info->max_bytes;
 1287
 1288 /* Initialize for the first heap_vac_scan_next_block() call */
 1289 vacrel->current_block = InvalidBlockNumber;
 1290 vacrel->next_unskippable_block = InvalidBlockNumber;
 1291 vacrel->next_unskippable_eager_scanned = false;
 1292 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
 1293
 1294 /*
 1295 * Set up the read stream for vacuum's first pass through the heap.
 1296 *
 1297 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
 1298 * explicit work in heap_vac_scan_next_block.
 1299 */
 1301 vacrel->bstrategy,
 1302 vacrel->rel,
 1305 vacrel,
 1306 sizeof(bool));
 1307
 1308 while (true)
 1309 {
 1310 Buffer buf;
 1311 Page page;
 1312 bool was_eager_scanned = false;
 1313 int ndeleted = 0;
 1314 bool has_lpdead_items;
 1315 void *per_buffer_data = NULL;
 1316 bool vm_page_frozen = false;
 1317 bool got_cleanup_lock = false;
 1318
 1319 vacuum_delay_point(false);
 1320
 1321 /*
 1322 * Regularly check if wraparound failsafe should trigger.
 1323 *
 1324 * There is a similar check inside lazy_vacuum_all_indexes(), but
 1325 * relfrozenxid might start to look dangerously old before we reach
 1326 * that point. This check also provides failsafe coverage for the
 1327 * one-pass strategy, and the two-pass strategy with the index_cleanup
 1328 * param set to 'off'.
 1329 */
 1330 if (vacrel->scanned_pages > 0 &&
 1331 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
 /*
  * NOTE(review): the statement guarded by this condition (original line
  * 1332, presumably the wraparound-failsafe call) is missing from this
  * rendering -- confirm against upstream before editing.
  */
 1333
 1334 /*
 1335 * Consider if we definitely have enough space to process TIDs on page
 1336 * already. If we are close to overrunning the available space for
 1337 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
 1338 * this page. However, let's force at least one page-worth of tuples
 1339 * to be stored as to ensure we do at least some work when the memory
 1340 * configured is so low that we run out before storing anything.
 1341 */
 1342 if (vacrel->dead_items_info->num_items > 0 &&
 1343 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
 1344 {
 1345 /*
 1346 * Before beginning index vacuuming, we release any pin we may
 1347 * hold on the visibility map page. This isn't necessary for
 1348 * correctness, but we do it anyway to avoid holding the pin
 1349 * across a lengthy, unrelated operation.
 1350 */
 1351 if (BufferIsValid(vmbuffer))
 1352 {
 1353 ReleaseBuffer(vmbuffer);
 1354 vmbuffer = InvalidBuffer;
 1355 }
 1356
 1357 /* Perform a round of index and heap vacuuming */
 1358 vacrel->consider_bypass_optimization = false;
 1360
 1361 /*
 1362 * Vacuum the Free Space Map to make newly-freed space visible on
 1363 * upper-level FSM pages. Note that blkno is the previously
 1364 * processed block.
 1365 */
 1367 blkno + 1);
 1369
 1370 /* Report that we are once again scanning the heap */
 1373 }
 1374
 1375 buf = read_stream_next_buffer(stream, &per_buffer_data);
 1376
 1377 /* The relation is exhausted. */
 1378 if (!BufferIsValid(buf))
 1379 break;
 1380
 1381 was_eager_scanned = *((bool *) per_buffer_data);
 1383 page = BufferGetPage(buf);
 1384 blkno = BufferGetBlockNumber(buf);
 1385
 1386 vacrel->scanned_pages++;
 1388 vacrel->eager_scanned_pages++;
 1389
 1390 /* Report as block scanned, update error traceback information */
 1393 blkno, InvalidOffsetNumber);
 1394
 1395 /*
 1396 * Pin the visibility map page in case we need to mark the page
 1397 * all-visible. In most cases this will be very cheap, because we'll
 1398 * already have the correct page pinned anyway.
 1399 */
 1400 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
 1401
 1402 /*
 1403 * We need a buffer cleanup lock to prune HOT chains and defragment
 1404 * the page in lazy_scan_prune. But when it's not possible to acquire
 1405 * a cleanup lock right away, we may be able to settle for reduced
 1406 * processing using lazy_scan_noprune.
 1407 */
 1409
 1410 if (!got_cleanup_lock)
 1412
 1413 /* Check for new or empty pages before lazy_scan_[no]prune call */
 1415 vmbuffer))
 1416 {
 1417 /* Processed as new/empty page (lock and pin released) */
 1418 continue;
 1419 }
 1420
 1421 /*
 1422 * If we didn't get the cleanup lock, we can still collect LP_DEAD
 1423 * items in the dead_items area for later vacuuming, count live and
 1424 * recently dead tuples for vacuum logging, and determine if this
 1425 * block could later be truncated. If we encounter any xid/mxids that
 1426 * require advancing the relfrozenxid/relminxid, we'll have to wait
 1427 * for a cleanup lock and call lazy_scan_prune().
 1428 */
 1429 if (!got_cleanup_lock &&
 1430 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
 1431 {
 1432 /*
 1433 * lazy_scan_noprune could not do all required processing. Wait
 1434 * for a cleanup lock, and call lazy_scan_prune in the usual way.
 1435 */
 1436 Assert(vacrel->aggressive);
 1439 got_cleanup_lock = true;
 1440 }
 1441
 1442 /*
 1443 * If we have a cleanup lock, we must now prune, freeze, and count
 1444 * tuples. We may have acquired the cleanup lock originally, or we may
 1445 * have gone back and acquired it after lazy_scan_noprune() returned
 1446 * false. Either way, the page hasn't been processed yet.
 1447 *
 1448 * Like lazy_scan_noprune(), lazy_scan_prune() will count
 1449 * recently_dead_tuples and live tuples for vacuum logging, determine
 1450 * if the block can later be truncated, and accumulate the details of
 1451 * remaining LP_DEAD line pointers on the page into dead_items. These
 1452 * dead items include those pruned by lazy_scan_prune() as well as
 1453 * line pointers previously marked LP_DEAD.
 1454 */
 1455 if (got_cleanup_lock)
 1456 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
 1457 vmbuffer,
 1459
 1460 /*
 1461 * Count an eagerly scanned page as a failure or a success.
 1462 *
 1463 * Only lazy_scan_prune() freezes pages, so if we didn't get the
 1464 * cleanup lock, we won't have frozen the page. However, we only count
 1465 * pages that were too new to require freezing as eager freeze
 1466 * failures.
 1467 *
 1468 * We could gather more information from lazy_scan_noprune() about
 1469 * whether or not there were tuples with XIDs or MXIDs older than the
 1470 * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
 1471 * exclude pages skipped due to cleanup lock contention from eager
 1472 * freeze algorithm caps.
 1473 */
 /*
  * NOTE(review): the guarding condition for the following block
  * (original line 1474, presumably testing was_eager_scanned) is
  * missing from this rendering -- confirm against upstream.
  */
 1475 {
 1476 /* Aggressive vacuums do not eager scan. */
 1477 Assert(!vacrel->aggressive);
 1478
 1479 if (vm_page_frozen)
 1480 {
 1481 if (vacrel->eager_scan_remaining_successes > 0)
 1482 vacrel->eager_scan_remaining_successes--;
 1483
 1484 if (vacrel->eager_scan_remaining_successes == 0)
 1485 {
 1486 /*
 1487 * Report only once that we disabled eager scanning. We
 1488 * may eagerly read ahead blocks in excess of the success
 1489 * or failure caps before attempting to freeze them, so we
 1490 * could reach here even after disabling additional eager
 1491 * scanning.
 1492 */
 1493 if (vacrel->eager_scan_max_fails_per_region > 0)
 1494 ereport(vacrel->verbose ? INFO : DEBUG2,
 1495 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
 1497 vacrel->dbname, vacrel->relnamespace,
 1498 vacrel->relname)));
 1499
 1500 /*
 1501 * If we hit our success cap, permanently disable eager
 1502 * scanning by setting the other eager scan management
 1503 * fields to their disabled values.
 1504 */
 1505 vacrel->eager_scan_remaining_fails = 0;
 1506 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
 1507 vacrel->eager_scan_max_fails_per_region = 0;
 1508 }
 1509 }
 1510 else if (vacrel->eager_scan_remaining_fails > 0)
 1511 vacrel->eager_scan_remaining_fails--;
 1512 }
 1513
 1514 /*
 1515 * Now drop the buffer lock and, potentially, update the FSM.
 1516 *
 1517 * Our goal is to update the freespace map the last time we touch the
 1518 * page. If we'll process a block in the second pass, we may free up
 1519 * additional space on the page, so it is better to update the FSM
 1520 * after the second pass. If the relation has no indexes, or if index
 1521 * vacuuming is disabled, there will be no second heap pass; if this
 1522 * particular page has no dead items, the second heap pass will not
 1523 * touch this page. So, in those cases, update the FSM now.
 1524 *
 1525 * Note: In corner cases, it's possible to miss updating the FSM
 1526 * entirely. If index vacuuming is currently enabled, we'll skip the
 1527 * FSM update now. But if failsafe mode is later activated, or there
 1528 * are so few dead tuples that index vacuuming is bypassed, there will
 1529 * also be no opportunity to update the FSM later, because we'll never
 1530 * revisit this page. Since updating the FSM is desirable but not
 1531 * absolutely required, that's OK.
 1532 */
 1533 if (vacrel->nindexes == 0
 1534 || !vacrel->do_index_vacuuming
 1535 || !has_lpdead_items)
 1536 {
 1537 Size freespace = PageGetHeapFreeSpace(page);
 1538
 1540 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
 1541
 1542 /*
 1543 * Periodically perform FSM vacuuming to make newly-freed space
 1544 * visible on upper FSM pages. This is done after vacuuming if the
 1545 * table has indexes. There will only be newly-freed space if we
 1546 * held the cleanup lock and lazy_scan_prune() was called.
 1547 */
 1548 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
 1550 {
 1552 blkno);
 1554 }
 1555 }
 1556 else
 1558 }
 1559
 1560 vacrel->blkno = InvalidBlockNumber;
 1561 if (BufferIsValid(vmbuffer))
 1562 ReleaseBuffer(vmbuffer);
 1563
 1564 /*
 1565 * Report that everything is now scanned. We never skip scanning the last
 1566 * block in the relation, so we can pass rel_pages here.
 1567 */
 1569 rel_pages);
 1570
 1571 /* now we can compute the new value for pg_class.reltuples */
 1572 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
 1573 vacrel->scanned_pages,
 1574 vacrel->live_tuples);
 1575
 1576 /*
 1577 * Also compute the total number of surviving heap entries. In the
 1578 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
 1579 */
 1580 vacrel->new_rel_tuples =
 1581 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
 1582 vacrel->missed_dead_tuples;
 1583
 1584 read_stream_end(stream);
 1585
 1586 /*
 1587 * Do index vacuuming (call each index's ambulkdelete routine), then do
 1588 * related heap vacuuming
 1589 */
 1590 if (vacrel->dead_items_info->num_items > 0)
 1592
 1593 /*
 1594 * Vacuum the remainder of the Free Space Map. We must do this whether or
 1595 * not there were indexes, and whether or not we bypassed index vacuuming.
 1596 * We can pass rel_pages here because we never skip scanning the last
 1597 * block of the relation.
 1598 */
 1599 if (rel_pages > next_fsm_block_to_vacuum)
 1601
 1602 /* report all blocks vacuumed */
 1604
 1605 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
 1606 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
 1608}
1609
1610/*
 1611 * heap_vac_scan_next_block() -- read stream callback to get the next block
 1612 * for vacuum to process
 1613 *
 1614 * Every time lazy_scan_heap() needs a new block to process during its first
 1615 * phase, it invokes read_stream_next_buffer() with a stream set up to call
 1616 * heap_vac_scan_next_block() to get the next block.
 1617 *
 1618 * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
 1619 * various thresholds to skip blocks which do not need to be processed and
 1620 * returns the next block to process or InvalidBlockNumber if there are no
 1621 * remaining blocks.
 1622 *
 1623 * The visibility status of the next block to process and whether or not it
 1624 * was eager scanned is set in the per_buffer_data.
 1625 *
 1626 * callback_private_data contains a reference to the LVRelState, passed to the
 1627 * read stream API during stream setup. The LVRelState is an in/out parameter
 1628 * here (locally named `vacrel`). Vacuum options and information about the
 1629 * relation are read from it. vacrel->skippedallvis is set if we skip a block
 1630 * that's all-visible but not all-frozen (to ensure that we don't update
 1631 * relfrozenxid in that case). vacrel also holds information about the next
 1632 * unskippable block -- as bookkeeping for this function.
 1633 */
/*
 * NOTE(review): this rendering drops original lines 1635 (the function
 * name and first parameter of the signature) and 1639/1643 (presumably
 * the declaration and advance of next_block) -- verify against
 * upstream vacuumlazy.c before editing.
 */
 1634static BlockNumber
 1636 void *callback_private_data,
 1637 void *per_buffer_data)
 1638{
 1640 LVRelState *vacrel = callback_private_data;
 1641
 1642 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
 1644
 1645 /* Have we reached the end of the relation? */
 1646 if (next_block >= vacrel->rel_pages)
 1647 {
 1648 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
 1649 {
 1650 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
 1651 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
 1652 }
 1653 return InvalidBlockNumber;
 1654 }
 1655
 1656 /*
 1657 * We must be in one of the three following states:
 1658 */
 1659 if (next_block > vacrel->next_unskippable_block ||
 1660 vacrel->next_unskippable_block == InvalidBlockNumber)
 1661 {
 1662 /*
 1663 * 1. We have just processed an unskippable block (or we're at the
 1664 * beginning of the scan). Find the next unskippable block using the
 1665 * visibility map.
 1666 */
 1667 bool skipsallvis;
 1668
 /* NOTE(review): the call that fills skipsallvis (original line 1669,
  * presumably find_next_unskippable_block) is missing from this
  * rendering -- confirm upstream. */
 1670
 1671 /*
 1672 * We now know the next block that we must process. It can be the
 1673 * next block after the one we just processed, or something further
 1674 * ahead. If it's further ahead, we can jump to it, but we choose to
 1675 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
 1676 * pages. Since we're reading sequentially, the OS should be doing
 1677 * readahead for us, so there's no gain in skipping a page now and
 1678 * then. Skipping such a range might even discourage sequential
 1679 * detection.
 1680 *
 1681 * This test also enables more frequent relfrozenxid advancement
 1682 * during non-aggressive VACUUMs. If the range has any all-visible
 1683 * pages then skipping makes updating relfrozenxid unsafe, which is a
 1684 * real downside.
 1685 */
 1686 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
 1687 {
 1688 next_block = vacrel->next_unskippable_block;
 1689 if (skipsallvis)
 1690 vacrel->skippedallvis = true;
 1691 }
 1692 }
 1693
 1694 /* Now we must be in one of the two remaining states: */
 1695 if (next_block < vacrel->next_unskippable_block)
 1696 {
 1697 /*
 1698 * 2. We are processing a range of blocks that we could have skipped
 1699 * but chose not to. We know that they are all-visible in the VM,
 1700 * otherwise they would've been unskippable.
 1701 */
 1702 vacrel->current_block = next_block;
 1703 /* Block was not eager scanned */
 1704 *((bool *) per_buffer_data) = false;
 1705 return vacrel->current_block;
 1706 }
 1707 else
 1708 {
 1709 /*
 1710 * 3. We reached the next unskippable block. Process it. On next
 1711 * iteration, we will be back in state 1.
 1712 */
 1713 Assert(next_block == vacrel->next_unskippable_block);
 1714
 1715 vacrel->current_block = next_block;
 1716 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
 1717 return vacrel->current_block;
 1718 }
 1719}
1720
1721/*
 1722 * Find the next unskippable block in a vacuum scan using the visibility map.
 1723 * The next unskippable block and its visibility information is updated in
 1724 * vacrel.
 1725 *
 1726 * Note: our opinion of which blocks can be skipped can go stale immediately.
 1727 * It's okay if caller "misses" a page whose all-visible or all-frozen marking
 1728 * was concurrently cleared, though. All that matters is that caller scan all
 1729 * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
 1730 * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
 1731 * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
 1732 * to skip such a range is actually made, making everything safe.)
 1733 */
/*
 * NOTE(review): this rendering drops original line 1735 (the function
 * name and parameter list; per the header comment this is
 * find_next_unskippable_block(vacrel, skipsallvis) -- confirm) and line
 * 1746 (presumably the visibilitymap_get_status() call that sets the
 * mapbits tested below).  Verify against upstream vacuumlazy.c.
 */
 1734static void
 1736{
 1737 BlockNumber rel_pages = vacrel->rel_pages;
 1738 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
 1739 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
 1740 bool next_unskippable_eager_scanned = false;
 1741
 1742 *skipsallvis = false;
 1743
 1744 for (;; next_unskippable_block++)
 1745 {
 1747 next_unskippable_block,
 1748 &next_unskippable_vmbuffer);
 1749
 1750
 1751 /*
 1752 * At the start of each eager scan region, normal vacuums with eager
 1753 * scanning enabled reset the failure counter, allowing vacuum to
 1754 * resume eager scanning if it had been suspended in the previous
 1755 * region.
 1756 */
 1757 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
 1758 {
 1759 vacrel->eager_scan_remaining_fails =
 1760 vacrel->eager_scan_max_fails_per_region;
 1761 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
 1762 }
 1763
 1764 /*
 1765 * A block is unskippable if it is not all visible according to the
 1766 * visibility map.
 1767 */
 1769 {
 1771 break;
 1772 }
 1773
 1774 /*
 1775 * Caller must scan the last page to determine whether it has tuples
 1776 * (caller must have the opportunity to set vacrel->nonempty_pages).
 1777 * This rule avoids having lazy_truncate_heap() take access-exclusive
 1778 * lock on rel to attempt a truncation that fails anyway, just because
 1779 * there are tuples on the last page (it is likely that there will be
 1780 * tuples on other nearby pages as well, but those can be skipped).
 1781 *
 1782 * Implement this by always treating the last block as unsafe to skip.
 1783 */
 1784 if (next_unskippable_block == rel_pages - 1)
 1785 break;
 1786
 1787 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
 1788 if (!vacrel->skipwithvm)
 1789 break;
 1790
 1791 /*
 1792 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
 1793 * already frozen by now), so this page can be skipped.
 1794 */
 1795 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
 1796 continue;
 1797
 1798 /*
 1799 * Aggressive vacuums cannot skip any all-visible pages that are not
 1800 * also all-frozen.
 1801 */
 1802 if (vacrel->aggressive)
 1803 break;
 1804
 1805 /*
 1806 * Normal vacuums with eager scanning enabled only skip all-visible
 1807 * but not all-frozen pages if they have hit the failure limit for the
 1808 * current eager scan region.
 1809 */
 1810 if (vacrel->eager_scan_remaining_fails > 0)
 1811 {
 1812 next_unskippable_eager_scanned = true;
 1813 break;
 1814 }
 1815
 1816 /*
 1817 * All-visible blocks are safe to skip in a normal vacuum. But
 1818 * remember that the final range contains such a block for later.
 1819 */
 1820 *skipsallvis = true;
 1821 }
 1822
 1823 /* write the local variables back to vacrel */
 1824 vacrel->next_unskippable_block = next_unskippable_block;
 1825 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
 1826 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
 1827}
1828
1829/*
 1830 * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
 1831 *
 1832 * Must call here to handle both new and empty pages before calling
 1833 * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
 1834 * with new or empty pages.
 1835 *
 1836 * It's necessary to consider new pages as a special case, since the rules for
 1837 * maintaining the visibility map and FSM with empty pages are a little
 1838 * different (though new pages can be truncated away during rel truncation).
 1839 *
 1840 * Empty pages are not really a special case -- they're just heap pages that
 1841 * have no allocated tuples (including even LP_UNUSED items). You might
 1842 * wonder why we need to handle them here all the same. It's only necessary
 1843 * because of a corner-case involving a hard crash during heap relation
 1844 * extension. If we ever make relation-extension crash safe, then it should
 1845 * no longer be necessary to deal with empty pages here (or new pages, for
 1846 * that matter).
 1847 *
 1848 * Caller must hold at least a shared lock. We might need to escalate the
 1849 * lock in that case, so the type of lock caller holds needs to be specified
 1850 * using 'sharelock' argument.
 1851 *
 1852 * Returns false in common case where caller should go on to call
 1853 * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
 1854 * that lazy_scan_heap is done processing the page, releasing lock on caller's
 1855 * behalf.
 1856 *
 1857 * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
 1858 * is passed here because neither empty nor new pages can be eagerly frozen.
 1859 * New pages are never frozen. Empty pages are always set frozen in the VM at
 1860 * the same time that they are set all-visible, and we don't eagerly scan
 1861 * frozen pages.
 1862 */
/*
 * NOTE(review): this rendering drops original line 1864 (the function
 * name plus its first parameters; only the tail of the signature is
 * visible below) and several interior lines (e.g. 1890, presumably the
 * buffer lock/unlock call before the FSM check, and 1911-1912, the
 * share-lock escalation).  Verify against upstream vacuumlazy.c.
 */
 1863static bool
 1865 Page page, bool sharelock, Buffer vmbuffer)
 1866{
 1867 Size freespace;
 1868
 1869 if (PageIsNew(page))
 1870 {
 1871 /*
 1872 * All-zeroes pages can be left over if either a backend extends the
 1873 * relation by a single page, but crashes before the newly initialized
 1874 * page has been written out, or when bulk-extending the relation
 1875 * (which creates a number of empty pages at the tail end of the
 1876 * relation), and then enters them into the FSM.
 1877 *
 1878 * Note we do not enter the page into the visibilitymap. That has the
 1879 * downside that we repeatedly visit this page in subsequent vacuums,
 1880 * but otherwise we'll never discover the space on a promoted standby.
 1881 * The harm of repeated checking ought to normally not be too bad. The
 1882 * space usually should be used at some point, otherwise there
 1883 * wouldn't be any regular vacuums.
 1884 *
 1885 * Make sure these pages are in the FSM, to ensure they can be reused.
 1886 * Do that by testing if there's any space recorded for the page. If
 1887 * not, enter it. We do so after releasing the lock on the heap page,
 1888 * the FSM is approximate, after all.
 1889 */
 1891
 1892 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
 1893 {
 1894 freespace = BLCKSZ - SizeOfPageHeaderData;
 1895
 1896 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
 1897 }
 1898
 1899 return true;
 1900 }
 1901
 1902 if (PageIsEmpty(page))
 1903 {
 1904 /*
 1905 * It seems likely that caller will always be able to get a cleanup
 1906 * lock on an empty page. But don't take any chances -- escalate to
 1907 * an exclusive lock (still don't need a cleanup lock, though).
 1908 */
 1909 if (sharelock)
 1910 {
 1913
 1914 if (!PageIsEmpty(page))
 1915 {
 1916 /* page isn't new or empty -- keep lock and pin for now */
 1917 return false;
 1918 }
 1919 }
 1920 else
 1921 {
 1922 /* Already have a full cleanup lock (which is more than enough) */
 1923 }
 1924
 1925 /*
 1926 * Unlike new pages, empty pages are always set all-visible and
 1927 * all-frozen.
 1928 */
 1929 if (!PageIsAllVisible(page))
 1930 {
 1931 /* Lock vmbuffer before entering critical section */
 1933
 1935
 1936 /* mark buffer dirty before writing a WAL record */
 1938
 1939 PageSetAllVisible(page);
 1940 PageClearPrunable(page);
 1941 visibilitymap_set(blkno,
 1942 vmbuffer,
 1945 vacrel->rel->rd_locator);
 1946
 1947 /*
 1948 * Emit WAL for setting PD_ALL_VISIBLE on the heap page and
 1949 * setting the VM.
 1950 */
 1951 if (RelationNeedsWAL(vacrel->rel))
 1953 vmbuffer,
 1956 InvalidTransactionId, /* conflict xid */
 1957 false, /* cleanup lock */
 1958 PRUNE_VACUUM_SCAN, /* reason */
 1959 NULL, 0,
 1960 NULL, 0,
 1961 NULL, 0,
 1962 NULL, 0);
 1963
 1965
 1966 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
 1967
 1968 /* Count the newly all-frozen pages for logging */
 1969 vacrel->new_all_visible_pages++;
 1970 vacrel->new_all_visible_all_frozen_pages++;
 1971 }
 1972
 1973 freespace = PageGetHeapFreeSpace(page);
 1975 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
 1976 return true;
 1977 }
 1978
 1979 /* page isn't new or empty -- keep lock and pin */
 1980 return false;
 1981}
1982
1983/* qsort comparator for sorting OffsetNumbers */
1984static int
1985cmpOffsetNumbers(const void *a, const void *b)
1986{
1987 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1988}
1989
1990/*
1991 * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1992 *
1993 * Caller must hold pin and buffer cleanup lock on the buffer.
1994 *
1995 * vmbuffer is the buffer containing the VM block with visibility information
1996 * for the heap block, blkno.
1997 *
1998 * *has_lpdead_items is set to true or false depending on whether, upon return
1999 * from this function, any LP_DEAD items are still present on the page.
2000 *
2001 * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2002 * VM. The caller currently only uses this for determining whether an eagerly
2003 * scanned page was successfully set all-frozen.
2004 *
2005 * Returns the number of tuples deleted from the page during HOT pruning.
2006 */
2007static int
/* NOTE(review): the line naming the function and its first parameter
 * (per the header comment, lazy_scan_prune(LVRelState *vacrel, ...))
 * is missing from this copy -- confirm against upstream. */
2009 Buffer buf,
2010 BlockNumber blkno,
2011 Page page,
2012 Buffer vmbuffer,
2013 bool *has_lpdead_items,
2014 bool *vm_page_frozen)
2015{
2016 Relation rel = vacrel->rel;
2018 PruneFreezeParams params = {
2019 .relation = rel,
2020 .buffer = buf,
2021 .vmbuffer = vmbuffer,
2022 .reason = PRUNE_VACUUM_SCAN,
2023 .options = HEAP_PAGE_PRUNE_FREEZE,
2024 .vistest = vacrel->vistest,
2025 .cutoffs = &vacrel->cutoffs,
2026 };
2027
2028 Assert(BufferGetBlockNumber(buf) == blkno);
2029
2030 /*
2031 * Prune all HOT-update chains and potentially freeze tuples on this page.
2032 *
2033 * If the relation has no indexes, we can immediately mark would-be dead
2034 * items LP_UNUSED.
2035 *
2036 * The number of tuples removed from the page is returned in
2037 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2038 * presult.lpdead_items's final value can be thought of as the number of
2039 * tuples that were deleted from indexes.
2040 *
2041 * We will update the VM after collecting LP_DEAD items and freezing
2042 * tuples. Pruning will have determined whether or not the page is
2043 * all-visible.
2044 */
2045 if (vacrel->nindexes == 0)
/* NOTE(review): the statement controlled by this if (presumably OR-ing an
 * extra option flag into params.options) was dropped by the extraction. */
2047
2048 /*
2049 * Allow skipping full inspection of pages that the VM indicates are
2050 * already all-frozen (which may be scanned due to SKIP_PAGES_THRESHOLD).
2051 * However, if DISABLE_PAGE_SKIPPING was specified, we can't trust the VM,
2052 * so we must examine the page to make sure it is truly all-frozen and fix
2053 * it otherwise.
2054 */
2055 if (vacrel->skipwithvm)
2057
/* NOTE(review): the pruning/freezing call that fills presult and updates
 * vacrel->offnum / NewRelfrozenXid / NewRelminMxid starts on a line that is
 * missing here; only its trailing argument lines survive below. */
2059 &presult,
2060 &vacrel->offnum,
2061 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2062
2063 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2064 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2065
2066 if (presult.nfrozen > 0)
2067 {
2068 /*
2069 * We don't increment the new_frozen_tuple_pages instrumentation
2070 * counter when nfrozen == 0, since it only counts pages with newly
2071 * frozen tuples (don't confuse that with pages newly set all-frozen
2072 * in VM).
2073 */
2074 vacrel->new_frozen_tuple_pages++;
2075 }
2076
2077 /*
2078 * Now save details of the LP_DEAD items from the page in vacrel
2079 */
2080 if (presult.lpdead_items > 0)
2081 {
2082 vacrel->lpdead_item_pages++;
2083
2084 /*
2085 * deadoffsets are collected incrementally in
2086 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2087 * with an indeterminate order, but dead_items_add requires them to be
2088 * sorted.
2089 */
2090 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2092
2093 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2094 }
2095
2096 /* Finally, add page-local counts to whole-VACUUM counts */
2097 if (presult.newly_all_visible)
2098 vacrel->new_all_visible_pages++;
2099 if (presult.newly_all_visible_frozen)
2100 vacrel->new_all_visible_all_frozen_pages++;
2101 if (presult.newly_all_frozen)
2102 vacrel->new_all_frozen_pages++;
2103
2104 /* Capture if the page was newly set frozen */
2105 *vm_page_frozen = presult.newly_all_visible_frozen ||
2106 presult.newly_all_frozen;
2107
/* Accumulate this page's tuple-level counts into whole-VACUUM totals. */
2108 vacrel->tuples_deleted += presult.ndeleted;
2109 vacrel->tuples_frozen += presult.nfrozen;
2110 vacrel->lpdead_items += presult.lpdead_items;
2111 vacrel->live_tuples += presult.live_tuples;
2112 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2113
2114 /* Can't truncate this page */
2115 if (presult.hastup)
2116 vacrel->nonempty_pages = blkno + 1;
2117
2118 /* Did we find LP_DEAD items? */
2119 *has_lpdead_items = (presult.lpdead_items > 0);
2120
2121 return presult.ndeleted;
2122}
2123
2124/*
2125 * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2126 *
2127 * Caller need only hold a pin and share lock on the buffer, unlike
2128 * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2129 * performed here, it's quite possible that an earlier opportunistic pruning
2130 * operation left LP_DEAD items behind. We'll at least collect any such items
2131 * in dead_items for removal from indexes.
2132 *
2133 * For aggressive VACUUM callers, we may return false to indicate that a full
2134 * cleanup lock is required for processing by lazy_scan_prune. This is only
2135 * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2136 * one or more tuples on the page. We always return true for non-aggressive
2137 * callers.
2138 *
2139 * If this function returns true, *has_lpdead_items gets set to true or false
2140 * depending on whether, upon return from this function, any LP_DEAD items are
2141 * present on the page. If this function returns false, *has_lpdead_items
2142 * is not updated.
2143 */
2144static bool
/* NOTE(review): the line naming lazy_scan_noprune and its first parameter
 * (presumably LVRelState *vacrel) is missing from this copy. */
2146 Buffer buf,
2147 BlockNumber blkno,
2148 Page page,
2149 bool *has_lpdead_items)
2150{
2151 OffsetNumber offnum,
2152 maxoff;
2153 int lpdead_items,
2154 live_tuples,
2155 recently_dead_tuples,
2156 missed_dead_tuples;
2157 bool hastup;
2159 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2160 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
/* NOTE(review): declarations used later in this function (at least the
 * deadoffsets[] array and the tupleheader pointer) appear to have been
 * dropped by the extraction around here. */
2162
2163 Assert(BufferGetBlockNumber(buf) == blkno);
2164
2165 hastup = false; /* for now */
2166
2167 lpdead_items = 0;
2168 live_tuples = 0;
2169 recently_dead_tuples = 0;
2170 missed_dead_tuples = 0;
2171
2172 maxoff = PageGetMaxOffsetNumber(page);
2173 for (offnum = FirstOffsetNumber;
2174 offnum <= maxoff;
2175 offnum = OffsetNumberNext(offnum))
2176 {
2177 ItemId itemid;
2178 HeapTupleData tuple;
2179
2180 vacrel->offnum = offnum;
2181 itemid = PageGetItemId(page, offnum);
2182
2183 if (!ItemIdIsUsed(itemid))
2184 continue;
2185
2186 if (ItemIdIsRedirected(itemid))
2187 {
2188 hastup = true;
2189 continue;
2190 }
2191
2192 if (ItemIdIsDead(itemid))
2193 {
2194 /*
2195 * Deliberately don't set hastup=true here. See same point in
2196 * lazy_scan_prune for an explanation.
2197 */
2198 deadoffsets[lpdead_items++] = offnum;
2199 continue;
2200 }
2201
2202 hastup = true; /* page prevents rel truncation */
2203 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
/* NOTE(review): the condition line that tests whether this tuple would
 * require freezing (consuming tupleheader and producing the two
 * NoFreezePage* values below) is missing from this copy. */
2205 &NoFreezePageRelfrozenXid,
2206 &NoFreezePageRelminMxid))
2207 {
2208 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2209 if (vacrel->aggressive)
2210 {
2211 /*
2212 * Aggressive VACUUMs must always be able to advance rel's
2213 * relfrozenxid to a value >= FreezeLimit (and be able to
2214 * advance rel's relminmxid to a value >= MultiXactCutoff).
2215 * The ongoing aggressive VACUUM won't be able to do that
2216 * unless it can freeze an XID (or MXID) from this tuple now.
2217 *
2218 * The only safe option is to have caller perform processing
2219 * of this page using lazy_scan_prune. Caller might have to
2220 * wait a while for a cleanup lock, but it can't be helped.
2221 */
2222 vacrel->offnum = InvalidOffsetNumber;
2223 return false;
2224 }
2225
2226 /*
2227 * Non-aggressive VACUUMs are under no obligation to advance
2228 * relfrozenxid (even by one XID). We can be much laxer here.
2229 *
2230 * Currently we always just accept an older final relfrozenxid
2231 * and/or relminmxid value. We never make caller wait or work a
2232 * little harder, even when it likely makes sense to do so.
2233 */
2234 }
2235
2236 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2237 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2238 tuple.t_len = ItemIdGetLength(itemid);
2239 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2240
/* Classify the tuple's visibility without modifying the page (no cleanup
 * lock is held here, so counters are the only thing we may update). */
2241 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2242 buf))
2243 {
/* NOTE(review): a case label (line 2244) is missing here; the comment below
 * says "both cases", suggesting it shared this arm with HEAPTUPLE_LIVE. */
2245 case HEAPTUPLE_LIVE:
2246
2247 /*
2248 * Count both cases as live, just like lazy_scan_prune
2249 */
2250 live_tuples++;
2251
2252 break;
2253 case HEAPTUPLE_DEAD:
2254
2255 /*
2256 * There is some useful work for pruning to do, that won't be
2257 * done due to failure to get a cleanup lock.
2258 */
2259 missed_dead_tuples++;
2260 break;
/* NOTE(review): case labels for the next two arms (lines 2261 and 2268)
 * are missing from this copy. */
2262
2263 /*
2264 * Count in recently_dead_tuples, just like lazy_scan_prune
2265 */
2266 recently_dead_tuples++;
2267 break;
2269
2270 /*
2271 * Do not count these rows as live, just like lazy_scan_prune
2272 */
2273 break;
2274 default:
2275 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2276 break;
2277 }
2278 }
2279
2280 vacrel->offnum = InvalidOffsetNumber;
2281
2282 /*
2283 * By here we know for sure that caller can put off freezing and pruning
2284 * this particular page until the next VACUUM. Remember its details now.
2285 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2286 */
2287 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2288 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2289
2290 /* Save any LP_DEAD items found on the page in dead_items */
2291 if (vacrel->nindexes == 0)
2292 {
2293 /* Using one-pass strategy (since table has no indexes) */
2294 if (lpdead_items > 0)
2295 {
2296 /*
2297 * Perfunctory handling for the corner case where a single pass
2298 * strategy VACUUM cannot get a cleanup lock, and it turns out
2299 * that there is one or more LP_DEAD items: just count the LP_DEAD
2300 * items as missed_dead_tuples instead. (This is a bit dishonest,
2301 * but it beats having to maintain specialized heap vacuuming code
2302 * forever, for vanishingly little benefit.)
2303 */
2304 hastup = true;
2305 missed_dead_tuples += lpdead_items;
2306 }
2307 }
2308 else if (lpdead_items > 0)
2309 {
2310 /*
2311 * Page has LP_DEAD items, and so any references/TIDs that remain in
2312 * indexes will be deleted during index vacuuming (and then marked
2313 * LP_UNUSED in the heap)
2314 */
2315 vacrel->lpdead_item_pages++;
2316
2317 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2318
2319 vacrel->lpdead_items += lpdead_items;
2320 }
2321
2322 /*
2323 * Finally, add relevant page-local counts to whole-VACUUM counts
2324 */
2325 vacrel->live_tuples += live_tuples;
2326 vacrel->recently_dead_tuples += recently_dead_tuples;
2327 vacrel->missed_dead_tuples += missed_dead_tuples;
2328 if (missed_dead_tuples > 0)
2329 vacrel->missed_dead_pages++;
2330
2331 /* Can't truncate this page */
2332 if (hastup)
2333 vacrel->nonempty_pages = blkno + 1;
2334
2335 /* Did we find LP_DEAD items? */
2336 *has_lpdead_items = (lpdead_items > 0);
2337
2338 /* Caller won't need to call lazy_scan_prune with same page */
2339 return true;
2340}
2341
2342/*
2343 * Main entry point for index vacuuming and heap vacuuming.
2344 *
2345 * Removes items collected in dead_items from table's indexes, then marks the
2346 * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2347 * for full details.
2348 *
2349 * Also empties dead_items, freeing up space for later TIDs.
2350 *
2351 * We may choose to bypass index vacuuming at this point, though only when the
2352 * ongoing VACUUM operation will definitely only have one index scan/round of
2353 * index vacuuming.
2354 */
2355static void
/* NOTE(review): the function-name line (presumably lazy_vacuum(LVRelState
 * *vacrel)) is missing from this copy. */
2357{
2358 bool bypass;
2359
2360 /* Should not end up here with no indexes */
2361 Assert(vacrel->nindexes > 0);
2362 Assert(vacrel->lpdead_item_pages > 0);
2363
2364 if (!vacrel->do_index_vacuuming)
2365 {
2366 Assert(!vacrel->do_index_cleanup);
/* NOTE(review): the statement that discards dead_items before returning
 * (line 2367) is missing from this copy. */
2368 return;
2369 }
2370
2371 /*
2372 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2373 *
2374 * We currently only do this in cases where the number of LP_DEAD items
2375 * for the entire VACUUM operation is close to zero. This avoids sharp
2376 * discontinuities in the duration and overhead of successive VACUUM
2377 * operations that run against the same table with a fixed workload.
2378 * Ideally, successive VACUUM operations will behave as if there are
2379 * exactly zero LP_DEAD items in cases where there are close to zero.
2380 *
2381 * This is likely to be helpful with a table that is continually affected
2382 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2383 * have small aberrations that lead to just a few heap pages retaining
2384 * only one or two LP_DEAD items. This is pretty common; even when the
2385 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2386 * impossible to predict whether HOT will be applied in 100% of cases.
2387 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2388 * HOT through careful tuning.
2389 */
2390 bypass = false;
2391 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2392 {
/* NOTE(review): the declaration/initialization of "threshold" (used below,
 * presumably derived from BYPASS_THRESHOLD_PAGES * rel_pages) is missing
 * from this copy. */
2394
2395 Assert(vacrel->num_index_scans == 0);
2396 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2397 Assert(vacrel->do_index_vacuuming);
2398 Assert(vacrel->do_index_cleanup);
2399
2400 /*
2401 * This crossover point at which we'll start to do index vacuuming is
2402 * expressed as a percentage of the total number of heap pages in the
2403 * table that are known to have at least one LP_DEAD item. This is
2404 * much more important than the total number of LP_DEAD items, since
2405 * it's a proxy for the number of heap pages whose visibility map bits
2406 * cannot be set on account of bypassing index and heap vacuuming.
2407 *
2408 * We apply one further precautionary test: the space currently used
2409 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2410 * not exceed 32MB. This limits the risk that we will bypass index
2411 * vacuuming again and again until eventually there is a VACUUM whose
2412 * dead_items space is not CPU cache resident.
2413 *
2414 * We don't take any special steps to remember the LP_DEAD items (such
2415 * as counting them in our final update to the stats system) when the
2416 * optimization is applied. Though the accounting used in analyze.c's
2417 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2418 * rows in its own stats report, that's okay. The discrepancy should
2419 * be negligible. If this optimization is ever expanded to cover more
2420 * cases then this may need to be reconsidered.
2421 */
2423 bypass = (vacrel->lpdead_item_pages < threshold &&
2424 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2425 }
2426
2427 if (bypass)
2428 {
2429 /*
2430 * There are almost zero TIDs. Behave as if there were precisely
2431 * zero: bypass index vacuuming, but do index cleanup.
2432 *
2433 * We expect that the ongoing VACUUM operation will finish very
2434 * quickly, so there is no point in considering speeding up as a
2435 * failsafe against wraparound failure. (Index cleanup is expected to
2436 * finish very quickly in cases where there were no ambulkdelete()
2437 * calls.)
2438 */
2439 vacrel->do_index_vacuuming = false;
2440 }
/* NOTE(review): the controlling "else if" condition line (2441) is missing
 * here; per the comment below it tests whether a full round of index
 * vacuuming completed successfully. */
2442 {
2443 /*
2444 * We successfully completed a round of index vacuuming. Do related
2445 * heap vacuuming now.
2446 */
2448 }
2449 else
2450 {
2451 /*
2452 * Failsafe case.
2453 *
2454 * We attempted index vacuuming, but didn't finish a full round/full
2455 * index scan. This happens when relfrozenxid or relminmxid is too
2456 * far in the past.
2457 *
2458 * From this point on the VACUUM operation will do no further index
2459 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2460 * back here again.
2461 */
2463 }
2464
2465 /*
2466 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2467 * vacuum)
2468 */
2470}
2471
2472/*
2473 * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2474 *
2475 * Returns true in the common case when all indexes were successfully
2476 * vacuumed. Returns false in rare cases where we determined that the ongoing
2477 * VACUUM operation is at risk of taking too long to finish, leading to
2478 * wraparound failure.
2479 */
2480static bool
/* NOTE(review): the function-name line (presumably
 * lazy_vacuum_all_indexes(LVRelState *vacrel)) is missing from this copy. */
2482{
2483 bool allindexes = true;
2484 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2485 const int progress_start_index[] = {
/* NOTE(review): the initializer contents of the progress index/value arrays
 * (lines 2486-2487, 2490-2492, 2494-2495) were dropped by the extraction. */
2488 };
2489 const int progress_end_index[] = {
2493 };
2496
2497 Assert(vacrel->nindexes > 0);
2498 Assert(vacrel->do_index_vacuuming);
2499 Assert(vacrel->do_index_cleanup);
2500
2501 /* Precheck for XID wraparound emergencies */
/* NOTE(review): the if-condition line (2502), presumably calling
 * lazy_check_wraparound_failsafe(), is missing from this copy. */
2503 {
2504 /* Wraparound emergency -- don't even start an index scan */
2505 return false;
2506 }
2507
2508 /*
2509 * Report that we are now vacuuming indexes and the number of indexes to
2510 * vacuum.
2511 */
2513 progress_start_val[1] = vacrel->nindexes;
2515
2517 {
2518 for (int idx = 0; idx < vacrel->nindexes; idx++)
2519 {
2520 Relation indrel = vacrel->indrels[idx];
2521 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2522
2523 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2525 vacrel);
2526
2527 /* Report the number of indexes vacuumed */
2529 idx + 1);
2530
2532 {
2533 /* Wraparound emergency -- end current index scan */
2534 allindexes = false;
2535 break;
2536 }
2537 }
2538 }
2539 else
2540 {
2541 /* Outsource everything to parallel variant */
2543 vacrel->num_index_scans,
2544 &(vacrel->worker_usage.vacuum));
2545
2546 /*
2547 * Do a postcheck to consider applying wraparound failsafe now. Note
2548 * that parallel VACUUM only gets the precheck and this postcheck.
2549 */
2551 allindexes = false;
2552 }
2553
2554 /*
2555 * We delete all LP_DEAD items from the first heap pass in all indexes on
2556 * each call here (except calls where we choose to do the failsafe). This
2557 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2558 * of the failsafe triggering, which prevents the next call from taking
2559 * place).
2560 */
2561 Assert(vacrel->num_index_scans > 0 ||
2562 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2564
2565 /*
2566 * Increase and report the number of index scans. Also, we reset
2567 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2568 *
2569 * We deliberately include the case where we started a round of bulk
2570 * deletes that we weren't able to finish due to the failsafe triggering.
2571 */
2572 vacrel->num_index_scans++;
2573 progress_end_val[0] = 0;
2574 progress_end_val[1] = 0;
2575 progress_end_val[2] = vacrel->num_index_scans;
2577
2578 return allindexes;
2579}
2580
2581/*
2582 * Read stream callback for vacuum's third phase (second pass over the heap).
2583 * Gets the next block from the TID store and returns it or InvalidBlockNumber
2584 * if there are no further blocks to vacuum.
2585 *
2586 * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2587 */
2588static BlockNumber
/* NOTE(review): the line naming this callback and its first parameter
 * (presumably vacuum_reap_lp_read_stream_next(ReadStream *stream, ...))
 * is missing from this copy. */
2590 void *callback_private_data,
2591 void *per_buffer_data)
2592{
2593 TidStoreIter *iter = callback_private_data;
/* NOTE(review): the declaration of iter_result (and the
 * TidStoreIterateNext() call that assigns it) appears to be missing here. */
2595
2597 if (iter_result == NULL)
2598 return InvalidBlockNumber;
2599
2600 /*
2601 * Save the TidStoreIterResult for later, so we can extract the offsets.
2602 * It is safe to copy the result, according to TidStoreIterateNext().
2603 */
2604 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2605
2606 return iter_result->blkno;
2607}
2608
2609/*
2610 * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2611 *
2612 * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2613 * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2614 *
2615 * We may also be able to truncate the line pointer array of the heap pages we
2616 * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2617 * array, it can be reclaimed as free space. These LP_UNUSED items usually
2618 * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2619 * each page to LP_UNUSED, and then consider if it's possible to truncate the
2620 * page's line pointer array).
2621 *
2622 * Note: the reason for doing this as a second pass is we cannot remove the
2623 * tuples until we've removed their index entries, and we want to process
2624 * index entry removal in batches as large as possible.
2625 */
2626static void
/* NOTE(review): the function-name line (presumably
 * lazy_vacuum_heap_rel(LVRelState *vacrel)) is missing from this copy. */
2628{
2629 ReadStream *stream;
/* NOTE(review): several local declarations (lines 2630/2632, at least the
 * vacuumed_pages counter used below and a saved-error-info struct) were
 * dropped by the extraction. */
2631 Buffer vmbuffer = InvalidBuffer;
2633 TidStoreIter *iter;
2634
2635 Assert(vacrel->do_index_vacuuming);
2636 Assert(vacrel->do_index_cleanup);
2637 Assert(vacrel->num_index_scans > 0);
2638
2639 /* Report that we are now vacuuming the heap */
2642
2643 /* Update error traceback information */
2647
2648 iter = TidStoreBeginIterate(vacrel->dead_items);
2649
2650 /*
2651 * Set up the read stream for vacuum's second pass through the heap.
2652 *
2653 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2654 * not need to wait for IO and does not perform locking. Once we support
2655 * parallelism it should still be fine, as presumably the holder of locks
2656 * would never be blocked by IO while holding the lock.
2657 */
/* NOTE(review): the read_stream_begin_relation() call line(s) assigning
 * "stream" are truncated; only trailing argument lines survive below. */
2660 vacrel->bstrategy,
2661 vacrel->rel,
2664 iter,
2665 sizeof(TidStoreIterResult));
2666
2667 while (true)
2668 {
2669 BlockNumber blkno;
2670 Buffer buf;
2671 Page page;
2673 Size freespace;
2675 int num_offsets;
2676
2677 vacuum_delay_point(false);
2678
2679 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2680
2681 /* The relation is exhausted */
2682 if (!BufferIsValid(buf))
2683 break;
2684
2685 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2686
/* NOTE(review): the call extracting the per-block offsets array and
 * num_offsets from iter_result (lines 2687-2688) is missing here. */
2689 Assert(num_offsets <= lengthof(offsets));
2690
2691 /*
2692 * Pin the visibility map page in case we need to mark the page
2693 * all-visible. In most cases this will be very cheap, because we'll
2694 * already have the correct page pinned anyway.
2695 */
2696 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2697
2698 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2700 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2701 num_offsets, vmbuffer);
2702
2703 /* Now that we've vacuumed the page, record its available space */
2704 page = BufferGetPage(buf);
2705 freespace = PageGetHeapFreeSpace(page);
2706
2708 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2710 }
2711
2712 read_stream_end(stream);
2713 TidStoreEndIterate(iter);
2714
2715 vacrel->blkno = InvalidBlockNumber;
2716 if (BufferIsValid(vmbuffer))
2717 ReleaseBuffer(vmbuffer);
2718
2719 /*
2720 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2721 * the second heap pass. No more, no less.
2722 */
2723 Assert(vacrel->num_index_scans > 1 ||
2724 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2725 vacuumed_pages == vacrel->lpdead_item_pages));
2726
2728 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2729 vacrel->relname, vacrel->dead_items_info->num_items,
2730 vacuumed_pages)));
2731
2732 /* Revert to the previous phase information for error traceback */
2734}
2735
2736/*
2737 * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2738 * vacrel->dead_items store.
2739 *
2740 * Caller must have an exclusive buffer lock on the buffer (though a full
2741 * cleanup lock is also acceptable). vmbuffer must be valid and already have
2742 * a pin on blkno's visibility map page.
2743 */
2744static void
/* NOTE(review): the line naming lazy_vacuum_heap_page and its leading
 * parameters (vacrel, blkno, buffer) is missing from this copy. */
2746 OffsetNumber *deadoffsets, int num_offsets,
2747 Buffer vmbuffer)
2748{
2749 Page page = BufferGetPage(buffer);
/* NOTE(review): the declaration of the unused[] offset array (filled in the
 * loop below) and a few other locals (lines 2750/2753/2755) are missing. */
2751 int nunused = 0;
2752 TransactionId newest_live_xid;
2754 bool all_frozen;
2756 uint8 vmflags = 0;
2757
2758 Assert(vacrel->do_index_vacuuming);
2759
2761
2762 /* Update error traceback information */
2766
2767 /*
2768 * Before marking dead items unused, check whether the page will become
2769 * all-visible once that change is applied. This lets us reap the tuples
2770 * and mark the page all-visible within the same critical section,
2771 * enabling both changes to be emitted in a single WAL record. Since the
2772 * visibility checks may perform I/O and allocate memory, they must be
2773 * done outside the critical section.
2774 */
2775 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2776 vacrel->vistest, true,
2777 deadoffsets, num_offsets,
2778 &all_frozen, &newest_live_xid,
2779 &vacrel->offnum))
2780 {
/* NOTE(review): the statements setting vmflags (lines 2781/2784) were
 * dropped by the extraction. */
2782 if (all_frozen)
2783 {
2785 Assert(!TransactionIdIsValid(newest_live_xid));
2786 }
2787
2788 /*
2789 * Take the lock on the vmbuffer before entering a critical section.
2790 * The heap page lock must also be held while updating the VM to
2791 * ensure consistency.
2792 */
2794 }
2795
2797
2798 for (int i = 0; i < num_offsets; i++)
2799 {
2800 ItemId itemid;
2801 OffsetNumber toff = deadoffsets[i];
2802
2803 itemid = PageGetItemId(page, toff);
2804
2805 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2806 ItemIdSetUnused(itemid);
2807 unused[nunused++] = toff;
2808 }
2809
2810 Assert(nunused > 0);
2811
2812 /* Attempt to truncate line pointer array now */
2814
2815 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2816 {
2817 /*
2818 * The page is guaranteed to have had dead line pointers, so we always
2819 * set PD_ALL_VISIBLE.
2820 */
2821 PageSetAllVisible(page);
2822 PageClearPrunable(page);
2823 visibilitymap_set(blkno,
2824 vmbuffer, vmflags,
2825 vacrel->rel->rd_locator);
2826 conflict_xid = newest_live_xid;
2827 }
2828
2829 /*
2830 * Mark buffer dirty before we write WAL.
2831 */
2832 MarkBufferDirty(buffer);
2833
2834 /* XLOG stuff */
2835 if (RelationNeedsWAL(vacrel->rel))
2836 {
2837 log_heap_prune_and_freeze(vacrel->rel, buffer,
2838 vmflags != 0 ? vmbuffer : InvalidBuffer,
2839 vmflags,
2841 false, /* no cleanup lock required */
2843 NULL, 0, /* frozen */
2844 NULL, 0, /* redirected */
2845 NULL, 0, /* dead */
2846 unused, nunused);
2847 }
2848
2850
2852 {
2853 /* Count the newly set VM page for logging */
2854 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2855 vacrel->new_all_visible_pages++;
2856 if (all_frozen)
2857 vacrel->new_all_visible_all_frozen_pages++;
2858 }
2859
2860 /* Revert to the previous phase information for error traceback */
2862}
2863
2864/*
2865 * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2866 * relfrozenxid and/or relminmxid that is dangerously far in the past.
2867 * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2868 * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2869 *
2870 * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2871 * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2872 * that it started out with.
2873 *
2874 * Returns true when failsafe has been triggered.
2875 */
2876static bool
/* NOTE(review): the function-name line (presumably
 * lazy_check_wraparound_failsafe(LVRelState *vacrel)) is missing here. */
2878{
2879 /* Don't warn more than once per VACUUM */
/* NOTE(review): the guard condition line (2880), presumably testing
 * VacuumFailsafeActive, is missing from this copy. */
2881 return true;
2882
/* NOTE(review): the if-condition line (2883) that actually checks the
 * relfrozenxid/relminmxid age is missing from this copy. */
2884 {
2885 const int progress_index[] = {
/* NOTE(review): the initializer contents (lines 2886-2888) and the
 * progress-value array (line 2890) were dropped by the extraction. */
2889 };
2891
2892 VacuumFailsafeActive = true;
2893
2894 /*
2895 * Abandon use of a buffer access strategy to allow use of all of
2896 * shared buffers. We assume the caller who allocated the memory for
2897 * the BufferAccessStrategy will free it.
2898 */
2899 vacrel->bstrategy = NULL;
2900
2901 /* Disable index vacuuming, index cleanup, and heap rel truncation */
2902 vacrel->do_index_vacuuming = false;
2903 vacrel->do_index_cleanup = false;
2904 vacrel->do_rel_truncate = false;
2905
2906 /* Reset the progress counters and set the failsafe mode */
2908
2910 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2911 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2912 vacrel->num_index_scans),
2913 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2914 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2915 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2916
2917 /* Stop applying cost limits from this point on */
2918 VacuumCostActive = false;
2920
2921 return true;
2922 }
2923
2924 return false;
2925}
2926
2927/*
2928 * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2929 */
2930static void
/* NOTE(review): the function-name line (presumably
 * lazy_cleanup_all_indexes(LVRelState *vacrel)) is missing from this copy. */
2932{
2933 double reltuples = vacrel->new_rel_tuples;
2934 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2935 const int progress_start_index[] = {
/* NOTE(review): the initializer contents of the progress index arrays
 * (lines 2936-2937 and 2940-2941) and the progress_start_val declaration
 * (line 2943) were dropped by the extraction. */
2938 };
2939 const int progress_end_index[] = {
2942 };
2944 int64 progress_end_val[2] = {0, 0};
2945
2946 Assert(vacrel->do_index_cleanup);
2947 Assert(vacrel->nindexes > 0);
2948
2949 /*
2950 * Report that we are now cleaning up indexes and the number of indexes to
2951 * cleanup.
2952 */
2954 progress_start_val[1] = vacrel->nindexes;
2956
2958 {
2959 for (int idx = 0; idx < vacrel->nindexes; idx++)
2960 {
2961 Relation indrel = vacrel->indrels[idx];
2962 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2963
2964 vacrel->indstats[idx] =
2965 lazy_cleanup_one_index(indrel, istat, reltuples,
2966 estimated_count, vacrel);
2967
2968 /* Report the number of indexes cleaned up */
2970 idx + 1);
2971 }
2972 }
2973 else
2974 {
2975 /* Outsource everything to parallel variant */
2977 vacrel->num_index_scans,
2978 estimated_count,
2979 &(vacrel->worker_usage.cleanup));
2980 }
2981
2982 /* Reset the progress counters */
2984}
2985
2986/*
2987 * lazy_vacuum_one_index() -- vacuum index relation.
2988 *
2989 * Delete all the index tuples containing a TID collected in
2990 * vacrel->dead_items. Also update running statistics. Exact
2991 * details depend on index AM's ambulkdelete routine.
2992 *
2993 * reltuples is the number of heap tuples to be passed to the
2994 * bulkdelete callback. It's always assumed to be estimated.
2995 * See indexam.sgml for more info.
2996 *
2997 * Returns bulk delete stats derived from input stats
2998 */
2999static IndexBulkDeleteResult *
/* NOTE(review): the line naming lazy_vacuum_one_index and its leading
 * parameters (indrel, istat) is missing from this copy. */
3001 double reltuples, LVRelState *vacrel)
3002{
/* NOTE(review): the declarations of ivinfo (IndexVacuumInfo) and the saved
 * error-info struct (lines 3003-3004) were dropped by the extraction. */
3005
3006 ivinfo.index = indrel;
3007 ivinfo.heaprel = vacrel->rel;
3008 ivinfo.analyze_only = false;
3009 ivinfo.report_progress = false;
3010 ivinfo.estimated_count = true;
3011 ivinfo.message_level = DEBUG2;
3012 ivinfo.num_heap_tuples = reltuples;
3013 ivinfo.strategy = vacrel->bstrategy;
3014
3015 /*
3016 * Update error traceback information.
3017 *
3018 * The index name is saved during this phase and restored immediately
3019 * after this phase. See vacuum_error_callback.
3020 */
3021 Assert(vacrel->indname == NULL);
3026
3027 /* Do bulk deletion */
3028 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3029 vacrel->dead_items_info);
3030
3031 /* Revert to the previous phase information for error traceback */
3033 pfree(vacrel->indname);
3034 vacrel->indname = NULL;
3035
3036 return istat;
3037}
3038
3039/*
3040 * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3041 *
3042 * Calls index AM's amvacuumcleanup routine. reltuples is the number
3043 * of heap tuples and estimated_count is true if reltuples is an
3044 * estimated value. See indexam.sgml for more info.
3045 *
3046 * Returns bulk delete stats derived from input stats
3047 */
3048static IndexBulkDeleteResult *
/* NOTE(review): the line naming lazy_cleanup_one_index and its leading
 * parameters (indrel, istat), plus the trailing vacrel parameter line,
 * are missing from this copy. */
3050 double reltuples, bool estimated_count,
3052{
/* NOTE(review): the declarations of ivinfo and the saved error-info struct
 * (lines 3053-3054) were dropped by the extraction. */
3055
3056 ivinfo.index = indrel;
3057 ivinfo.heaprel = vacrel->rel;
3058 ivinfo.analyze_only = false;
3059 ivinfo.report_progress = false;
3060 ivinfo.estimated_count = estimated_count;
3061 ivinfo.message_level = DEBUG2;
3062
3063 ivinfo.num_heap_tuples = reltuples;
3064 ivinfo.strategy = vacrel->bstrategy;
3065
3066 /*
3067 * Update error traceback information.
3068 *
3069 * The index name is saved during this phase and restored immediately
3070 * after this phase. See vacuum_error_callback.
3071 */
3072 Assert(vacrel->indname == NULL);
3077
3078 istat = vac_cleanup_one_index(&ivinfo, istat);
3079
3080 /* Revert to the previous phase information for error traceback */
3082 pfree(vacrel->indname);
3083 vacrel->indname = NULL;
3084
3085 return istat;
3086}
3087
3088/*
3089 * should_attempt_truncation - should we attempt to truncate the heap?
3090 *
3091 * Don't even think about it unless we have a shot at releasing a goodly
3092 * number of pages. Otherwise, the time taken isn't worth it, mainly because
3093 * an AccessExclusive lock must be replayed on any hot standby, where it can
3094 * be particularly disruptive.
3095 *
3096 * Also don't attempt it if wraparound failsafe is in effect. The entire
3097 * system might be refusing to allocate new XIDs at this point. The system
3098 * definitely won't return to normal unless and until VACUUM actually advances
3099 * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3100 * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3101 * truncate the table under these circumstances, an XID exhaustion error might
3102 * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3103 * There is very little chance of truncation working out when the failsafe is
3104 * in effect in any case. lazy_scan_prune makes the optimistic assumption
3105 * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3106 * we're called.
3107 */
3108static bool
/* NOTE(review): the function-name line (presumably
 * should_attempt_truncation(LVRelState *vacrel)) is missing here. */
3110{
/* NOTE(review): the declaration of possibly_freeable (a BlockNumber, per
 * its use below) was dropped by the extraction. */
3112
3113 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3114 return false;
3115
3116 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3117 if (possibly_freeable > 0 &&
/* NOTE(review): the remaining condition lines (3118-3119) are missing;
 * given the REL_TRUNCATE_MINIMUM / REL_TRUNCATE_FRACTION macros defined in
 * this file, they presumably compare possibly_freeable against those
 * thresholds -- confirm against upstream. */
3120 return true;
3121
3122 return false;
3123}
3124
3125/*
3126 * lazy_truncate_heap - try to truncate off any empty pages at the end
3127 */
3128static void
3130{
3131 BlockNumber orig_rel_pages = vacrel->rel_pages;
3134 int lock_retry;
3135
3136 /* Report that we are now truncating */
3139
3140 /* Update error traceback information one last time */
3142 vacrel->nonempty_pages, InvalidOffsetNumber);
3143
3144 /*
3145 * Loop until no more truncating can be done.
3146 */
3147 do
3148 {
3149 /*
3150 * We need full exclusive lock on the relation in order to do
3151 * truncation. If we can't get it, give up rather than waiting --- we
3152 * don't want to block other backends, and we don't want to deadlock
3153 * (which is quite possible considering we already hold a lower-grade
3154 * lock).
3155 */
3156 lock_waiter_detected = false;
3157 lock_retry = 0;
3158 while (true)
3159 {
3161 break;
3162
3163 /*
3164 * Check for interrupts while trying to (re-)acquire the exclusive
3165 * lock.
3166 */
3168
3171 {
3172 /*
3173 * We failed to establish the lock in the specified number of
3174 * retries. This means we give up truncating.
3175 */
3176 ereport(vacrel->verbose ? INFO : DEBUG2,
3177 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3178 vacrel->relname)));
3179 return;
3180 }
3181
3187 }
3188
3189 /*
3190 * Now that we have exclusive lock, look to see if the rel has grown
3191 * whilst we were vacuuming with non-exclusive lock. If so, give up;
3192 * the newly added pages presumably contain non-deletable tuples.
3193 */
3196 {
3197 /*
3198 * Note: we intentionally don't update vacrel->rel_pages with the
3199 * new rel size here. If we did, it would amount to assuming that
3200 * the new pages are empty, which is unlikely. Leaving the numbers
3201 * alone amounts to assuming that the new pages have the same
3202 * tuple density as existing ones, which is less unlikely.
3203 */
3205 return;
3206 }
3207
3208 /*
3209 * Scan backwards from the end to verify that the end pages actually
3210 * contain no tuples. This is *necessary*, not optional, because
3211 * other backends could have added tuples to these pages whilst we
3212 * were vacuuming.
3213 */
3215 vacrel->blkno = new_rel_pages;
3216
3218 {
3219 /* can't do anything after all */
3221 return;
3222 }
3223
3224 /*
3225 * Okay to truncate.
3226 */
3228
3229 /*
3230 * We can release the exclusive lock as soon as we have truncated.
3231 * Other backends can't safely access the relation until they have
3232 * processed the smgr invalidation that smgrtruncate sent out ... but
3233 * that should happen as part of standard invalidation processing once
3234 * they acquire lock on the relation.
3235 */
3237
3238 /*
3239 * Update statistics. Here, it *is* correct to adjust rel_pages
3240 * without also touching reltuples, since the tuple count wasn't
3241 * changed by the truncation.
3242 */
3243 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3244 vacrel->rel_pages = new_rel_pages;
3245
3246 ereport(vacrel->verbose ? INFO : DEBUG2,
3247 (errmsg("table \"%s\": truncated %u to %u pages",
3248 vacrel->relname,
3251 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3252}
3253
3254/*
3255 * Rescan end pages to verify that they are (still) empty of tuples.
3256 *
3257 * Returns number of nondeletable pages (last nonempty page + 1).
3258 */
3259static BlockNumber
3261{
3263 "prefetch size must be power of 2");
3264
3265 BlockNumber blkno;
3267 instr_time starttime;
3268
3269 /* Initialize the starttime if we check for conflicting lock requests */
3270 INSTR_TIME_SET_CURRENT(starttime);
3271
3272 /*
3273 * Start checking blocks at what we believe relation end to be and move
3274 * backwards. (Strange coding of loop control is needed because blkno is
3275 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3276 * in forward direction, so that OS-level readahead can kick in.
3277 */
3278 blkno = vacrel->rel_pages;
3280 while (blkno > vacrel->nonempty_pages)
3281 {
3282 Buffer buf;
3283 Page page;
3284 OffsetNumber offnum,
3285 maxoff;
3286 bool hastup;
3287
3288 /*
3289 * Check if another process requests a lock on our relation. We are
3290 * holding an AccessExclusiveLock here, so they will be waiting. We
3291 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3292 * only check if that interval has elapsed once every 32 blocks to
3293 * keep the number of system calls and actual shared lock table
3294 * lookups to a minimum.
3295 */
3296 if ((blkno % 32) == 0)
3297 {
3300
3303 INSTR_TIME_SUBTRACT(elapsed, starttime);
3304 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3306 {
3308 {
3309 ereport(vacrel->verbose ? INFO : DEBUG2,
3310 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3311 vacrel->relname)));
3312
3313 *lock_waiter_detected = true;
3314 return blkno;
3315 }
3316 starttime = currenttime;
3317 }
3318 }
3319
3320 /*
3321 * We don't insert a vacuum delay point here, because we have an
3322 * exclusive lock on the table which we want to hold for as short a
3323 * time as possible. We still need to check for interrupts however.
3324 */
3326
3327 blkno--;
3328
3329 /* If we haven't prefetched this lot yet, do so now. */
3330 if (prefetchedUntil > blkno)
3331 {
3334
3335 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3336 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3337 {
3340 }
3342 }
3343
3345 vacrel->bstrategy);
3346
3347 /* In this phase we only need shared access to the buffer */
3349
3350 page = BufferGetPage(buf);
3351
3352 if (PageIsNew(page) || PageIsEmpty(page))
3353 {
3355 continue;
3356 }
3357
3358 hastup = false;
3359 maxoff = PageGetMaxOffsetNumber(page);
3360 for (offnum = FirstOffsetNumber;
3361 offnum <= maxoff;
3362 offnum = OffsetNumberNext(offnum))
3363 {
3364 ItemId itemid;
3365
3366 itemid = PageGetItemId(page, offnum);
3367
3368 /*
3369 * Note: any non-unused item should be taken as a reason to keep
3370 * this page. Even an LP_DEAD item makes truncation unsafe, since
3371 * we must not have cleaned out its index entries.
3372 */
3373 if (ItemIdIsUsed(itemid))
3374 {
3375 hastup = true;
3376 break; /* can stop scanning */
3377 }
3378 } /* scan along page */
3379
3381
3382 /* Done scanning if we found a tuple here */
3383 if (hastup)
3384 return blkno + 1;
3385 }
3386
3387 /*
3388 * If we fall out of the loop, all the previously-thought-to-be-empty
3389 * pages still are; we need not bother to look at the last known-nonempty
3390 * page.
3391 */
3392 return vacrel->nonempty_pages;
3393}
3394
3395/*
3396 * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3397 * shared memory). Sets both in vacrel for caller.
3398 *
3399 * Also handles parallel initialization as part of allocating dead_items in
3400 * DSM when required.
3401 */
3402static void
3403dead_items_alloc(LVRelState *vacrel, int nworkers)
3404{
3405 VacDeadItemsInfo *dead_items_info;
3407 autovacuum_work_mem != -1 ?
3409
3410 /*
3411 * Initialize state for a parallel vacuum. As of now, only one worker can
3412 * be used for an index, so we invoke parallelism only if there are at
3413 * least two indexes on a table.
3414 */
3415 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3416 {
3417 /*
3418 * Since parallel workers cannot access data in temporary tables, we
3419 * can't perform parallel vacuum on them.
3420 */
3422 {
3423 /*
3424 * Give warning only if the user explicitly tries to perform a
3425 * parallel vacuum on the temporary table.
3426 */
3427 if (nworkers > 0)
3429 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3430 vacrel->relname)));
3431 }
3432 else
3433 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3434 vacrel->nindexes, nworkers,
3436 vacrel->verbose ? INFO : DEBUG2,
3437 vacrel->bstrategy);
3438
3439 /*
3440 * If parallel mode started, dead_items and dead_items_info spaces are
3441 * allocated in DSM.
3442 */
3444 {
3446 &vacrel->dead_items_info);
3447 return;
3448 }
3449 }
3450
3451 /*
3452 * Serial VACUUM case. Allocate both dead_items and dead_items_info
3453 * locally.
3454 */
3455
3456 dead_items_info = palloc_object(VacDeadItemsInfo);
3457 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3458 dead_items_info->num_items = 0;
3459 vacrel->dead_items_info = dead_items_info;
3460
3461 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3462}
3463
3464/*
3465 * Add the given block number and offset numbers to dead_items.
3466 */
3467static void
3469 int num_offsets)
3470{
3471 const int prog_index[2] = {
3474 };
3475 int64 prog_val[2];
3476
3477 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3478 vacrel->dead_items_info->num_items += num_offsets;
3479
3480 /* update the progress information */
3481 prog_val[0] = vacrel->dead_items_info->num_items;
3482 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3484}
3485
3486/*
3487 * Forget all collected dead items.
3488 */
3489static void
3491{
3492 /* Update statistics for dead items */
3493 vacrel->num_dead_items_resets++;
3494 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3495
3497 {
3499 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3500 &vacrel->dead_items_info);
3501 return;
3502 }
3503
3504 /* Recreate the tidstore with the same max_bytes limitation */
3505 TidStoreDestroy(vacrel->dead_items);
3506 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3507
3508 /* Reset the counter */
3509 vacrel->dead_items_info->num_items = 0;
3510}
3511
3512/*
3513 * Perform cleanup for resources allocated in dead_items_alloc
3514 */
3515static void
3517{
3519 {
3520 /* Don't bother with pfree here */
3521 return;
3522 }
3523
3524 /* End parallel mode */
3525 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3526 vacrel->pvs = NULL;
3527}
3528
3529#ifdef USE_ASSERT_CHECKING
3530
3531/*
3532 * Wrapper for heap_page_would_be_all_visible() which can be used for callers
3533 * that expect no LP_DEAD on the page. Currently assert-only, but there is no
3534 * reason not to use it outside of asserts.
3535 */
3536bool
3538 GlobalVisState *vistest,
3539 bool *all_frozen,
3540 TransactionId *newest_live_xid,
3542{
3543 /*
3544 * Pass allow_update_vistest as false so that the GlobalVisState
3545 * boundaries used here match those used by the pruning code we are
3546 * cross-checking. Allowing an update could move the boundaries between
3547 * the two calls, causing a spurious assertion failure.
3548 */
3550 vistest, false,
3551 NULL, 0,
3552 all_frozen,
3553 newest_live_xid,
3555}
3556#endif
3557
3558/*
3559 * Check whether the heap page in buf is all-visible except for the dead
3560 * tuples referenced in the deadoffsets array.
3561 *
3562 * Vacuum uses this to check if a page would become all-visible after reaping
3563 * known dead tuples. This function does not remove the dead items.
3564 *
3565 * This cannot be called in a critical section, as the visibility checks may
3566 * perform IO and allocate memory.
3567 *
3568 * Returns true if the page is all-visible other than the provided
3569 * deadoffsets and false otherwise.
3570 *
3571 * vistest is used to determine visibility. If allow_update_vistest is true,
3572 * the boundaries of the GlobalVisState may be updated when checking the
3573 * visibility of the newest live XID on the page.
3574 *
3575 * Output parameters:
3576 *
3577 * - *all_frozen: true if every tuple on the page is frozen
3578 * - *newest_live_xid: newest xmin of live tuples on the page
3579 * - *logging_offnum: OffsetNumber of current tuple being processed;
3580 * used by vacuum's error callback system.
3581 *
3582 * Callers looking to verify that the page is already all-visible can call
3583 * heap_page_is_all_visible().
3584 *
3585 * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3586 * If you modify this function, ensure consistency with that code. An
3587 * assertion cross-checks that both remain in agreement. Do not introduce new
3588 * side-effects.
3589 */
3590static bool
3592 GlobalVisState *vistest,
3594 OffsetNumber *deadoffsets,
3595 int ndeadoffsets,
3596 bool *all_frozen,
3597 TransactionId *newest_live_xid,
3599{
3600 Page page = BufferGetPage(buf);
3602 OffsetNumber offnum,
3603 maxoff;
3604 bool all_visible = true;
3605 int matched_dead_count = 0;
3606
3607 *newest_live_xid = InvalidTransactionId;
3608 *all_frozen = true;
3609
3610 Assert(ndeadoffsets == 0 || deadoffsets);
3611
3612#ifdef USE_ASSERT_CHECKING
3613 /* Confirm input deadoffsets[] is strictly sorted */
3614 if (ndeadoffsets > 1)
3615 {
3616 for (int i = 1; i < ndeadoffsets; i++)
3617 Assert(deadoffsets[i - 1] < deadoffsets[i]);
3618 }
3619#endif
3620
3621 maxoff = PageGetMaxOffsetNumber(page);
3622 for (offnum = FirstOffsetNumber;
3623 offnum <= maxoff && all_visible;
3624 offnum = OffsetNumberNext(offnum))
3625 {
3626 ItemId itemid;
3627 HeapTupleData tuple;
3629
3630 /*
3631 * Set the offset number so that we can display it along with any
3632 * error that occurred while processing this tuple.
3633 */
3634 *logging_offnum = offnum;
3635 itemid = PageGetItemId(page, offnum);
3636
3637 /* Unused or redirect line pointers are of no interest */
3638 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3639 continue;
3640
3641 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3642
3643 /*
3644 * Dead line pointers can have index pointers pointing to them. So
3645 * they can't be treated as visible
3646 */
3647 if (ItemIdIsDead(itemid))
3648 {
3649 if (!deadoffsets ||
3651 deadoffsets[matched_dead_count] != offnum)
3652 {
3653 *all_frozen = all_visible = false;
3654 break;
3655 }
3657 continue;
3658 }
3659
3660 Assert(ItemIdIsNormal(itemid));
3661
3662 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3663 tuple.t_len = ItemIdGetLength(itemid);
3664 tuple.t_tableOid = RelationGetRelid(rel);
3665
3666 /* Visibility checks may do IO or allocate memory */
3669 {
3670 case HEAPTUPLE_LIVE:
3671 {
3672 TransactionId xmin;
3673
3674 /* Check heap_prune_record_unchanged_lp_normal comments */
3676 {
3677 all_visible = false;
3678 *all_frozen = false;
3679 break;
3680 }
3681
3682 /*
3683 * The inserter definitely committed. But we don't know if
3684 * it is old enough that everyone sees it as committed.
3685 * Don't check that now.
3686 *
3687 * If we scan all tuples without finding one that prevents
3688 * the page from being all-visible, we then check whether
3689 * any snapshot still considers the newest XID on the page
3690 * to be running. In that case, the page is not considered
3691 * all-visible.
3692 */
3693 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3694
3695 /* Track newest xmin on page. */
3696 if (TransactionIdFollows(xmin, *newest_live_xid) &&
3698 *newest_live_xid = xmin;
3699
3700 /* Check whether this tuple is already frozen or not */
3701 if (all_visible && *all_frozen &&
3703 *all_frozen = false;
3704 }
3705 break;
3706
3707 case HEAPTUPLE_DEAD:
3711 {
3712 all_visible = false;
3713 *all_frozen = false;
3714 break;
3715 }
3716 default:
3717 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3718 break;
3719 }
3720 } /* scan along page */
3721
3722 /*
3723 * After processing all the live tuples on the page, if the newest xmin
3724 * among them may still be considered running by any snapshot, the page
3725 * cannot be all-visible.
3726 */
3727 if (all_visible &&
3728 TransactionIdIsNormal(*newest_live_xid) &&
3729 GlobalVisTestXidConsideredRunning(vistest, *newest_live_xid,
3731 {
3732 all_visible = false;
3733 *all_frozen = false;
3734 }
3735
3736 /* Clear the offset information once we have processed the given page. */
3738
3739 return all_visible;
3740}
3741
3742/*
3743 * Update index statistics in pg_class if the statistics are accurate.
3744 */
3745static void
3747{
3748 Relation *indrels = vacrel->indrels;
3749 int nindexes = vacrel->nindexes;
3750 IndexBulkDeleteResult **indstats = vacrel->indstats;
3751
3752 Assert(vacrel->do_index_cleanup);
3753
3754 for (int idx = 0; idx < nindexes; idx++)
3755 {
3756 Relation indrel = indrels[idx];
3757 IndexBulkDeleteResult *istat = indstats[idx];
3758
3759 if (istat == NULL || istat->estimated_count)
3760 continue;
3761
3762 /* Update index statistics */
3764 istat->num_pages,
3765 istat->num_index_tuples,
3766 0, 0,
3767 false,
3770 NULL, NULL, false);
3771 }
3772}
3773
3774/*
3775 * Error context callback for errors occurring during vacuum. The error
3776 * context messages for index phases should match the messages set in parallel
3777 * vacuum. If you change this function for those phases, change
3778 * parallel_vacuum_error_callback() as well.
3779 */
3780static void
3782{
3784
3785 switch (errinfo->phase)
3786 {
3788 if (BlockNumberIsValid(errinfo->blkno))
3789 {
3790 if (OffsetNumberIsValid(errinfo->offnum))
3791 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3792 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3793 else
3794 errcontext("while scanning block %u of relation \"%s.%s\"",
3795 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3796 }
3797 else
3798 errcontext("while scanning relation \"%s.%s\"",
3799 errinfo->relnamespace, errinfo->relname);
3800 break;
3801
3803 if (BlockNumberIsValid(errinfo->blkno))
3804 {
3805 if (OffsetNumberIsValid(errinfo->offnum))
3806 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3807 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3808 else
3809 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3810 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3811 }
3812 else
3813 errcontext("while vacuuming relation \"%s.%s\"",
3814 errinfo->relnamespace, errinfo->relname);
3815 break;
3816
3818 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3819 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3820 break;
3821
3823 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3824 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3825 break;
3826
3828 if (BlockNumberIsValid(errinfo->blkno))
3829 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3830 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3831 break;
3832
3834 default:
3835 return; /* do nothing; the errinfo may not be
3836 * initialized */
3837 }
3838}
3839
3840/*
3841 * Updates the information required for vacuum error callback. This also saves
3842 * the current information which can be later restored via restore_vacuum_error_info.
3843 */
3844static void
3846 int phase, BlockNumber blkno, OffsetNumber offnum)
3847{
3848 if (saved_vacrel)
3849 {
3850 saved_vacrel->offnum = vacrel->offnum;
3851 saved_vacrel->blkno = vacrel->blkno;
3852 saved_vacrel->phase = vacrel->phase;
3853 }
3854
3855 vacrel->blkno = blkno;
3856 vacrel->offnum = offnum;
3857 vacrel->phase = phase;
3858}
3859
3860/*
3861 * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3862 */
3863static void
3866{
3867 vacrel->blkno = saved_vacrel->blkno;
3868 vacrel->offnum = saved_vacrel->offnum;
3869 vacrel->phase = saved_vacrel->phase;
3870}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
int autovacuum_work_mem
Definition autovacuum.c:122
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1715
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1775
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1639
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
bool track_io_timing
Definition bufmgr.c:192
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition bufmgr.c:6526
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4379
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:787
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5527
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5544
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3085
void LockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6559
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:926
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6732
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:332
@ RBM_NORMAL
Definition bufmgr.h:46
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:421
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:249
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:455
static bool PageIsNew(const PageData *page)
Definition bufpage.h:259
#define SizeOfPageHeaderData
Definition bufpage.h:242
static void PageSetAllVisible(Page page)
Definition bufpage.h:460
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:379
PageData * Page
Definition bufpage.h:81
#define PageClearPrunable(page)
Definition bufpage.h:486
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:397
uint8_t uint8
Definition c.h:616
#define ngettext(s, p, n)
Definition c.h:1272
#define Max(x, y)
Definition c.h:1087
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
TransactionId MultiXactId
Definition c.h:748
int32_t int32
Definition c.h:614
#define unlikely(x)
Definition c.h:432
uint32_t uint32
Definition c.h:618
#define lengthof(array)
Definition c.h:875
#define StaticAssertDecl(condition, errmessage)
Definition c.h:1010
uint32 TransactionId
Definition c.h:738
size_t Size
Definition c.h:691
int64 TimestampTz
Definition timestamp.h:39
Datum arg
Definition elog.c:1322
ErrorContextCallback * error_context_stack
Definition elog.c:99
#define _(x)
Definition elog.c:95
#define LOG
Definition elog.h:31
#define errcontext
Definition elog.h:198
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:36
#define DEBUG2
Definition elog.h:29
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define INFO
Definition elog.h:34
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition freespace.c:377
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition freespace.c:244
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition freespace.c:194
bool VacuumCostActive
Definition globals.c:158
int VacuumCostBalance
Definition globals.c:157
int maintenance_work_mem
Definition globals.c:133
volatile uint32 CritSectionCount
Definition globals.c:45
struct Latch * MyLatch
Definition globals.c:63
Oid MyDatabaseId
Definition globals.c:94
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition heapam.c:7910
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7965
#define HEAP_PAGE_PRUNE_FREEZE
Definition heapam.h:44
#define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH
Definition heapam.h:45
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:141
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:142
@ HEAPTUPLE_LIVE
Definition heapam.h:140
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:143
@ HEAPTUPLE_DEAD
Definition heapam.h:139
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:255
@ PRUNE_VACUUM_SCAN
Definition heapam.h:254
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition heapam.h:43
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define false
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:177
#define INSTR_TIME_GET_MICROSEC(t)
Definition instr_time.h:192
WalUsage pgWalUsage
Definition instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:289
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:249
static int pg_cmp_u16(uint16 a, uint16 b)
Definition int.h:707
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:314
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:278
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:367
#define NoLock
Definition lockdefs.h:34
#define AccessExclusiveLock
Definition lockdefs.h:43
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1312
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3588
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:383
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define END_CRIT_SECTION()
Definition miscadmin.h:152
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2857
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2871
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define InvalidMultiXactId
Definition multixact.h:25
static char * errmsg
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition off.h:39
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
#define MaxOffsetNumber
Definition off.h:28
static int verbose
NameData relname
Definition pg_class.h:40
const void * data
uint32 pg_prng_uint32(pg_prng_state *state)
Definition pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition pg_prng.c:34
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
static char buf[DEFAULT_XLOG_SEG_SIZE]
int64 PgStat_Counter
Definition pgstat.h:71
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define qsort(a, b, c, d)
Definition port.h:495
static int fb(int x)
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4114
bool GlobalVisTestXidConsideredRunning(GlobalVisState *state, TransactionId xid, bool allow_update)
Definition procarray.c:4315
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_DEAD_TUPLE_BYTES
Definition progress.h:27
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition progress.h:36
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition progress.h:38
#define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS
Definition progress.h:28
#define PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
Definition progress.h:26
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition progress.h:23
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition progress.h:39
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition progress.h:37
#define PROGRESS_VACUUM_MODE_FAILSAFE
Definition progress.h:46
#define PROGRESS_VACUUM_INDEXES_PROCESSED
Definition progress.h:30
#define PROGRESS_VACUUM_INDEXES_TOTAL
Definition progress.h:29
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition progress.h:40
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:1056
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2529
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
void read_stream_end(ReadStream *stream)
#define READ_STREAM_MAINTENANCE
Definition read_stream.h:28
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationNeedsWAL(relation)
Definition rel.h:637
#define RelationUsesLocalBuffers(relation)
Definition rel.h:646
#define RelationGetNamespace(relation)
Definition rel.h:555
@ MAIN_FORKNUM
Definition relpath.h:58
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition storage.c:289
char * dbname
Definition streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
BlockNumber pages_deleted
Definition genam.h:90
BlockNumber pages_newly_deleted
Definition genam.h:89
BlockNumber pages_free
Definition genam.h:91
BlockNumber num_pages
Definition genam.h:85
double num_index_tuples
Definition genam.h:87
BlockNumber next_eager_scan_region_start
Definition vacuumlazy.c:379
ParallelVacuumState * pvs
Definition vacuumlazy.c:260
bool next_unskippable_eager_scanned
Definition vacuumlazy.c:364
VacDeadItemsInfo * dead_items_info
Definition vacuumlazy.c:303
PVWorkerUsage worker_usage
Definition vacuumlazy.c:351
Buffer next_unskippable_vmbuffer
Definition vacuumlazy.c:365
OffsetNumber offnum
Definition vacuumlazy.c:288
TidStore * dead_items
Definition vacuumlazy.c:302
int64 tuples_deleted
Definition vacuumlazy.c:354
BlockNumber nonempty_pages
Definition vacuumlazy.c:334
BlockNumber eager_scan_remaining_fails
Definition vacuumlazy.c:411
bool do_rel_truncate
Definition vacuumlazy.c:272
BlockNumber scanned_pages
Definition vacuumlazy.c:306
int num_dead_items_resets
Definition vacuumlazy.c:344
bool aggressive
Definition vacuumlazy.c:263
BlockNumber new_frozen_tuple_pages
Definition vacuumlazy.c:315
GlobalVisState * vistest
Definition vacuumlazy.c:276
BlockNumber removed_pages
Definition vacuumlazy.c:314
int num_index_scans
Definition vacuumlazy.c:343
IndexBulkDeleteResult ** indstats
Definition vacuumlazy.c:340
BlockNumber new_all_frozen_pages
Definition vacuumlazy.c:330
double new_live_tuples
Definition vacuumlazy.c:338
double new_rel_tuples
Definition vacuumlazy.c:337
BlockNumber new_all_visible_all_frozen_pages
Definition vacuumlazy.c:327
BlockNumber new_all_visible_pages
Definition vacuumlazy.c:318
TransactionId NewRelfrozenXid
Definition vacuumlazy.c:278
Relation rel
Definition vacuumlazy.c:254
bool consider_bypass_optimization
Definition vacuumlazy.c:267
BlockNumber rel_pages
Definition vacuumlazy.c:305
Size total_dead_items_bytes
Definition vacuumlazy.c:345
BlockNumber next_unskippable_block
Definition vacuumlazy.c:363
int64 recently_dead_tuples
Definition vacuumlazy.c:358
int64 tuples_frozen
Definition vacuumlazy.c:355
char * dbname
Definition vacuumlazy.c:283
BlockNumber missed_dead_pages
Definition vacuumlazy.c:333
BlockNumber current_block
Definition vacuumlazy.c:362
char * relnamespace
Definition vacuumlazy.c:284
int64 live_tuples
Definition vacuumlazy.c:357
int64 lpdead_items
Definition vacuumlazy.c:356
BufferAccessStrategy bstrategy
Definition vacuumlazy.c:259
BlockNumber eager_scan_remaining_successes
Definition vacuumlazy.c:390
bool skippedallvis
Definition vacuumlazy.c:280
BlockNumber lpdead_item_pages
Definition vacuumlazy.c:332
BlockNumber eager_scanned_pages
Definition vacuumlazy.c:312
Relation * indrels
Definition vacuumlazy.c:255
bool skipwithvm
Definition vacuumlazy.c:265
bool do_index_cleanup
Definition vacuumlazy.c:271
MultiXactId NewRelminMxid
Definition vacuumlazy.c:279
int64 missed_dead_tuples
Definition vacuumlazy.c:359
BlockNumber blkno
Definition vacuumlazy.c:287
struct VacuumCutoffs cutoffs
Definition vacuumlazy.c:275
char * relname
Definition vacuumlazy.c:285
BlockNumber eager_scan_max_fails_per_region
Definition vacuumlazy.c:401
VacErrPhase phase
Definition vacuumlazy.c:289
char * indname
Definition vacuumlazy.c:286
bool do_index_vacuuming
Definition vacuumlazy.c:270
BlockNumber blkno
Definition vacuumlazy.c:418
VacErrPhase phase
Definition vacuumlazy.c:420
OffsetNumber offnum
Definition vacuumlazy.c:419
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
size_t max_bytes
Definition vacuum.h:298
int64 num_items
Definition vacuum.h:299
int nworkers
Definition vacuum.h:250
VacOptValue truncate
Definition vacuum.h:235
bits32 options
Definition vacuum.h:218
int log_vacuum_min_duration
Definition vacuum.h:226
bool is_wraparound
Definition vacuum.h:225
VacOptValue index_cleanup
Definition vacuum.h:234
double max_eager_freeze_failure_rate
Definition vacuum.h:243
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
TidStoreIter * TidStoreBeginIterate(TidStore *ts)
Definition tidstore.c:471
void TidStoreEndIterate(TidStoreIter *iter)
Definition tidstore.c:518
TidStoreIterResult * TidStoreIterateNext(TidStoreIter *iter)
Definition tidstore.c:493
TidStore * TidStoreCreateLocal(size_t max_bytes, bool insert_only)
Definition tidstore.c:162
void TidStoreDestroy(TidStore *ts)
Definition tidstore.c:317
int TidStoreGetBlockOffsets(TidStoreIterResult *result, OffsetNumber *offsets, int max_offsets)
Definition tidstore.c:566
void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
Definition tidstore.c:345
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
static TransactionId ReadNextTransactionId(void)
Definition transam.h:377
#define InvalidTransactionId
Definition transam.h:31
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool track_cost_delay_timing
Definition vacuum.c:83
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2367
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition vacuum.c:2659
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2410
void vacuum_delay_point(bool is_analyze)
Definition vacuum.c:2431
bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1268
bool VacuumFailsafeActive
Definition vacuum.c:111
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition vacuum.c:1330
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
Definition vacuum.c:2638
#define VACOPT_VERBOSE
Definition vacuum.h:181
@ VACOPTVALUE_AUTO
Definition vacuum.h:202
@ VACOPTVALUE_ENABLED
Definition vacuum.h:204
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:201
@ VACOPTVALUE_DISABLED
Definition vacuum.h:203
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:187
static int lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool *has_lpdead_items, bool *vm_page_frozen)
static void dead_items_cleanup(LVRelState *vacrel)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
void heap_vacuum_rel(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
Definition vacuumlazy.c:623
static BlockNumber heap_vac_scan_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition vacuumlazy.c:496
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition vacuumlazy.c:179
static void vacuum_error_callback(void *arg)
#define EAGER_SCAN_REGION_SIZE
Definition vacuumlazy.c:249
static void lazy_truncate_heap(LVRelState *vacrel)
static void lazy_vacuum(LVRelState *vacrel)
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
#define MAX_EAGER_FREEZE_SUCCESS_RATE
Definition vacuumlazy.c:240
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
static BlockNumber vacuum_reap_lp_read_stream_next(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define REL_TRUNCATE_MINIMUM
Definition vacuumlazy.c:168
static bool should_attempt_truncation(LVRelState *vacrel)
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
VacErrPhase
Definition vacuumlazy.c:224
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition vacuumlazy.c:226
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition vacuumlazy.c:227
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition vacuumlazy.c:230
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition vacuumlazy.c:229
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition vacuumlazy.c:228
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:225
static void lazy_scan_heap(LVRelState *vacrel)
#define ParallelVacuumIsActive(vacrel)
Definition vacuumlazy.c:220
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
static void dead_items_reset(LVRelState *vacrel)
#define REL_TRUNCATE_FRACTION
Definition vacuumlazy.c:169
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
#define PREFETCH_SIZE
Definition vacuumlazy.c:214
static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
static bool heap_page_would_be_all_visible(Relation rel, Buffer buf, GlobalVisState *vistest, bool allow_update_vistest, OffsetNumber *deadoffsets, int ndeadoffsets, bool *all_frozen, TransactionId *newest_live_xid, OffsetNumber *logging_offnum)
#define BYPASS_THRESHOLD_PAGES
Definition vacuumlazy.c:186
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition vacuumlazy.c:180
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
#define SKIP_PAGES_THRESHOLD
Definition vacuumlazy.c:208
#define FAILSAFE_EVERY_PAGES
Definition vacuumlazy.c:192
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition vacuumlazy.c:178
static int cmpOffsetNumbers(const void *a, const void *b)
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
#define VACUUM_FSM_EVERY_PAGES
Definition vacuumlazy.c:201
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count, PVWorkerStats *wstats)
TidStore * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs, VacDeadItemsInfo **dead_items_info_p)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, PVWorkerStats *wstats)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int vac_work_mem, int elevel, BufferAccessStrategy bstrategy)
void parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
void visibilitymap_set(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
bool IsInParallelMode(void)
Definition xact.c:1091

◆ MAX_EAGER_FREEZE_SUCCESS_RATE

#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2

Definition at line 240 of file vacuumlazy.c.

◆ ParallelVacuumIsActive

#define ParallelVacuumIsActive (   vacrel)    ((vacrel)->pvs != NULL)

Definition at line 220 of file vacuumlazy.c.

◆ PREFETCH_SIZE

#define PREFETCH_SIZE   ((BlockNumber) 32)

Definition at line 214 of file vacuumlazy.c.

◆ REL_TRUNCATE_FRACTION

#define REL_TRUNCATE_FRACTION   16

Definition at line 169 of file vacuumlazy.c.

◆ REL_TRUNCATE_MINIMUM

#define REL_TRUNCATE_MINIMUM   1000

Definition at line 168 of file vacuumlazy.c.

◆ SKIP_PAGES_THRESHOLD

#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)

Definition at line 208 of file vacuumlazy.c.

◆ VACUUM_FSM_EVERY_PAGES

#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 201 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL

#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */

Definition at line 178 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_TIMEOUT

#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */

Definition at line 180 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL

#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */

Definition at line 179 of file vacuumlazy.c.

Typedef Documentation

◆ LVRelState

◆ LVSavedErrInfo

Enumeration Type Documentation

◆ VacErrPhase

Enumerator
VACUUM_ERRCB_PHASE_UNKNOWN 
VACUUM_ERRCB_PHASE_SCAN_HEAP 
VACUUM_ERRCB_PHASE_VACUUM_INDEX 
VACUUM_ERRCB_PHASE_VACUUM_HEAP 
VACUUM_ERRCB_PHASE_INDEX_CLEANUP 
VACUUM_ERRCB_PHASE_TRUNCATE 

Definition at line 223 of file vacuumlazy.c.

Function Documentation

◆ cmpOffsetNumbers()

static int cmpOffsetNumbers ( const void a,
const void b 
)
static

Definition at line 1986 of file vacuumlazy.c.

1987{
1988 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1989}

References a, b, and pg_cmp_u16().

Referenced by lazy_scan_prune().

◆ count_nondeletable_pages()

static BlockNumber count_nondeletable_pages ( LVRelState vacrel,
bool lock_waiter_detected 
)
static

Definition at line 3261 of file vacuumlazy.c.

3262{
3264 "prefetch size must be power of 2");
3265
3266 BlockNumber blkno;
3268 instr_time starttime;
3269
3270 /* Initialize the starttime if we check for conflicting lock requests */
3271 INSTR_TIME_SET_CURRENT(starttime);
3272
3273 /*
3274 * Start checking blocks at what we believe relation end to be and move
3275 * backwards. (Strange coding of loop control is needed because blkno is
3276 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3277 * in forward direction, so that OS-level readahead can kick in.
3278 */
3279 blkno = vacrel->rel_pages;
3281 while (blkno > vacrel->nonempty_pages)
3282 {
3283 Buffer buf;
3284 Page page;
3285 OffsetNumber offnum,
3286 maxoff;
3287 bool hastup;
3288
3289 /*
3290 * Check if another process requests a lock on our relation. We are
3291 * holding an AccessExclusiveLock here, so they will be waiting. We
3292 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3293 * only check if that interval has elapsed once every 32 blocks to
3294 * keep the number of system calls and actual shared lock table
3295 * lookups to a minimum.
3296 */
3297 if ((blkno % 32) == 0)
3298 {
3301
3304 INSTR_TIME_SUBTRACT(elapsed, starttime);
3305 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3307 {
3309 {
3310 ereport(vacrel->verbose ? INFO : DEBUG2,
3311 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3312 vacrel->relname)));
3313
3314 *lock_waiter_detected = true;
3315 return blkno;
3316 }
3317 starttime = currenttime;
3318 }
3319 }
3320
3321 /*
3322 * We don't insert a vacuum delay point here, because we have an
3323 * exclusive lock on the table which we want to hold for as short a
3324 * time as possible. We still need to check for interrupts however.
3325 */
3327
3328 blkno--;
3329
3330 /* If we haven't prefetched this lot yet, do so now. */
3331 if (prefetchedUntil > blkno)
3332 {
3335
3336 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3337 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3338 {
3341 }
3343 }
3344
3346 vacrel->bstrategy);
3347
3348 /* In this phase we only need shared access to the buffer */
3350
3351 page = BufferGetPage(buf);
3352
3353 if (PageIsNew(page) || PageIsEmpty(page))
3354 {
3356 continue;
3357 }
3358
3359 hastup = false;
3360 maxoff = PageGetMaxOffsetNumber(page);
3361 for (offnum = FirstOffsetNumber;
3362 offnum <= maxoff;
3363 offnum = OffsetNumberNext(offnum))
3364 {
3365 ItemId itemid;
3366
3367 itemid = PageGetItemId(page, offnum);
3368
3369 /*
3370 * Note: any non-unused item should be taken as a reason to keep
3371 * this page. Even an LP_DEAD item makes truncation unsafe, since
3372 * we must not have cleaned out its index entries.
3373 */
3374 if (ItemIdIsUsed(itemid))
3375 {
3376 hastup = true;
3377 break; /* can stop scanning */
3378 }
3379 } /* scan along page */
3380
3382
3383 /* Done scanning if we found a tuple here */
3384 if (hastup)
3385 return blkno + 1;
3386 }
3387
3388 /*
3389 * If we fall out of the loop, all the previously-thought-to-be-empty
3390 * pages still are; we need not bother to look at the last known-nonempty
3391 * page.
3392 */
3393 return vacrel->nonempty_pages;
3394}

References AccessExclusiveLock, buf, BUFFER_LOCK_SHARE, BufferGetPage(), CHECK_FOR_INTERRUPTS, DEBUG2, ereport, errmsg, fb(), FirstOffsetNumber, INFO, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBlockNumber, ItemIdIsUsed, LockBuffer(), LockHasWaitersRelation(), MAIN_FORKNUM, OffsetNumberNext, PageGetItemId(), PageGetMaxOffsetNumber(), PageIsEmpty(), PageIsNew(), PREFETCH_SIZE, PrefetchBuffer(), RBM_NORMAL, ReadBufferExtended(), StaticAssertDecl, UnlockReleaseBuffer(), and VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL.

Referenced by lazy_truncate_heap().

◆ dead_items_add()

static void dead_items_add ( LVRelState vacrel,
BlockNumber  blkno,
OffsetNumber offsets,
int  num_offsets 
)
static

Definition at line 3469 of file vacuumlazy.c.

3471{
3472 const int prog_index[2] = {
3475 };
3476 int64 prog_val[2];
3477
3478 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3479 vacrel->dead_items_info->num_items += num_offsets;
3480
3481 /* update the progress information */
3482 prog_val[0] = vacrel->dead_items_info->num_items;
3483 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3485}

References fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS, TidStoreMemoryUsage(), and TidStoreSetBlockOffsets().

Referenced by lazy_scan_noprune(), and lazy_scan_prune().

◆ dead_items_alloc()

static void dead_items_alloc ( LVRelState vacrel,
int  nworkers 
)
static

Definition at line 3404 of file vacuumlazy.c.

3405{
3406 VacDeadItemsInfo *dead_items_info;
3408 autovacuum_work_mem != -1 ?
3410
3411 /*
3412 * Initialize state for a parallel vacuum. As of now, only one worker can
3413 * be used for an index, so we invoke parallelism only if there are at
3414 * least two indexes on a table.
3415 */
3416 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3417 {
3418 /*
3419 * Since parallel workers cannot access data in temporary tables, we
3420 * can't perform parallel vacuum on them.
3421 */
3423 {
3424 /*
3425 * Give warning only if the user explicitly tries to perform a
3426 * parallel vacuum on the temporary table.
3427 */
3428 if (nworkers > 0)
3430 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3431 vacrel->relname)));
3432 }
3433 else
3434 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3435 vacrel->nindexes, nworkers,
3437 vacrel->verbose ? INFO : DEBUG2,
3438 vacrel->bstrategy);
3439
3440 /*
3441 * If parallel mode started, dead_items and dead_items_info spaces are
3442 * allocated in DSM.
3443 */
3445 {
3447 &vacrel->dead_items_info);
3448 return;
3449 }
3450 }
3451
3452 /*
3453 * Serial VACUUM case. Allocate both dead_items and dead_items_info
3454 * locally.
3455 */
3456
3457 dead_items_info = palloc_object(VacDeadItemsInfo);
3458 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3459 dead_items_info->num_items = 0;
3460 vacrel->dead_items_info = dead_items_info;
3461
3462 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3463}

References AmAutoVacuumWorkerProcess, autovacuum_work_mem, ParallelVacuumState::dead_items, DEBUG2, ereport, errmsg, fb(), INFO, maintenance_work_mem, VacDeadItemsInfo::max_bytes, VacDeadItemsInfo::num_items, palloc_object, parallel_vacuum_get_dead_items(), parallel_vacuum_init(), ParallelVacuumIsActive, RelationUsesLocalBuffers, TidStoreCreateLocal(), and WARNING.

Referenced by heap_vacuum_rel().

◆ dead_items_cleanup()

static void dead_items_cleanup ( LVRelState vacrel)
static

Definition at line 3517 of file vacuumlazy.c.

3518{
3520 {
3521 /* Don't bother with pfree here */
3522 return;
3523 }
3524
3525 /* End parallel mode */
3526 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3527 vacrel->pvs = NULL;
3528}

References fb(), parallel_vacuum_end(), and ParallelVacuumIsActive.

Referenced by heap_vacuum_rel().

◆ dead_items_reset()

static void dead_items_reset ( LVRelState vacrel)
static

Definition at line 3491 of file vacuumlazy.c.

3492{
3493 /* Update statistics for dead items */
3494 vacrel->num_dead_items_resets++;
3495 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3496
3498 {
3500 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3501 &vacrel->dead_items_info);
3502 return;
3503 }
3504
3505 /* Recreate the tidstore with the same max_bytes limitation */
3506 TidStoreDestroy(vacrel->dead_items);
3507 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3508
3509 /* Reset the counter */
3510 vacrel->dead_items_info->num_items = 0;
3511}

References fb(), parallel_vacuum_get_dead_items(), parallel_vacuum_reset_dead_items(), ParallelVacuumIsActive, TidStoreCreateLocal(), TidStoreDestroy(), and TidStoreMemoryUsage().

Referenced by lazy_vacuum().

◆ find_next_unskippable_block()

static void find_next_unskippable_block ( LVRelState vacrel,
bool skipsallvis 
)
static

Definition at line 1736 of file vacuumlazy.c.

1737{
1738 BlockNumber rel_pages = vacrel->rel_pages;
1739 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1740 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1741 bool next_unskippable_eager_scanned = false;
1742
1743 *skipsallvis = false;
1744
1745 for (;; next_unskippable_block++)
1746 {
1748 next_unskippable_block,
1749 &next_unskippable_vmbuffer);
1750
1751
1752 /*
1753 * At the start of each eager scan region, normal vacuums with eager
1754 * scanning enabled reset the failure counter, allowing vacuum to
1755 * resume eager scanning if it had been suspended in the previous
1756 * region.
1757 */
1758 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1759 {
1760 vacrel->eager_scan_remaining_fails =
1761 vacrel->eager_scan_max_fails_per_region;
1762 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1763 }
1764
1765 /*
1766 * A block is unskippable if it is not all visible according to the
1767 * visibility map.
1768 */
1770 {
1772 break;
1773 }
1774
1775 /*
1776 * Caller must scan the last page to determine whether it has tuples
1777 * (caller must have the opportunity to set vacrel->nonempty_pages).
1778 * This rule avoids having lazy_truncate_heap() take access-exclusive
1779 * lock on rel to attempt a truncation that fails anyway, just because
1780 * there are tuples on the last page (it is likely that there will be
1781 * tuples on other nearby pages as well, but those can be skipped).
1782 *
1783 * Implement this by always treating the last block as unsafe to skip.
1784 */
1785 if (next_unskippable_block == rel_pages - 1)
1786 break;
1787
1788 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1789 if (!vacrel->skipwithvm)
1790 break;
1791
1792 /*
1793 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1794 * already frozen by now), so this page can be skipped.
1795 */
1796 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1797 continue;
1798
1799 /*
1800 * Aggressive vacuums cannot skip any all-visible pages that are not
1801 * also all-frozen.
1802 */
1803 if (vacrel->aggressive)
1804 break;
1805
1806 /*
1807 * Normal vacuums with eager scanning enabled only skip all-visible
1808 * but not all-frozen pages if they have hit the failure limit for the
1809 * current eager scan region.
1810 */
1811 if (vacrel->eager_scan_remaining_fails > 0)
1812 {
1813 next_unskippable_eager_scanned = true;
1814 break;
1815 }
1816
1817 /*
1818 * All-visible blocks are safe to skip in a normal vacuum. But
1819 * remember that the final range contains such a block for later.
1820 */
1821 *skipsallvis = true;
1822 }
1823
1824 /* write the local variables back to vacrel */
1825 vacrel->next_unskippable_block = next_unskippable_block;
1826 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1827 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1828}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, and visibilitymap_get_status().

Referenced by heap_vac_scan_next_block().

◆ heap_page_would_be_all_visible()

static bool heap_page_would_be_all_visible ( Relation  rel,
Buffer  buf,
GlobalVisState vistest,
bool  allow_update_vistest,
OffsetNumber deadoffsets,
int  ndeadoffsets,
bool all_frozen,
TransactionId newest_live_xid,
OffsetNumber logging_offnum 
)
static

Definition at line 3592 of file vacuumlazy.c.

3600{
3601 Page page = BufferGetPage(buf);
3603 OffsetNumber offnum,
3604 maxoff;
3605 bool all_visible = true;
3606 int matched_dead_count = 0;
3607
3608 *newest_live_xid = InvalidTransactionId;
3609 *all_frozen = true;
3610
3611 Assert(ndeadoffsets == 0 || deadoffsets);
3612
3613#ifdef USE_ASSERT_CHECKING
3614 /* Confirm input deadoffsets[] is strictly sorted */
3615 if (ndeadoffsets > 1)
3616 {
3617 for (int i = 1; i < ndeadoffsets; i++)
3618 Assert(deadoffsets[i - 1] < deadoffsets[i]);
3619 }
3620#endif
3621
3622 maxoff = PageGetMaxOffsetNumber(page);
3623 for (offnum = FirstOffsetNumber;
3624 offnum <= maxoff && all_visible;
3625 offnum = OffsetNumberNext(offnum))
3626 {
3627 ItemId itemid;
3628 HeapTupleData tuple;
3630
3631 /*
3632 * Set the offset number so that we can display it along with any
3633 * error that occurred while processing this tuple.
3634 */
3635 *logging_offnum = offnum;
3636 itemid = PageGetItemId(page, offnum);
3637
3638 /* Unused or redirect line pointers are of no interest */
3639 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3640 continue;
3641
3642 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3643
3644 /*
3645 * Dead line pointers can have index pointers pointing to them. So
3646 * they can't be treated as visible
3647 */
3648 if (ItemIdIsDead(itemid))
3649 {
3650 if (!deadoffsets ||
3652 deadoffsets[matched_dead_count] != offnum)
3653 {
3654 *all_frozen = all_visible = false;
3655 break;
3656 }
3658 continue;
3659 }
3660
3661 Assert(ItemIdIsNormal(itemid));
3662
3663 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3664 tuple.t_len = ItemIdGetLength(itemid);
3665 tuple.t_tableOid = RelationGetRelid(rel);
3666
3667 /* Visibility checks may do IO or allocate memory */
3670 {
3671 case HEAPTUPLE_LIVE:
3672 {
3673 TransactionId xmin;
3674
3675 /* Check heap_prune_record_unchanged_lp_normal comments */
3677 {
3678 all_visible = false;
3679 *all_frozen = false;
3680 break;
3681 }
3682
3683 /*
3684 * The inserter definitely committed. But we don't know if
3685 * it is old enough that everyone sees it as committed.
3686 * Don't check that now.
3687 *
3688 * If we scan all tuples without finding one that prevents
3689 * the page from being all-visible, we then check whether
3690 * any snapshot still considers the newest XID on the page
3691 * to be running. In that case, the page is not considered
3692 * all-visible.
3693 */
3694 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3695
3696 /* Track newest xmin on page. */
3697 if (TransactionIdFollows(xmin, *newest_live_xid) &&
3699 *newest_live_xid = xmin;
3700
3701 /* Check whether this tuple is already frozen or not */
3702 if (all_visible && *all_frozen &&
3704 *all_frozen = false;
3705 }
3706 break;
3707
3708 case HEAPTUPLE_DEAD:
3712 {
3713 all_visible = false;
3714 *all_frozen = false;
3715 break;
3716 }
3717 default:
3718 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3719 break;
3720 }
3721 } /* scan along page */
3722
3723 /*
3724 * After processing all the live tuples on the page, if the newest xmin
3725 * among them may still be considered running by any snapshot, the page
3726 * cannot be all-visible.
3727 */
3728 if (all_visible &&
3729 TransactionIdIsNormal(*newest_live_xid) &&
3730 GlobalVisTestXidConsideredRunning(vistest, *newest_live_xid,
3732 {
3733 all_visible = false;
3734 *all_frozen = false;
3735 }
3736
3737 /* Clear the offset information once we have processed the given page. */
3739
3740 return all_visible;
3741}

References Assert, buf, BufferGetBlockNumber(), BufferGetPage(), CritSectionCount, elog, ERROR, fb(), FirstOffsetNumber, GlobalVisTestXidConsideredRunning(), heap_tuple_needs_eventual_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetXmin(), HeapTupleHeaderXminCommitted(), HeapTupleSatisfiesVacuumHorizon(), i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), and TransactionIdIsNormal.

Referenced by lazy_vacuum_heap_page().

◆ heap_vac_scan_next_block()

static BlockNumber heap_vac_scan_next_block ( ReadStream stream,
void callback_private_data,
void per_buffer_data 
)
static

Definition at line 1636 of file vacuumlazy.c.

1639{
1641 LVRelState *vacrel = callback_private_data;
1642
1643 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1645
1646 /* Have we reached the end of the relation? */
1647 if (next_block >= vacrel->rel_pages)
1648 {
1649 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1650 {
1651 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1652 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1653 }
1654 return InvalidBlockNumber;
1655 }
1656
1657 /*
1658 * We must be in one of the three following states:
1659 */
1660 if (next_block > vacrel->next_unskippable_block ||
1661 vacrel->next_unskippable_block == InvalidBlockNumber)
1662 {
1663 /*
1664 * 1. We have just processed an unskippable block (or we're at the
1665 * beginning of the scan). Find the next unskippable block using the
1666 * visibility map.
1667 */
1668 bool skipsallvis;
1669
1671
1672 /*
1673 * We now know the next block that we must process. It can be the
1674 * next block after the one we just processed, or something further
1675 * ahead. If it's further ahead, we can jump to it, but we choose to
1676 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1677 * pages. Since we're reading sequentially, the OS should be doing
1678 * readahead for us, so there's no gain in skipping a page now and
1679 * then. Skipping such a range might even discourage sequential
1680 * detection.
1681 *
1682 * This test also enables more frequent relfrozenxid advancement
1683 * during non-aggressive VACUUMs. If the range has any all-visible
1684 * pages then skipping makes updating relfrozenxid unsafe, which is a
1685 * real downside.
1686 */
1687 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1688 {
1689 next_block = vacrel->next_unskippable_block;
1690 if (skipsallvis)
1691 vacrel->skippedallvis = true;
1692 }
1693 }
1694
1695 /* Now we must be in one of the two remaining states: */
1696 if (next_block < vacrel->next_unskippable_block)
1697 {
1698 /*
1699 * 2. We are processing a range of blocks that we could have skipped
1700 * but chose not to. We know that they are all-visible in the VM,
1701 * otherwise they would've been unskippable.
1702 */
1703 vacrel->current_block = next_block;
1704 /* Block was not eager scanned */
1705 *((bool *) per_buffer_data) = false;
1706 return vacrel->current_block;
1707 }
1708 else
1709 {
1710 /*
1711 * 3. We reached the next unskippable block. Process it. On next
1712 * iteration, we will be back in state 1.
1713 */
1714 Assert(next_block == vacrel->next_unskippable_block);
1715
1716 vacrel->current_block = next_block;
1717 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1718 return vacrel->current_block;
1719 }
1720}

References Assert, BufferIsValid(), LVRelState::current_block, fb(), find_next_unskippable_block(), InvalidBlockNumber, InvalidBuffer, ReleaseBuffer(), and SKIP_PAGES_THRESHOLD.

Referenced by lazy_scan_heap().

◆ heap_vacuum_eager_scan_setup()

static void heap_vacuum_eager_scan_setup ( LVRelState vacrel,
const VacuumParams  params 
)
static

Definition at line 496 of file vacuumlazy.c.

497{
501 float first_region_ratio;
503
504 /*
505 * Initialize eager scan management fields to their disabled values.
506 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
507 * of tables without sufficiently old tuples disable eager scanning.
508 */
509 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
510 vacrel->eager_scan_max_fails_per_region = 0;
511 vacrel->eager_scan_remaining_fails = 0;
512 vacrel->eager_scan_remaining_successes = 0;
513
514 /* If eager scanning is explicitly disabled, just return. */
515 if (params.max_eager_freeze_failure_rate == 0)
516 return;
517
518 /*
519 * The caller will have determined whether or not an aggressive vacuum is
520 * required by either the vacuum parameters or the relative age of the
521 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
522 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
523 * so scans of all-visible pages are not considered eager.
524 */
525 if (vacrel->aggressive)
526 return;
527
528 /*
529 * Aggressively vacuuming a small relation shouldn't take long, so it
530 * isn't worth amortizing. We use two times the region size as the size
531 * cutoff because the eager scan start block is a random spot somewhere in
532 * the first region, making the second region the first to be eager
533 * scanned normally.
534 */
535 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
536 return;
537
538 /*
539 * We only want to enable eager scanning if we are likely to be able to
540 * freeze some of the pages in the relation.
541 *
542 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
543 * are technically freezable, but we won't freeze them unless the criteria
544 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
545 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
546 *
547 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
548 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
549 * enable eager scanning.
550 */
551 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
552 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
553 vacrel->cutoffs.FreezeLimit))
555
557 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
558 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
559 vacrel->cutoffs.MultiXactCutoff))
561
563 return;
564
565 /* We have met the criteria to eagerly scan some pages. */
566
567 /*
568 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
569 * all-visible but not all-frozen blocks in the relation.
570 */
572
573 vacrel->eager_scan_remaining_successes =
576
577 /* If every all-visible page is frozen, eager scanning is disabled. */
578 if (vacrel->eager_scan_remaining_successes == 0)
579 return;
580
581 /*
582 * Now calculate the bounds of the first eager scan region. Its end block
583 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
584 * blocks. This affects the bounds of all subsequent regions and avoids
585 * eager scanning and failing to freeze the same blocks each vacuum of the
586 * relation.
587 */
589
590 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
591
594
595 vacrel->eager_scan_max_fails_per_region =
598
599 /*
600 * The first region will be smaller than subsequent regions. As such,
601 * adjust the eager freeze failures tolerated for this region.
602 */
603 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
605
606 vacrel->eager_scan_remaining_fails =
607 vacrel->eager_scan_max_fails_per_region *
609}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), InvalidBlockNumber, VacuumParams::max_eager_freeze_failure_rate, MAX_EAGER_FREEZE_SUCCESS_RATE, MultiXactIdIsValid, MultiXactIdPrecedes(), pg_global_prng_state, pg_prng_uint32(), TransactionIdIsNormal, TransactionIdPrecedes(), and visibilitymap_count().

Referenced by heap_vacuum_rel().

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)

Definition at line 623 of file vacuumlazy.c.

625{
627 bool verbose,
628 instrument,
629 skipwithvm,
637 TimestampTz starttime = 0;
639 startwritetime = 0;
642 ErrorContextCallback errcallback;
643 char **indnames = NULL;
645
646 verbose = (params.options & VACOPT_VERBOSE) != 0;
647 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
648 params.log_vacuum_min_duration >= 0));
649 if (instrument)
650 {
652 if (track_io_timing)
653 {
656 }
657 }
658
659 /* Used for instrumentation and stats report */
660 starttime = GetCurrentTimestamp();
661
663 RelationGetRelid(rel));
666 params.is_wraparound
669 else
672
673 /*
674 * Setup error traceback support for ereport() first. The idea is to set
675 * up an error context callback to display additional information on any
676 * error during a vacuum. During different phases of vacuum, we update
 677 the state so that the error context callback always displays current
678 * information.
679 *
680 * Copy the names of heap rel into local memory for error reporting
681 * purposes, too. It isn't always safe to assume that we can get the name
682 * of each rel. It's convenient for code in lazy_scan_heap to always use
683 * these temp copies.
684 */
687 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
688 vacrel->relname = pstrdup(RelationGetRelationName(rel));
689 vacrel->indname = NULL;
691 vacrel->verbose = verbose;
692 errcallback.callback = vacuum_error_callback;
693 errcallback.arg = vacrel;
694 errcallback.previous = error_context_stack;
695 error_context_stack = &errcallback;
696
697 /* Set up high level stuff about rel and its indexes */
698 vacrel->rel = rel;
700 &vacrel->indrels);
701 vacrel->bstrategy = bstrategy;
702 if (instrument && vacrel->nindexes > 0)
703 {
704 /* Copy index names used by instrumentation (not error reporting) */
705 indnames = palloc_array(char *, vacrel->nindexes);
706 for (int i = 0; i < vacrel->nindexes; i++)
708 }
709
710 /*
711 * The index_cleanup param either disables index vacuuming and cleanup or
712 * forces it to go ahead when we would otherwise apply the index bypass
713 * optimization. The default is 'auto', which leaves the final decision
714 * up to lazy_vacuum().
715 *
716 * The truncate param allows user to avoid attempting relation truncation,
717 * though it can't force truncation to happen.
718 */
721 params.truncate != VACOPTVALUE_AUTO);
722
723 /*
 724 * While VacuumFailsafeActive is reset to false before calling this, we
725 * still need to reset it here due to recursive calls.
726 */
727 VacuumFailsafeActive = false;
728 vacrel->consider_bypass_optimization = true;
729 vacrel->do_index_vacuuming = true;
730 vacrel->do_index_cleanup = true;
731 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
733 {
734 /* Force disable index vacuuming up-front */
735 vacrel->do_index_vacuuming = false;
736 vacrel->do_index_cleanup = false;
737 }
738 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
739 {
740 /* Force index vacuuming. Note that failsafe can still bypass. */
741 vacrel->consider_bypass_optimization = false;
742 }
743 else
744 {
745 /* Default/auto, make all decisions dynamically */
747 }
748
749 /* Initialize page counters explicitly (be tidy) */
750 vacrel->scanned_pages = 0;
751 vacrel->eager_scanned_pages = 0;
752 vacrel->removed_pages = 0;
753 vacrel->new_frozen_tuple_pages = 0;
754 vacrel->lpdead_item_pages = 0;
755 vacrel->missed_dead_pages = 0;
756 vacrel->nonempty_pages = 0;
757 /* dead_items_alloc allocates vacrel->dead_items later on */
758
759 /* Allocate/initialize output statistics state */
760 vacrel->new_rel_tuples = 0;
761 vacrel->new_live_tuples = 0;
762 vacrel->indstats = (IndexBulkDeleteResult **)
763 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
764
765 /* Initialize remaining counters (be tidy) */
766 vacrel->num_index_scans = 0;
767 vacrel->num_dead_items_resets = 0;
768 vacrel->total_dead_items_bytes = 0;
769 vacrel->tuples_deleted = 0;
770 vacrel->tuples_frozen = 0;
771 vacrel->lpdead_items = 0;
772 vacrel->live_tuples = 0;
773 vacrel->recently_dead_tuples = 0;
774 vacrel->missed_dead_tuples = 0;
775
776 vacrel->new_all_visible_pages = 0;
777 vacrel->new_all_visible_all_frozen_pages = 0;
778 vacrel->new_all_frozen_pages = 0;
779
780 vacrel->worker_usage.vacuum.nlaunched = 0;
781 vacrel->worker_usage.vacuum.nplanned = 0;
782 vacrel->worker_usage.cleanup.nlaunched = 0;
783 vacrel->worker_usage.cleanup.nplanned = 0;
784
785 /*
786 * Get cutoffs that determine which deleted tuples are considered DEAD,
787 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
788 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
789 * happen in this order to ensure that the OldestXmin cutoff field works
790 * as an upper bound on the XIDs stored in the pages we'll actually scan
791 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
792 *
793 * Next acquire vistest, a related cutoff that's used in pruning. We use
794 * vistest in combination with OldestXmin to ensure that
795 * heap_page_prune_and_freeze() always removes any deleted tuple whose
796 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
797 * whether a tuple should be frozen or removed. (In the future we might
798 * want to teach lazy_scan_prune to recompute vistest from time to time,
799 * to increase the number of dead tuples it can prune away.)
800 */
801 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
803 vacrel->vistest = GlobalVisTestFor(rel);
804
805 /* Initialize state used to track oldest extant XID/MXID */
806 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
807 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
808
809 /*
810 * Initialize state related to tracking all-visible page skipping. This is
811 * very important to determine whether or not it is safe to advance the
812 * relfrozenxid/relminmxid.
813 */
814 vacrel->skippedallvis = false;
815 skipwithvm = true;
817 {
818 /*
819 * Force aggressive mode, and disable skipping blocks using the
820 * visibility map (even those set all-frozen)
821 */
822 vacrel->aggressive = true;
823 skipwithvm = false;
824 }
825
826 vacrel->skipwithvm = skipwithvm;
827
828 /*
829 * Set up eager scan tracking state. This must happen after determining
830 * whether or not the vacuum must be aggressive, because only normal
831 * vacuums use the eager scan algorithm.
832 */
834
835 /* Report the vacuum mode: 'normal' or 'aggressive' */
837 vacrel->aggressive
840
841 if (verbose)
842 {
843 if (vacrel->aggressive)
845 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
846 vacrel->dbname, vacrel->relnamespace,
847 vacrel->relname)));
848 else
850 (errmsg("vacuuming \"%s.%s.%s\"",
851 vacrel->dbname, vacrel->relnamespace,
852 vacrel->relname)));
853 }
854
855 /*
856 * Allocate dead_items memory using dead_items_alloc. This handles
857 * parallel VACUUM initialization as part of allocating shared memory
858 * space used for dead_items. (But do a failsafe precheck first, to
859 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
860 * is already dangerously old.)
861 */
864
865 /*
866 * Call lazy_scan_heap to perform all required heap pruning, index
867 * vacuuming, and heap vacuuming (plus related processing)
868 */
870
871 /*
872 * Save dead items max_bytes and update the memory usage statistics before
873 * cleanup, they are freed in parallel vacuum cases during
874 * dead_items_cleanup().
875 */
876 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
877 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
878
879 /*
880 * Free resources managed by dead_items_alloc. This ends parallel mode in
881 * passing when necessary.
882 */
885
886 /*
887 * Update pg_class entries for each of rel's indexes where appropriate.
888 *
889 * Unlike the later update to rel's pg_class entry, this is not critical.
890 * Maintains relpages/reltuples statistics used by the planner only.
891 */
892 if (vacrel->do_index_cleanup)
894
895 /* Done with rel's indexes */
896 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
897
898 /* Optionally truncate rel */
901
902 /* Pop the error context stack */
903 error_context_stack = errcallback.previous;
904
905 /* Report that we are now doing final cleanup */
908
909 /*
910 * Prepare to update rel's pg_class entry.
911 *
912 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
913 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
914 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
915 */
916 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
917 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
918 vacrel->cutoffs.relfrozenxid,
919 vacrel->NewRelfrozenXid));
920 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
921 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
922 vacrel->cutoffs.relminmxid,
923 vacrel->NewRelminMxid));
924 if (vacrel->skippedallvis)
925 {
926 /*
927 * Must keep original relfrozenxid in a non-aggressive VACUUM that
928 * chose to skip an all-visible page range. The state that tracks new
929 * values will have missed unfrozen XIDs from the pages we skipped.
930 */
931 Assert(!vacrel->aggressive);
932 vacrel->NewRelfrozenXid = InvalidTransactionId;
933 vacrel->NewRelminMxid = InvalidMultiXactId;
934 }
935
936 /*
937 * For safety, clamp relallvisible to be not more than what we're setting
938 * pg_class.relpages to
939 */
940 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
944
945 /*
946 * An all-frozen block _must_ be all-visible. As such, clamp the count of
947 * all-frozen blocks to the count of all-visible blocks. This matches the
948 * clamping of relallvisible above.
949 */
952
953 /*
954 * Now actually update rel's pg_class entry.
955 *
956 * In principle new_live_tuples could be -1 indicating that we (still)
957 * don't know the tuple count. In practice that can't happen, since we
958 * scan every page that isn't skipped using the visibility map.
959 */
960 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
962 vacrel->nindexes > 0,
963 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
965
966 /*
967 * Report results to the cumulative stats system, too.
968 *
969 * Deliberately avoid telling the stats system about LP_DEAD items that
970 * remain in the table due to VACUUM bypassing index and heap vacuuming.
971 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
972 * It seems like a good idea to err on the side of not vacuuming again too
973 * soon in cases where the failsafe prevented significant amounts of heap
974 * vacuuming.
975 */
977 Max(vacrel->new_live_tuples, 0),
978 vacrel->recently_dead_tuples +
979 vacrel->missed_dead_tuples,
980 starttime);
982
983 if (instrument)
984 {
986
987 if (verbose || params.log_vacuum_min_duration == 0 ||
990 {
991 long secs_dur;
992 int usecs_dur;
993 WalUsage walusage;
994 BufferUsage bufferusage;
996 char *msgfmt;
997 int32 diff;
998 double read_rate = 0,
999 write_rate = 0;
1003
1005 memset(&walusage, 0, sizeof(WalUsage));
1007 memset(&bufferusage, 0, sizeof(BufferUsage));
1009
1010 total_blks_hit = bufferusage.shared_blks_hit +
1011 bufferusage.local_blks_hit;
1012 total_blks_read = bufferusage.shared_blks_read +
1013 bufferusage.local_blks_read;
1015 bufferusage.local_blks_dirtied;
1016
1018 if (verbose)
1019 {
1020 /*
1021 * Aggressiveness already reported earlier, in dedicated
1022 * VACUUM VERBOSE ereport
1023 */
1024 Assert(!params.is_wraparound);
1025 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1026 }
1027 else if (params.is_wraparound)
1028 {
1029 /*
1030 * While it's possible for a VACUUM to be both is_wraparound
1031 * and !aggressive, that's just a corner-case -- is_wraparound
1032 * implies aggressive. Produce distinct output for the corner
1033 * case all the same, just in case.
1034 */
1035 if (vacrel->aggressive)
1036 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1037 else
1038 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1039 }
1040 else
1041 {
1042 if (vacrel->aggressive)
1043 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1044 else
1045 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1046 }
1048 vacrel->dbname,
1049 vacrel->relnamespace,
1050 vacrel->relname,
1051 vacrel->num_index_scans);
1052 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1053 vacrel->removed_pages,
1055 vacrel->scanned_pages,
1056 orig_rel_pages == 0 ? 100.0 :
1057 100.0 * vacrel->scanned_pages /
1059 vacrel->eager_scanned_pages);
1061 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1062 vacrel->tuples_deleted,
1063 (int64) vacrel->new_rel_tuples,
1064 vacrel->recently_dead_tuples);
1065 if (vacrel->missed_dead_tuples > 0)
1067 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1068 vacrel->missed_dead_tuples,
1069 vacrel->missed_dead_pages);
1071 vacrel->cutoffs.OldestXmin);
1073 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1074 vacrel->cutoffs.OldestXmin, diff);
1076 {
1077 diff = (int32) (vacrel->NewRelfrozenXid -
1078 vacrel->cutoffs.relfrozenxid);
1080 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1081 vacrel->NewRelfrozenXid, diff);
1082 }
1083 if (minmulti_updated)
1084 {
1085 diff = (int32) (vacrel->NewRelminMxid -
1086 vacrel->cutoffs.relminmxid);
1088 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1089 vacrel->NewRelminMxid, diff);
1090 }
1091 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1092 vacrel->new_frozen_tuple_pages,
1093 orig_rel_pages == 0 ? 100.0 :
1094 100.0 * vacrel->new_frozen_tuple_pages /
1096 vacrel->tuples_frozen);
1097
1099 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1100 vacrel->new_all_visible_pages,
1101 vacrel->new_all_visible_all_frozen_pages +
1102 vacrel->new_all_frozen_pages,
1103 vacrel->new_all_frozen_pages);
1104 if (vacrel->do_index_vacuuming)
1105 {
1106 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1107 appendStringInfoString(&buf, _("index scan not needed: "));
1108 else
1109 appendStringInfoString(&buf, _("index scan needed: "));
1110
1111 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1112 }
1113 else
1114 {
1116 appendStringInfoString(&buf, _("index scan bypassed: "));
1117 else
1118 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1119
1120 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1121 }
1123 vacrel->lpdead_item_pages,
1124 orig_rel_pages == 0 ? 100.0 :
1125 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1126 vacrel->lpdead_items);
1127
1128 if (vacrel->worker_usage.vacuum.nplanned > 0)
1130 _("parallel workers: index vacuum: %d planned, %d launched in total\n"),
1131 vacrel->worker_usage.vacuum.nplanned,
1132 vacrel->worker_usage.vacuum.nlaunched);
1133
1134 if (vacrel->worker_usage.cleanup.nplanned > 0)
1136 _("parallel workers: index cleanup: %d planned, %d launched\n"),
1137 vacrel->worker_usage.cleanup.nplanned,
1138 vacrel->worker_usage.cleanup.nlaunched);
1139
1140 for (int i = 0; i < vacrel->nindexes; i++)
1141 {
1142 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1143
1144 if (!istat)
1145 continue;
1146
1148 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1149 indnames[i],
1150 istat->num_pages,
1151 istat->pages_newly_deleted,
1152 istat->pages_deleted,
1153 istat->pages_free);
1154 }
1156 {
1157 /*
1158 * We bypass the changecount mechanism because this value is
1159 * only updated by the calling process. We also rely on the
1160 * above call to pgstat_progress_end_command() to not clear
1161 * the st_progress_param array.
1162 */
1163 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1165 }
1166 if (track_io_timing)
1167 {
1168 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1169 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1170
1171 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1172 read_ms, write_ms);
1173 }
1174 if (secs_dur > 0 || usecs_dur > 0)
1175 {
1177 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1179 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1180 }
1181 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1184 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1189 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1190 walusage.wal_records,
1191 walusage.wal_fpi,
1192 walusage.wal_bytes,
1193 walusage.wal_fpi_bytes,
1194 walusage.wal_buffers_full);
1195
1196 /*
1197 * Report the dead items memory usage.
1198 *
1199 * The num_dead_items_resets counter increases when we reset the
1200 * collected dead items, so the counter is non-zero if at least
1201 * one dead item is collected, even if index vacuuming is
1202 * disabled.
1203 */
1205 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1206 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1207 vacrel->num_dead_items_resets),
1208 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1209 vacrel->num_dead_items_resets,
1210 (double) dead_items_max_bytes / (1024 * 1024));
1211 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1212
1213 ereport(verbose ? INFO : LOG,
1214 (errmsg_internal("%s", buf.data)));
1215 pfree(buf.data);
1216 }
1217 }
1218
1219 /* Cleanup index statistics and index names */
1220 for (int i = 0; i < vacrel->nindexes; i++)
1221 {
1222 if (vacrel->indstats[i])
1223 pfree(vacrel->indstats[i]);
1224
1225 if (instrument)
1226 pfree(indnames[i]);
1227 }
1228}

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg, errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), 
PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().

◆ lazy_check_wraparound_failsafe()

static bool lazy_check_wraparound_failsafe ( LVRelState vacrel)
static

Definition at line 2878 of file vacuumlazy.c.

2879{
2880 /* Don't warn more than once per VACUUM */
2882 return true;
2883
2885 {
2886 const int progress_index[] = {
2890 };
2892
2893 VacuumFailsafeActive = true;
2894
2895 /*
2896 * Abandon use of a buffer access strategy to allow use of all of
2897 * shared buffers. We assume the caller who allocated the memory for
2898 * the BufferAccessStrategy will free it.
2899 */
2900 vacrel->bstrategy = NULL;
2901
2902 /* Disable index vacuuming, index cleanup, and heap rel truncation */
2903 vacrel->do_index_vacuuming = false;
2904 vacrel->do_index_cleanup = false;
2905 vacrel->do_rel_truncate = false;
2906
2907 /* Reset the progress counters and set the failsafe mode */
2909
2911 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2912 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2913 vacrel->num_index_scans),
2914 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2915 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2916 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2917
2918 /* Stop applying cost limits from this point on */
2919 VacuumCostActive = false;
2921
2922 return true;
2923 }
2924
2925 return false;
2926}

References ereport, errdetail(), errhint(), errmsg, fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_FAILSAFE, unlikely, vacuum_xid_failsafe_check(), VacuumCostActive, VacuumCostBalance, VacuumFailsafeActive, and WARNING.

Referenced by heap_vacuum_rel(), lazy_scan_heap(), and lazy_vacuum_all_indexes().

◆ lazy_cleanup_all_indexes()

static void lazy_cleanup_all_indexes ( LVRelState vacrel)
static

Definition at line 2932 of file vacuumlazy.c.

2933{
2934 double reltuples = vacrel->new_rel_tuples;
2935 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2936 const int progress_start_index[] = {
2939 };
2940 const int progress_end_index[] = {
2943 };
2945 int64 progress_end_val[2] = {0, 0};
2946
2947 Assert(vacrel->do_index_cleanup);
2948 Assert(vacrel->nindexes > 0);
2949
2950 /*
2951 * Report that we are now cleaning up indexes and the number of indexes to
2952 * cleanup.
2953 */
2955 progress_start_val[1] = vacrel->nindexes;
2957
2959 {
2960 for (int idx = 0; idx < vacrel->nindexes; idx++)
2961 {
2962 Relation indrel = vacrel->indrels[idx];
2963 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2964
2965 vacrel->indstats[idx] =
2966 lazy_cleanup_one_index(indrel, istat, reltuples,
2967 estimated_count, vacrel);
2968
2969 /* Report the number of indexes cleaned up */
2971 idx + 1);
2972 }
2973 }
2974 else
2975 {
2976 /* Outsource everything to parallel variant */
2978 vacrel->num_index_scans,
2979 estimated_count,
2980 &(vacrel->worker_usage.cleanup));
2981 }
2982
2983 /* Reset the progress counters */
2985}

References Assert, fb(), idx(), lazy_cleanup_one_index(), parallel_vacuum_cleanup_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_PHASE, and PROGRESS_VACUUM_PHASE_INDEX_CLEANUP.

Referenced by lazy_scan_heap().

◆ lazy_cleanup_one_index()

static IndexBulkDeleteResult * lazy_cleanup_one_index ( Relation  indrel,
IndexBulkDeleteResult istat,
double  reltuples,
bool  estimated_count,
LVRelState vacrel 
)
static

Definition at line 3050 of file vacuumlazy.c.

3053{
3056
3057 ivinfo.index = indrel;
3058 ivinfo.heaprel = vacrel->rel;
3059 ivinfo.analyze_only = false;
3060 ivinfo.report_progress = false;
3061 ivinfo.estimated_count = estimated_count;
3062 ivinfo.message_level = DEBUG2;
3063
3064 ivinfo.num_heap_tuples = reltuples;
3065 ivinfo.strategy = vacrel->bstrategy;
3066
3067 /*
3068 * Update error traceback information.
3069 *
3070 * The index name is saved during this phase and restored immediately
3071 * after this phase. See vacuum_error_callback.
3072 */
3073 Assert(vacrel->indname == NULL);
3078
3079 istat = vac_cleanup_one_index(&ivinfo, istat);
3080
3081 /* Revert to the previous phase information for error traceback */
3083 pfree(vacrel->indname);
3084 vacrel->indname = NULL;
3085
3086 return istat;
3087}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_cleanup_one_index(), and VACUUM_ERRCB_PHASE_INDEX_CLEANUP.

Referenced by lazy_cleanup_all_indexes().

◆ lazy_scan_heap()

static void lazy_scan_heap ( LVRelState vacrel)
static

Definition at line 1267 of file vacuumlazy.c.

1268{
1269 ReadStream *stream;
1270 BlockNumber rel_pages = vacrel->rel_pages,
1271 blkno = 0,
1274 vacrel->eager_scan_remaining_successes; /* for logging */
1275 Buffer vmbuffer = InvalidBuffer;
1276 const int initprog_index[] = {
1280 };
1282
1283 /* Report that we're scanning the heap, advertising total # of blocks */
1285 initprog_val[1] = rel_pages;
1286 initprog_val[2] = vacrel->dead_items_info->max_bytes;
1288
1289 /* Initialize for the first heap_vac_scan_next_block() call */
1290 vacrel->current_block = InvalidBlockNumber;
1291 vacrel->next_unskippable_block = InvalidBlockNumber;
1292 vacrel->next_unskippable_eager_scanned = false;
1293 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1294
1295 /*
1296 * Set up the read stream for vacuum's first pass through the heap.
1297 *
1298 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1299 * explicit work in heap_vac_scan_next_block.
1300 */
1302 vacrel->bstrategy,
1303 vacrel->rel,
1306 vacrel,
1307 sizeof(bool));
1308
1309 while (true)
1310 {
1311 Buffer buf;
1312 Page page;
1313 bool was_eager_scanned = false;
1314 int ndeleted = 0;
1315 bool has_lpdead_items;
1316 void *per_buffer_data = NULL;
1317 bool vm_page_frozen = false;
1318 bool got_cleanup_lock = false;
1319
1320 vacuum_delay_point(false);
1321
1322 /*
1323 * Regularly check if wraparound failsafe should trigger.
1324 *
1325 * There is a similar check inside lazy_vacuum_all_indexes(), but
1326 * relfrozenxid might start to look dangerously old before we reach
1327 * that point. This check also provides failsafe coverage for the
1328 * one-pass strategy, and the two-pass strategy with the index_cleanup
1329 * param set to 'off'.
1330 */
1331 if (vacrel->scanned_pages > 0 &&
1332 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1334
1335 /*
1336 * Consider if we definitely have enough space to process TIDs on page
1337 * already. If we are close to overrunning the available space for
1338 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1339 * this page. However, let's force at least one page-worth of tuples
 1340 * to be stored so as to ensure we do at least some work when the memory
1341 * configured is so low that we run out before storing anything.
1342 */
1343 if (vacrel->dead_items_info->num_items > 0 &&
1344 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1345 {
1346 /*
1347 * Before beginning index vacuuming, we release any pin we may
1348 * hold on the visibility map page. This isn't necessary for
1349 * correctness, but we do it anyway to avoid holding the pin
1350 * across a lengthy, unrelated operation.
1351 */
1352 if (BufferIsValid(vmbuffer))
1353 {
1354 ReleaseBuffer(vmbuffer);
1355 vmbuffer = InvalidBuffer;
1356 }
1357
1358 /* Perform a round of index and heap vacuuming */
1359 vacrel->consider_bypass_optimization = false;
1361
1362 /*
1363 * Vacuum the Free Space Map to make newly-freed space visible on
1364 * upper-level FSM pages. Note that blkno is the previously
1365 * processed block.
1366 */
1368 blkno + 1);
1370
1371 /* Report that we are once again scanning the heap */
1374 }
1375
1376 buf = read_stream_next_buffer(stream, &per_buffer_data);
1377
1378 /* The relation is exhausted. */
1379 if (!BufferIsValid(buf))
1380 break;
1381
1382 was_eager_scanned = *((bool *) per_buffer_data);
1384 page = BufferGetPage(buf);
1385 blkno = BufferGetBlockNumber(buf);
1386
1387 vacrel->scanned_pages++;
1389 vacrel->eager_scanned_pages++;
1390
1391 /* Report as block scanned, update error traceback information */
1394 blkno, InvalidOffsetNumber);
1395
1396 /*
1397 * Pin the visibility map page in case we need to mark the page
1398 * all-visible. In most cases this will be very cheap, because we'll
1399 * already have the correct page pinned anyway.
1400 */
1401 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1402
1403 /*
1404 * We need a buffer cleanup lock to prune HOT chains and defragment
1405 * the page in lazy_scan_prune. But when it's not possible to acquire
1406 * a cleanup lock right away, we may be able to settle for reduced
1407 * processing using lazy_scan_noprune.
1408 */
1410
1411 if (!got_cleanup_lock)
1413
1414 /* Check for new or empty pages before lazy_scan_[no]prune call */
1416 vmbuffer))
1417 {
1418 /* Processed as new/empty page (lock and pin released) */
1419 continue;
1420 }
1421
1422 /*
1423 * If we didn't get the cleanup lock, we can still collect LP_DEAD
1424 * items in the dead_items area for later vacuuming, count live and
1425 * recently dead tuples for vacuum logging, and determine if this
1426 * block could later be truncated. If we encounter any xid/mxids that
1427 * require advancing the relfrozenxid/relminxid, we'll have to wait
1428 * for a cleanup lock and call lazy_scan_prune().
1429 */
1430 if (!got_cleanup_lock &&
1431 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1432 {
1433 /*
1434 * lazy_scan_noprune could not do all required processing. Wait
1435 * for a cleanup lock, and call lazy_scan_prune in the usual way.
1436 */
1437 Assert(vacrel->aggressive);
1440 got_cleanup_lock = true;
1441 }
1442
1443 /*
1444 * If we have a cleanup lock, we must now prune, freeze, and count
1445 * tuples. We may have acquired the cleanup lock originally, or we may
1446 * have gone back and acquired it after lazy_scan_noprune() returned
1447 * false. Either way, the page hasn't been processed yet.
1448 *
1449 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1450 * recently_dead_tuples and live tuples for vacuum logging, determine
1451 * if the block can later be truncated, and accumulate the details of
1452 * remaining LP_DEAD line pointers on the page into dead_items. These
1453 * dead items include those pruned by lazy_scan_prune() as well as
1454 * line pointers previously marked LP_DEAD.
1455 */
1456 if (got_cleanup_lock)
1457 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1458 vmbuffer,
1460
1461 /*
1462 * Count an eagerly scanned page as a failure or a success.
1463 *
1464 * Only lazy_scan_prune() freezes pages, so if we didn't get the
1465 * cleanup lock, we won't have frozen the page. However, we only count
1466 * pages that were too new to require freezing as eager freeze
1467 * failures.
1468 *
1469 * We could gather more information from lazy_scan_noprune() about
1470 * whether or not there were tuples with XIDs or MXIDs older than the
1471 * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1472 * exclude pages skipped due to cleanup lock contention from eager
1473 * freeze algorithm caps.
1474 */
1476 {
1477 /* Aggressive vacuums do not eager scan. */
1478 Assert(!vacrel->aggressive);
1479
1480 if (vm_page_frozen)
1481 {
1482 if (vacrel->eager_scan_remaining_successes > 0)
1483 vacrel->eager_scan_remaining_successes--;
1484
1485 if (vacrel->eager_scan_remaining_successes == 0)
1486 {
1487 /*
1488 * Report only once that we disabled eager scanning. We
1489 * may eagerly read ahead blocks in excess of the success
1490 * or failure caps before attempting to freeze them, so we
1491 * could reach here even after disabling additional eager
1492 * scanning.
1493 */
1494 if (vacrel->eager_scan_max_fails_per_region > 0)
1495 ereport(vacrel->verbose ? INFO : DEBUG2,
1496 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1498 vacrel->dbname, vacrel->relnamespace,
1499 vacrel->relname)));
1500
1501 /*
1502 * If we hit our success cap, permanently disable eager
1503 * scanning by setting the other eager scan management
1504 * fields to their disabled values.
1505 */
1506 vacrel->eager_scan_remaining_fails = 0;
1507 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1508 vacrel->eager_scan_max_fails_per_region = 0;
1509 }
1510 }
1511 else if (vacrel->eager_scan_remaining_fails > 0)
1512 vacrel->eager_scan_remaining_fails--;
1513 }
1514
1515 /*
1516 * Now drop the buffer lock and, potentially, update the FSM.
1517 *
1518 * Our goal is to update the freespace map the last time we touch the
1519 * page. If we'll process a block in the second pass, we may free up
1520 * additional space on the page, so it is better to update the FSM
1521 * after the second pass. If the relation has no indexes, or if index
1522 * vacuuming is disabled, there will be no second heap pass; if this
1523 * particular page has no dead items, the second heap pass will not
1524 * touch this page. So, in those cases, update the FSM now.
1525 *
1526 * Note: In corner cases, it's possible to miss updating the FSM
1527 * entirely. If index vacuuming is currently enabled, we'll skip the
1528 * FSM update now. But if failsafe mode is later activated, or there
1529 * are so few dead tuples that index vacuuming is bypassed, there will
1530 * also be no opportunity to update the FSM later, because we'll never
1531 * revisit this page. Since updating the FSM is desirable but not
1532 * absolutely required, that's OK.
1533 */
1534 if (vacrel->nindexes == 0
1535 || !vacrel->do_index_vacuuming
1536 || !has_lpdead_items)
1537 {
1538 Size freespace = PageGetHeapFreeSpace(page);
1539
1541 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1542
1543 /*
1544 * Periodically perform FSM vacuuming to make newly-freed space
1545 * visible on upper FSM pages. This is done after vacuuming if the
1546 * table has indexes. There will only be newly-freed space if we
1547 * held the cleanup lock and lazy_scan_prune() was called.
1548 */
1549 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1551 {
1553 blkno);
1555 }
1556 }
1557 else
1559 }
1560
1561 vacrel->blkno = InvalidBlockNumber;
1562 if (BufferIsValid(vmbuffer))
1563 ReleaseBuffer(vmbuffer);
1564
1565 /*
1566 * Report that everything is now scanned. We never skip scanning the last
1567 * block in the relation, so we can pass rel_pages here.
1568 */
1570 rel_pages);
1571
1572 /* now we can compute the new value for pg_class.reltuples */
1573 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1574 vacrel->scanned_pages,
1575 vacrel->live_tuples);
1576
1577 /*
1578 * Also compute the total number of surviving heap entries. In the
1579 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1580 */
1581 vacrel->new_rel_tuples =
1582 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1583 vacrel->missed_dead_tuples;
1584
1585 read_stream_end(stream);
1586
1587 /*
1588 * Do index vacuuming (call each index's ambulkdelete routine), then do
1589 * related heap vacuuming
1590 */
1591 if (vacrel->dead_items_info->num_items > 0)
1593
1594 /*
1595 * Vacuum the remainder of the Free Space Map. We must do this whether or
1596 * not there were indexes, and whether or not we bypassed index vacuuming.
1597 * We can pass rel_pages here because we never skip scanning the last
1598 * block of the relation.
1599 */
1600 if (rel_pages > next_fsm_block_to_vacuum)
1602
1603 /* report all blocks vacuumed */
1605
1606 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1607 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1609}

References Assert, buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DEBUG2, ereport, errmsg, FAILSAFE_EVERY_PAGES, fb(), FreeSpaceMapVacuumRange(), heap_vac_scan_next_block(), INFO, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_check_wraparound_failsafe(), lazy_cleanup_all_indexes(), lazy_scan_new_or_empty(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum(), LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, Max, PageGetHeapFreeSpace(), pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_SCANNED, PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_SCAN_HEAP, PROGRESS_VACUUM_TOTAL_HEAP_BLKS, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), RecordPageWithFreeSpace(), ReleaseBuffer(), TidStoreMemoryUsage(), UnlockReleaseBuffer(), update_vacuum_error_info(), vac_estimate_reltuples(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_FSM_EVERY_PAGES, and visibilitymap_pin().

Referenced by heap_vacuum_rel().

◆ lazy_scan_new_or_empty()

static bool lazy_scan_new_or_empty ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool  sharelock,
Buffer  vmbuffer 
)
static

Definition at line 1865 of file vacuumlazy.c.

1867{
1868 Size freespace;
1869
1870 if (PageIsNew(page))
1871 {
1872 /*
1873 * All-zeroes pages can be left over if either a backend extends the
1874 * relation by a single page, but crashes before the newly initialized
1875 * page has been written out, or when bulk-extending the relation
1876 * (which creates a number of empty pages at the tail end of the
1877 * relation), and then enters them into the FSM.
1878 *
1879 * Note we do not enter the page into the visibilitymap. That has the
1880 * downside that we repeatedly visit this page in subsequent vacuums,
1881 * but otherwise we'll never discover the space on a promoted standby.
1882 * The harm of repeated checking ought to normally not be too bad. The
1883 * space usually should be used at some point, otherwise there
1884 * wouldn't be any regular vacuums.
1885 *
1886 * Make sure these pages are in the FSM, to ensure they can be reused.
1887 * Do that by testing if there's any space recorded for the page. If
1888 * not, enter it. We do so after releasing the lock on the heap page,
1889 * the FSM is approximate, after all.
1890 */
1892
1893 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1894 {
1895 freespace = BLCKSZ - SizeOfPageHeaderData;
1896
1897 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1898 }
1899
1900 return true;
1901 }
1902
1903 if (PageIsEmpty(page))
1904 {
1905 /*
1906 * It seems likely that caller will always be able to get a cleanup
1907 * lock on an empty page. But don't take any chances -- escalate to
1908 * an exclusive lock (still don't need a cleanup lock, though).
1909 */
1910 if (sharelock)
1911 {
1914
1915 if (!PageIsEmpty(page))
1916 {
1917 /* page isn't new or empty -- keep lock and pin for now */
1918 return false;
1919 }
1920 }
1921 else
1922 {
1923 /* Already have a full cleanup lock (which is more than enough) */
1924 }
1925
1926 /*
1927 * Unlike new pages, empty pages are always set all-visible and
1928 * all-frozen.
1929 */
1930 if (!PageIsAllVisible(page))
1931 {
1932 /* Lock vmbuffer before entering critical section */
1934
1936
1937 /* mark buffer dirty before writing a WAL record */
1939
1940 PageSetAllVisible(page);
1941 PageClearPrunable(page);
1942 visibilitymap_set(blkno,
1943 vmbuffer,
1946 vacrel->rel->rd_locator);
1947
1948 /*
1949 * Emit WAL for setting PD_ALL_VISIBLE on the heap page and
1950 * setting the VM.
1951 */
1952 if (RelationNeedsWAL(vacrel->rel))
1954 vmbuffer,
1957 InvalidTransactionId, /* conflict xid */
1958 false, /* cleanup lock */
1959 PRUNE_VACUUM_SCAN, /* reason */
1960 NULL, 0,
1961 NULL, 0,
1962 NULL, 0,
1963 NULL, 0);
1964
1966
1967 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
1968
1969 /* Count the newly all-frozen pages for logging */
1970 vacrel->new_all_visible_pages++;
1971 vacrel->new_all_visible_all_frozen_pages++;
1972 }
1973
1974 freespace = PageGetHeapFreeSpace(page);
1976 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1977 return true;
1978 }
1979
1980 /* page isn't new or empty -- keep lock and pin */
1981 return false;
1982}

References buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, END_CRIT_SECTION, fb(), GetRecordedFreeSpace(), InvalidTransactionId, LockBuffer(), log_heap_prune_and_freeze(), MarkBufferDirty(), PageClearPrunable, PageGetHeapFreeSpace(), PageIsAllVisible(), PageIsEmpty(), PageIsNew(), PageSetAllVisible(), PRUNE_VACUUM_SCAN, RecordPageWithFreeSpace(), RelationNeedsWAL, SizeOfPageHeaderData, START_CRIT_SECTION, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, and visibilitymap_set().

Referenced by lazy_scan_heap().

◆ lazy_scan_noprune()

static bool lazy_scan_noprune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool has_lpdead_items 
)
static

Definition at line 2146 of file vacuumlazy.c.

2151{
2152 OffsetNumber offnum,
2153 maxoff;
2154 int lpdead_items,
2155 live_tuples,
2156 recently_dead_tuples,
2157 missed_dead_tuples;
2158 bool hastup;
2160 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2161 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2163
2164 Assert(BufferGetBlockNumber(buf) == blkno);
2165
2166 hastup = false; /* for now */
2167
2168 lpdead_items = 0;
2169 live_tuples = 0;
2170 recently_dead_tuples = 0;
2171 missed_dead_tuples = 0;
2172
2173 maxoff = PageGetMaxOffsetNumber(page);
2174 for (offnum = FirstOffsetNumber;
2175 offnum <= maxoff;
2176 offnum = OffsetNumberNext(offnum))
2177 {
2178 ItemId itemid;
2179 HeapTupleData tuple;
2180
2181 vacrel->offnum = offnum;
2182 itemid = PageGetItemId(page, offnum);
2183
2184 if (!ItemIdIsUsed(itemid))
2185 continue;
2186
2187 if (ItemIdIsRedirected(itemid))
2188 {
2189 hastup = true;
2190 continue;
2191 }
2192
2193 if (ItemIdIsDead(itemid))
2194 {
2195 /*
2196 * Deliberately don't set hastup=true here. See same point in
2197 * lazy_scan_prune for an explanation.
2198 */
2199 deadoffsets[lpdead_items++] = offnum;
2200 continue;
2201 }
2202
2203 hastup = true; /* page prevents rel truncation */
2204 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2206 &NoFreezePageRelfrozenXid,
2207 &NoFreezePageRelminMxid))
2208 {
2209 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2210 if (vacrel->aggressive)
2211 {
2212 /*
2213 * Aggressive VACUUMs must always be able to advance rel's
2214 * relfrozenxid to a value >= FreezeLimit (and be able to
2215 * advance rel's relminmxid to a value >= MultiXactCutoff).
2216 * The ongoing aggressive VACUUM won't be able to do that
2217 * unless it can freeze an XID (or MXID) from this tuple now.
2218 *
2219 * The only safe option is to have caller perform processing
2220 * of this page using lazy_scan_prune. Caller might have to
2221 * wait a while for a cleanup lock, but it can't be helped.
2222 */
2223 vacrel->offnum = InvalidOffsetNumber;
2224 return false;
2225 }
2226
2227 /*
2228 * Non-aggressive VACUUMs are under no obligation to advance
2229 * relfrozenxid (even by one XID). We can be much laxer here.
2230 *
2231 * Currently we always just accept an older final relfrozenxid
2232 * and/or relminmxid value. We never make caller wait or work a
2233 * little harder, even when it likely makes sense to do so.
2234 */
2235 }
2236
2237 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2238 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2239 tuple.t_len = ItemIdGetLength(itemid);
2240 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2241
2242 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2243 buf))
2244 {
2246 case HEAPTUPLE_LIVE:
2247
2248 /*
2249 * Count both cases as live, just like lazy_scan_prune
2250 */
2251 live_tuples++;
2252
2253 break;
2254 case HEAPTUPLE_DEAD:
2255
2256 /*
2257 * There is some useful work for pruning to do, that won't be
2258 * done due to failure to get a cleanup lock.
2259 */
2260 missed_dead_tuples++;
2261 break;
2263
2264 /*
2265 * Count in recently_dead_tuples, just like lazy_scan_prune
2266 */
2267 recently_dead_tuples++;
2268 break;
2270
2271 /*
2272 * Do not count these rows as live, just like lazy_scan_prune
2273 */
2274 break;
2275 default:
2276 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2277 break;
2278 }
2279 }
2280
2281 vacrel->offnum = InvalidOffsetNumber;
2282
2283 /*
2284 * By here we know for sure that caller can put off freezing and pruning
2285 * this particular page until the next VACUUM. Remember its details now.
2286 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2287 */
2288 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2289 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2290
2291 /* Save any LP_DEAD items found on the page in dead_items */
2292 if (vacrel->nindexes == 0)
2293 {
2294 /* Using one-pass strategy (since table has no indexes) */
2295 if (lpdead_items > 0)
2296 {
2297 /*
2298 * Perfunctory handling for the corner case where a single pass
2299 * strategy VACUUM cannot get a cleanup lock, and it turns out
2300 * that there is one or more LP_DEAD items: just count the LP_DEAD
2301 * items as missed_dead_tuples instead. (This is a bit dishonest,
2302 * but it beats having to maintain specialized heap vacuuming code
2303 * forever, for vanishingly little benefit.)
2304 */
2305 hastup = true;
2306 missed_dead_tuples += lpdead_items;
2307 }
2308 }
2309 else if (lpdead_items > 0)
2310 {
2311 /*
2312 * Page has LP_DEAD items, and so any references/TIDs that remain in
2313 * indexes will be deleted during index vacuuming (and then marked
2314 * LP_UNUSED in the heap)
2315 */
2316 vacrel->lpdead_item_pages++;
2317
2318 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2319
2320 vacrel->lpdead_items += lpdead_items;
2321 }
2322
2323 /*
2324 * Finally, add relevant page-local counts to whole-VACUUM counts
2325 */
2326 vacrel->live_tuples += live_tuples;
2327 vacrel->recently_dead_tuples += recently_dead_tuples;
2328 vacrel->missed_dead_tuples += missed_dead_tuples;
2329 if (missed_dead_tuples > 0)
2330 vacrel->missed_dead_pages++;
2331
2332 /* Can't truncate this page */
2333 if (hastup)
2334 vacrel->nonempty_pages = blkno + 1;
2335
2336 /* Did we find LP_DEAD items? */
2337 *has_lpdead_items = (lpdead_items > 0);
2338
2339 /* Caller won't need to call lazy_scan_prune with same page */
2340 return true;
2341}

References Assert, buf, BufferGetBlockNumber(), dead_items_add(), elog, ERROR, fb(), FirstOffsetNumber, heap_tuple_should_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuum(), InvalidOffsetNumber, ItemIdGetLength, ItemIdIsDead, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), MaxHeapTuplesPerPage, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by lazy_scan_heap().

◆ lazy_scan_prune()

static int lazy_scan_prune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
Buffer  vmbuffer,
bool has_lpdead_items,
bool vm_page_frozen 
)
static

Definition at line 2009 of file vacuumlazy.c.

2016{
2017 Relation rel = vacrel->rel;
2019 PruneFreezeParams params = {
2020 .relation = rel,
2021 .buffer = buf,
2022 .vmbuffer = vmbuffer,
2023 .reason = PRUNE_VACUUM_SCAN,
2024 .options = HEAP_PAGE_PRUNE_FREEZE,
2025 .vistest = vacrel->vistest,
2026 .cutoffs = &vacrel->cutoffs,
2027 };
2028
2029 Assert(BufferGetBlockNumber(buf) == blkno);
2030
2031 /*
2032 * Prune all HOT-update chains and potentially freeze tuples on this page.
2033 *
2034 * If the relation has no indexes, we can immediately mark would-be dead
2035 * items LP_UNUSED.
2036 *
2037 * The number of tuples removed from the page is returned in
2038 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2039 * presult.lpdead_items's final value can be thought of as the number of
2040 * tuples that were deleted from indexes.
2041 *
2042 * We will update the VM after collecting LP_DEAD items and freezing
2043 * tuples. Pruning will have determined whether or not the page is
2044 * all-visible.
2045 */
2046 if (vacrel->nindexes == 0)
2048
2049 /*
2050 * Allow skipping full inspection of pages that the VM indicates are
2051 * already all-frozen (which may be scanned due to SKIP_PAGES_THRESHOLD).
2052 * However, if DISABLE_PAGE_SKIPPING was specified, we can't trust the VM,
2053 * so we must examine the page to make sure it is truly all-frozen and fix
2054 * it otherwise.
2055 */
2056 if (vacrel->skipwithvm)
2058
2060 &presult,
2061 &vacrel->offnum,
2062 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2063
2064 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2065 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2066
2067 if (presult.nfrozen > 0)
2068 {
2069 /*
2070 * We don't increment the new_frozen_tuple_pages instrumentation
2071 * counter when nfrozen == 0, since it only counts pages with newly
2072 * frozen tuples (don't confuse that with pages newly set all-frozen
2073 * in VM).
2074 */
2075 vacrel->new_frozen_tuple_pages++;
2076 }
2077
2078 /*
2079 * Now save details of the LP_DEAD items from the page in vacrel
2080 */
2081 if (presult.lpdead_items > 0)
2082 {
2083 vacrel->lpdead_item_pages++;
2084
2085 /*
2086 * deadoffsets are collected incrementally in
2087 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2088 * with an indeterminate order, but dead_items_add requires them to be
2089 * sorted.
2090 */
2091 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2093
2094 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2095 }
2096
2097 /* Finally, add page-local counts to whole-VACUUM counts */
2098 if (presult.newly_all_visible)
2099 vacrel->new_all_visible_pages++;
2100 if (presult.newly_all_visible_frozen)
2101 vacrel->new_all_visible_all_frozen_pages++;
2102 if (presult.newly_all_frozen)
2103 vacrel->new_all_frozen_pages++;
2104
2105 /* Capture if the page was newly set frozen */
2106 *vm_page_frozen = presult.newly_all_visible_frozen ||
2107 presult.newly_all_frozen;
2108
2109 vacrel->tuples_deleted += presult.ndeleted;
2110 vacrel->tuples_frozen += presult.nfrozen;
2111 vacrel->lpdead_items += presult.lpdead_items;
2112 vacrel->live_tuples += presult.live_tuples;
2113 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2114
2115 /* Can't truncate this page */
2116 if (presult.hastup)
2117 vacrel->nonempty_pages = blkno + 1;
2118
2119 /* Did we find LP_DEAD items? */
2120 *has_lpdead_items = (presult.lpdead_items > 0);
2121
2122 return presult.ndeleted;
2123}
Relation relation
Definition heapam.h:263

References Assert, buf, BufferGetBlockNumber(), cmpOffsetNumbers(), dead_items_add(), fb(), HEAP_PAGE_PRUNE_ALLOW_FAST_PATH, heap_page_prune_and_freeze(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, MultiXactIdIsValid, PruneFreezeParams::options, PRUNE_VACUUM_SCAN, qsort, PruneFreezeParams::relation, and TransactionIdIsValid.

Referenced by lazy_scan_heap().

◆ lazy_truncate_heap()

static void lazy_truncate_heap ( LVRelState vacrel)
static

Definition at line 3130 of file vacuumlazy.c.

3131{
3132 BlockNumber orig_rel_pages = vacrel->rel_pages;
3135 int lock_retry;
3136
3137 /* Report that we are now truncating */
3140
3141 /* Update error traceback information one last time */
3143 vacrel->nonempty_pages, InvalidOffsetNumber);
3144
3145 /*
3146 * Loop until no more truncating can be done.
3147 */
3148 do
3149 {
3150 /*
3151 * We need full exclusive lock on the relation in order to do
3152 * truncation. If we can't get it, give up rather than waiting --- we
3153 * don't want to block other backends, and we don't want to deadlock
3154 * (which is quite possible considering we already hold a lower-grade
3155 * lock).
3156 */
3157 lock_waiter_detected = false;
3158 lock_retry = 0;
3159 while (true)
3160 {
3162 break;
3163
3164 /*
3165 * Check for interrupts while trying to (re-)acquire the exclusive
3166 * lock.
3167 */
3169
3172 {
3173 /*
3174 * We failed to establish the lock in the specified number of
3175 * retries. This means we give up truncating.
3176 */
3177 ereport(vacrel->verbose ? INFO : DEBUG2,
3178 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3179 vacrel->relname)));
3180 return;
3181 }
3182
3188 }
3189
3190 /*
3191 * Now that we have exclusive lock, look to see if the rel has grown
3192 * whilst we were vacuuming with non-exclusive lock. If so, give up;
3193 * the newly added pages presumably contain non-deletable tuples.
3194 */
3197 {
3198 /*
3199 * Note: we intentionally don't update vacrel->rel_pages with the
3200 * new rel size here. If we did, it would amount to assuming that
3201 * the new pages are empty, which is unlikely. Leaving the numbers
3202 * alone amounts to assuming that the new pages have the same
3203 * tuple density as existing ones, which is less unlikely.
3204 */
3206 return;
3207 }
3208
3209 /*
3210 * Scan backwards from the end to verify that the end pages actually
3211 * contain no tuples. This is *necessary*, not optional, because
3212 * other backends could have added tuples to these pages whilst we
3213 * were vacuuming.
3214 */
3216 vacrel->blkno = new_rel_pages;
3217
3219 {
3220 /* can't do anything after all */
3222 return;
3223 }
3224
3225 /*
3226 * Okay to truncate.
3227 */
3229
3230 /*
3231 * We can release the exclusive lock as soon as we have truncated.
3232 * Other backends can't safely access the relation until they have
3233 * processed the smgr invalidation that smgrtruncate sent out ... but
3234 * that should happen as part of standard invalidation processing once
3235 * they acquire lock on the relation.
3236 */
3238
3239 /*
3240 * Update statistics. Here, it *is* correct to adjust rel_pages
3241 * without also touching reltuples, since the tuple count wasn't
3242 * changed by the truncation.
3243 */
3244 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3245 vacrel->rel_pages = new_rel_pages;
3246
3247 ereport(vacrel->verbose ? INFO : DEBUG2,
3248 (errmsg("table \"%s\": truncated %u to %u pages",
3249 vacrel->relname,
3252 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3253}

References AccessExclusiveLock, CHECK_FOR_INTERRUPTS, ConditionalLockRelation(), count_nondeletable_pages(), DEBUG2, ereport, errmsg, fb(), INFO, InvalidOffsetNumber, MyLatch, pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE, RelationGetNumberOfBlocks, RelationTruncate(), ResetLatch(), UnlockRelation(), update_vacuum_error_info(), VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_TRUNCATE_LOCK_TIMEOUT, VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL, WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by heap_vacuum_rel().

◆ lazy_vacuum()

static void lazy_vacuum ( LVRelState vacrel)
static

Definition at line 2357 of file vacuumlazy.c.

2358{
2359 bool bypass;
2360
2361 /* Should not end up here with no indexes */
2362 Assert(vacrel->nindexes > 0);
2363 Assert(vacrel->lpdead_item_pages > 0);
2364
2365 if (!vacrel->do_index_vacuuming)
2366 {
2367 Assert(!vacrel->do_index_cleanup);
2369 return;
2370 }
2371
2372 /*
2373 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2374 *
2375 * We currently only do this in cases where the number of LP_DEAD items
2376 * for the entire VACUUM operation is close to zero. This avoids sharp
2377 * discontinuities in the duration and overhead of successive VACUUM
2378 * operations that run against the same table with a fixed workload.
2379 * Ideally, successive VACUUM operations will behave as if there are
2380 * exactly zero LP_DEAD items in cases where there are close to zero.
2381 *
2382 * This is likely to be helpful with a table that is continually affected
2383 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2384 * have small aberrations that lead to just a few heap pages retaining
2385 * only one or two LP_DEAD items. This is pretty common; even when the
2386 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2387 * impossible to predict whether HOT will be applied in 100% of cases.
2388 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2389 * HOT through careful tuning.
2390 */
2391 bypass = false;
2392 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2393 {
2395
2396 Assert(vacrel->num_index_scans == 0);
2397 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2398 Assert(vacrel->do_index_vacuuming);
2399 Assert(vacrel->do_index_cleanup);
2400
2401 /*
2402 * This crossover point at which we'll start to do index vacuuming is
2403 * expressed as a percentage of the total number of heap pages in the
2404 * table that are known to have at least one LP_DEAD item. This is
2405 * much more important than the total number of LP_DEAD items, since
2406 * it's a proxy for the number of heap pages whose visibility map bits
2407 * cannot be set on account of bypassing index and heap vacuuming.
2408 *
2409 * We apply one further precautionary test: the space currently used
2410 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2411 * not exceed 32MB. This limits the risk that we will bypass index
2412 * vacuuming again and again until eventually there is a VACUUM whose
2413 * dead_items space is not CPU cache resident.
2414 *
2415 * We don't take any special steps to remember the LP_DEAD items (such
2416 * as counting them in our final update to the stats system) when the
2417 * optimization is applied. Though the accounting used in analyze.c's
2418 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2419 * rows in its own stats report, that's okay. The discrepancy should
2420 * be negligible. If this optimization is ever expanded to cover more
2421 * cases then this may need to be reconsidered.
2422 */
2424 bypass = (vacrel->lpdead_item_pages < threshold &&
2425 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2426 }
2427
2428 if (bypass)
2429 {
2430 /*
2431 * There are almost zero TIDs. Behave as if there were precisely
2432 * zero: bypass index vacuuming, but do index cleanup.
2433 *
2434 * We expect that the ongoing VACUUM operation will finish very
2435 * quickly, so there is no point in considering speeding up as a
2436 * failsafe against wraparound failure. (Index cleanup is expected to
2437 * finish very quickly in cases where there were no ambulkdelete()
2438 * calls.)
2439 */
2440 vacrel->do_index_vacuuming = false;
2441 }
2443 {
2444 /*
2445 * We successfully completed a round of index vacuuming. Do related
2446 * heap vacuuming now.
2447 */
2449 }
2450 else
2451 {
2452 /*
2453 * Failsafe case.
2454 *
2455 * We attempted index vacuuming, but didn't finish a full round/full
2456 * index scan. This happens when relfrozenxid or relminmxid is too
2457 * far in the past.
2458 *
2459 * From this point on the VACUUM operation will do no further index
2460 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2461 * back here again.
2462 */
2464 }
2465
2466 /*
2467 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2468 * vacuum)
2469 */
2471}

References Assert, BYPASS_THRESHOLD_PAGES, dead_items_reset(), fb(), lazy_vacuum_all_indexes(), lazy_vacuum_heap_rel(), TidStoreMemoryUsage(), and VacuumFailsafeActive.

Referenced by lazy_scan_heap().

◆ lazy_vacuum_all_indexes()

static bool lazy_vacuum_all_indexes ( LVRelState vacrel)
static

Definition at line 2482 of file vacuumlazy.c.

2483{
2484 bool allindexes = true;
2485 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2486 const int progress_start_index[] = {
2489 };
2490 const int progress_end_index[] = {
2494 };
2497
2498 Assert(vacrel->nindexes > 0);
2499 Assert(vacrel->do_index_vacuuming);
2500 Assert(vacrel->do_index_cleanup);
2501
2502 /* Precheck for XID wraparound emergencies */
2504 {
2505 /* Wraparound emergency -- don't even start an index scan */
2506 return false;
2507 }
2508
2509 /*
2510 * Report that we are now vacuuming indexes and the number of indexes to
2511 * vacuum.
2512 */
2514 progress_start_val[1] = vacrel->nindexes;
2516
2518 {
2519 for (int idx = 0; idx < vacrel->nindexes; idx++)
2520 {
2521 Relation indrel = vacrel->indrels[idx];
2522 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2523
2524 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2526 vacrel);
2527
2528 /* Report the number of indexes vacuumed */
2530 idx + 1);
2531
2533 {
2534 /* Wraparound emergency -- end current index scan */
2535 allindexes = false;
2536 break;
2537 }
2538 }
2539 }
2540 else
2541 {
2542 /* Outsource everything to parallel variant */
2544 vacrel->num_index_scans,
2545 &(vacrel->worker_usage.vacuum));
2546
2547 /*
2548 * Do a postcheck to consider applying wraparound failsafe now. Note
2549 * that parallel VACUUM only gets the precheck and this postcheck.
2550 */
2552 allindexes = false;
2553 }
2554
2555 /*
2556 * We delete all LP_DEAD items from the first heap pass in all indexes on
2557 * each call here (except calls where we choose to do the failsafe). This
2558 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2559 * of the failsafe triggering, which prevents the next call from taking
2560 * place).
2561 */
2562 Assert(vacrel->num_index_scans > 0 ||
2563 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2565
2566 /*
2567 * Increase and report the number of index scans. Also, we reset
2568 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2569 *
2570 * We deliberately include the case where we started a round of bulk
2571 * deletes that we weren't able to finish due to the failsafe triggering.
2572 */
2573 vacrel->num_index_scans++;
2574 progress_end_val[0] = 0;
2575 progress_end_val[1] = 0;
2576 progress_end_val[2] = vacrel->num_index_scans;
2578
2579 return allindexes;
2580}

References Assert, fb(), idx(), lazy_check_wraparound_failsafe(), lazy_vacuum_one_index(), parallel_vacuum_bulkdel_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_NUM_INDEX_VACUUMS, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX, and VacuumFailsafeActive.

Referenced by lazy_vacuum().

◆ lazy_vacuum_heap_page()

static void lazy_vacuum_heap_page ( LVRelState vacrel,
BlockNumber  blkno,
Buffer  buffer,
OffsetNumber deadoffsets,
int  num_offsets,
Buffer  vmbuffer 
)
static

Definition at line 2746 of file vacuumlazy.c.

2749{
2750 Page page = BufferGetPage(buffer);
2752 int nunused = 0;
2753 TransactionId newest_live_xid;
2755 bool all_frozen;
2757 uint8 vmflags = 0;
2758
2759 Assert(vacrel->do_index_vacuuming);
2760
2762
2763 /* Update error traceback information */
2767
2768 /*
2769 * Before marking dead items unused, check whether the page will become
2770 * all-visible once that change is applied. This lets us reap the tuples
2771 * and mark the page all-visible within the same critical section,
2772 * enabling both changes to be emitted in a single WAL record. Since the
2773 * visibility checks may perform I/O and allocate memory, they must be
2774 * done outside the critical section.
2775 */
2776 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2777 vacrel->vistest, true,
2778 deadoffsets, num_offsets,
2779 &all_frozen, &newest_live_xid,
2780 &vacrel->offnum))
2781 {
2783 if (all_frozen)
2784 {
2786 Assert(!TransactionIdIsValid(newest_live_xid));
2787 }
2788
2789 /*
2790 * Take the lock on the vmbuffer before entering a critical section.
2791 * The heap page lock must also be held while updating the VM to
2792 * ensure consistency.
2793 */
2795 }
2796
2798
2799 for (int i = 0; i < num_offsets; i++)
2800 {
2801 ItemId itemid;
2802 OffsetNumber toff = deadoffsets[i];
2803
2804 itemid = PageGetItemId(page, toff);
2805
2806 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2807 ItemIdSetUnused(itemid);
2808 unused[nunused++] = toff;
2809 }
2810
2811 Assert(nunused > 0);
2812
2813 /* Attempt to truncate line pointer array now */
2815
2816 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2817 {
2818 /*
2819 * The page is guaranteed to have had dead line pointers, so we always
2820 * set PD_ALL_VISIBLE.
2821 */
2822 PageSetAllVisible(page);
2823 PageClearPrunable(page);
2824 visibilitymap_set(blkno,
2825 vmbuffer, vmflags,
2826 vacrel->rel->rd_locator);
2827 conflict_xid = newest_live_xid;
2828 }
2829
2830 /*
2831 * Mark buffer dirty before we write WAL.
2832 */
2833 MarkBufferDirty(buffer);
2834
2835 /* XLOG stuff */
2836 if (RelationNeedsWAL(vacrel->rel))
2837 {
2838 log_heap_prune_and_freeze(vacrel->rel, buffer,
2839 vmflags != 0 ? vmbuffer : InvalidBuffer,
2840 vmflags,
2842 false, /* no cleanup lock required */
2844 NULL, 0, /* frozen */
2845 NULL, 0, /* redirected */
2846 NULL, 0, /* dead */
2847 unused, nunused);
2848 }
2849
2851
2853 {
2854 /* Count the newly set VM page for logging */
2855 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2856 vacrel->new_all_visible_pages++;
2857 if (all_frozen)
2858 vacrel->new_all_visible_all_frozen_pages++;
2859 }
2860
2861 /* Revert to the previous phase information for error traceback */
2863}

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), END_CRIT_SECTION, fb(), heap_page_would_be_all_visible(), i, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, ItemIdHasStorage, ItemIdIsDead, ItemIdSetUnused, LockBuffer(), log_heap_prune_and_freeze(), MarkBufferDirty(), MaxHeapTuplesPerPage, PageClearPrunable, PageGetItemId(), PageSetAllVisible(), PageTruncateLinePointerArray(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PRUNE_VACUUM_CLEANUP, RelationNeedsWAL, restore_vacuum_error_info(), START_CRIT_SECTION, TransactionIdIsValid, update_vacuum_error_info(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_set(), and VISIBILITYMAP_VALID_BITS.

Referenced by lazy_vacuum_heap_rel().

◆ lazy_vacuum_heap_rel()

static void lazy_vacuum_heap_rel ( LVRelState vacrel)
static

Definition at line 2628 of file vacuumlazy.c.

2629{
2630 ReadStream *stream;
2632 Buffer vmbuffer = InvalidBuffer;
2634 TidStoreIter *iter;
2635
2636 Assert(vacrel->do_index_vacuuming);
2637 Assert(vacrel->do_index_cleanup);
2638 Assert(vacrel->num_index_scans > 0);
2639
2640 /* Report that we are now vacuuming the heap */
2643
2644 /* Update error traceback information */
2648
2649 iter = TidStoreBeginIterate(vacrel->dead_items);
2650
2651 /*
2652 * Set up the read stream for vacuum's second pass through the heap.
2653 *
2654 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2655 * not need to wait for IO and does not perform locking. Once we support
2656 * parallelism it should still be fine, as presumably the holder of locks
2657 * would never be blocked by IO while holding the lock.
2658 */
2661 vacrel->bstrategy,
2662 vacrel->rel,
2665 iter,
2666 sizeof(TidStoreIterResult));
2667
2668 while (true)
2669 {
2670 BlockNumber blkno;
2671 Buffer buf;
2672 Page page;
2674 Size freespace;
2676 int num_offsets;
2677
2678 vacuum_delay_point(false);
2679
2680 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2681
2682 /* The relation is exhausted */
2683 if (!BufferIsValid(buf))
2684 break;
2685
2686 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2687
2690 Assert(num_offsets <= lengthof(offsets));
2691
2692 /*
2693 * Pin the visibility map page in case we need to mark the page
2694 * all-visible. In most cases this will be very cheap, because we'll
2695 * already have the correct page pinned anyway.
2696 */
2697 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2698
2699 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2701 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2702 num_offsets, vmbuffer);
2703
2704 /* Now that we've vacuumed the page, record its available space */
2705 page = BufferGetPage(buf);
2706 freespace = PageGetHeapFreeSpace(page);
2707
2709 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2711 }
2712
2713 read_stream_end(stream);
2714 TidStoreEndIterate(iter);
2715
2716 vacrel->blkno = InvalidBlockNumber;
2717 if (BufferIsValid(vmbuffer))
2718 ReleaseBuffer(vmbuffer);
2719
2720 /*
2721 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2722 * the second heap pass. No more, no less.
2723 */
2724 Assert(vacrel->num_index_scans > 1 ||
2725 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2726 vacuumed_pages == vacrel->lpdead_item_pages));
2727
2729 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2730 vacrel->relname, vacrel->dead_items_info->num_items,
2731 vacuumed_pages)));
2732
2733 /* Revert to the previous phase information for error traceback */
2735}

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), DEBUG2, ereport, errmsg, fb(), InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_vacuum_heap_page(), lengthof, LockBuffer(), MAIN_FORKNUM, MaxOffsetNumber, PageGetHeapFreeSpace(), pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, RecordPageWithFreeSpace(), ReleaseBuffer(), restore_vacuum_error_info(), TidStoreBeginIterate(), TidStoreEndIterate(), TidStoreGetBlockOffsets(), UnlockReleaseBuffer(), update_vacuum_error_info(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, vacuum_reap_lp_read_stream_next(), and visibilitymap_pin().

Referenced by lazy_vacuum().

◆ lazy_vacuum_one_index()

static IndexBulkDeleteResult * lazy_vacuum_one_index ( Relation  indrel,
IndexBulkDeleteResult istat,
double  reltuples,
LVRelState vacrel 
)
static

Definition at line 3001 of file vacuumlazy.c.

3003{
3006
3007 ivinfo.index = indrel;
3008 ivinfo.heaprel = vacrel->rel;
3009 ivinfo.analyze_only = false;
3010 ivinfo.report_progress = false;
3011 ivinfo.estimated_count = true;
3012 ivinfo.message_level = DEBUG2;
3013 ivinfo.num_heap_tuples = reltuples;
3014 ivinfo.strategy = vacrel->bstrategy;
3015
3016 /*
3017 * Update error traceback information.
3018 *
3019 * The index name is saved during this phase and restored immediately
3020 * after this phase. See vacuum_error_callback.
3021 */
3022 Assert(vacrel->indname == NULL);
3027
3028 /* Do bulk deletion */
3029 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3030 vacrel->dead_items_info);
3031
3032 /* Revert to the previous phase information for error traceback */
3034 pfree(vacrel->indname);
3035 vacrel->indname = NULL;
3036
3037 return istat;
3038}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_bulkdel_one_index(), and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by lazy_vacuum_all_indexes().

◆ restore_vacuum_error_info()

static void restore_vacuum_error_info ( LVRelState vacrel,
const LVSavedErrInfo saved_vacrel 
)
static

Definition at line 3865 of file vacuumlazy.c.

3867{
3868 vacrel->blkno = saved_vacrel->blkno;
3869 vacrel->offnum = saved_vacrel->offnum;
3870 vacrel->phase = saved_vacrel->phase;
3871}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().

◆ should_attempt_truncation()

static bool should_attempt_truncation ( LVRelState vacrel)
static

Definition at line 3110 of file vacuumlazy.c.

3111{
3113
3114 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3115 return false;
3116
3117 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3118 if (possibly_freeable > 0 &&
3121 return true;
3122
3123 return false;
3124}

References fb(), REL_TRUNCATE_FRACTION, REL_TRUNCATE_MINIMUM, and VacuumFailsafeActive.

Referenced by heap_vacuum_rel().

◆ update_relstats_all_indexes()

static void update_relstats_all_indexes ( LVRelState vacrel)
static

Definition at line 3747 of file vacuumlazy.c.

3748{
3749 Relation *indrels = vacrel->indrels;
3750 int nindexes = vacrel->nindexes;
3751 IndexBulkDeleteResult **indstats = vacrel->indstats;
3752
3753 Assert(vacrel->do_index_cleanup);
3754
3755 for (int idx = 0; idx < nindexes; idx++)
3756 {
3757 Relation indrel = indrels[idx];
3758 IndexBulkDeleteResult *istat = indstats[idx];
3759
3760 if (istat == NULL || istat->estimated_count)
3761 continue;
3762
3763 /* Update index statistics */
3765 istat->num_pages,
3766 istat->num_index_tuples,
3767 0, 0,
3768 false,
3771 NULL, NULL, false);
3772 }
3773}

References Assert, IndexBulkDeleteResult::estimated_count, fb(), idx(), InvalidMultiXactId, InvalidTransactionId, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, and vac_update_relstats().

Referenced by heap_vacuum_rel().

◆ update_vacuum_error_info()

static void update_vacuum_error_info ( LVRelState vacrel,
LVSavedErrInfo saved_vacrel,
int  phase,
BlockNumber  blkno,
OffsetNumber  offnum 
)
static

Definition at line 3846 of file vacuumlazy.c.

3848{
3849 if (saved_vacrel)
3850 {
3851 saved_vacrel->offnum = vacrel->offnum;
3852 saved_vacrel->blkno = vacrel->blkno;
3853 saved_vacrel->phase = vacrel->phase;
3854 }
3855
3856 vacrel->blkno = blkno;
3857 vacrel->offnum = offnum;
3858 vacrel->phase = phase;
3859}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_scan_heap(), lazy_truncate_heap(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().

◆ vacuum_error_callback()

static void vacuum_error_callback ( void arg)
static

Definition at line 3782 of file vacuumlazy.c.

3783{
3785
3786 switch (errinfo->phase)
3787 {
3789 if (BlockNumberIsValid(errinfo->blkno))
3790 {
3791 if (OffsetNumberIsValid(errinfo->offnum))
3792 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3793 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3794 else
3795 errcontext("while scanning block %u of relation \"%s.%s\"",
3796 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3797 }
3798 else
3799 errcontext("while scanning relation \"%s.%s\"",
3800 errinfo->relnamespace, errinfo->relname);
3801 break;
3802
3804 if (BlockNumberIsValid(errinfo->blkno))
3805 {
3806 if (OffsetNumberIsValid(errinfo->offnum))
3807 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3808 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3809 else
3810 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3811 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3812 }
3813 else
3814 errcontext("while vacuuming relation \"%s.%s\"",
3815 errinfo->relnamespace, errinfo->relname);
3816 break;
3817
3819 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3820 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3821 break;
3822
3824 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3825 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3826 break;
3827
3829 if (BlockNumberIsValid(errinfo->blkno))
3830 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3831 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3832 break;
3833
3835 default:
3836 return; /* do nothing; the errinfo may not be
3837 * initialized */
3838 }
3839}

References arg, BlockNumberIsValid(), errcontext, fb(), OffsetNumberIsValid, VACUUM_ERRCB_PHASE_INDEX_CLEANUP, VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_ERRCB_PHASE_UNKNOWN, VACUUM_ERRCB_PHASE_VACUUM_HEAP, and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by heap_vacuum_rel().

◆ vacuum_reap_lp_read_stream_next()

static BlockNumber vacuum_reap_lp_read_stream_next ( ReadStream stream,
void callback_private_data,
void per_buffer_data 
)
static

Definition at line 2590 of file vacuumlazy.c.

2593{
2594 TidStoreIter *iter = callback_private_data;
2596
2598 if (iter_result == NULL)
2599 return InvalidBlockNumber;
2600
2601 /*
2602 * Save the TidStoreIterResult for later, so we can extract the offsets.
2603 * It is safe to copy the result, according to TidStoreIterateNext().
2604 */
2605 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2606
2607 return iter_result->blkno;
2608}

References fb(), InvalidBlockNumber, and TidStoreIterateNext().

Referenced by lazy_vacuum_heap_rel().