PostgreSQL Source Code (git master)
vacuumlazy.c File Reference
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/tidstore.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/storage.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/latch.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/lsyscache.h"
#include "utils/pg_rusage.h"
#include "utils/timestamp.h"
Include dependency graph for vacuumlazy.c:

Go to the source code of this file.

Data Structures

struct  LVRelState
 
struct  LVSavedErrInfo
 

Macros

#define REL_TRUNCATE_MINIMUM   1000
 
#define REL_TRUNCATE_FRACTION   16
 
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */
 
#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */
 
#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)
 
#define PREFETCH_SIZE   ((BlockNumber) 32)
 
#define ParallelVacuumIsActive(vacrel)   ((vacrel)->pvs != NULL)
 
#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2
 
#define EAGER_SCAN_REGION_SIZE   4096
 

Typedefs

typedef struct LVRelState LVRelState
 
typedef struct LVSavedErrInfo LVSavedErrInfo
 

Enumerations

enum  VacErrPhase {
  VACUUM_ERRCB_PHASE_UNKNOWN , VACUUM_ERRCB_PHASE_SCAN_HEAP , VACUUM_ERRCB_PHASE_VACUUM_INDEX , VACUUM_ERRCB_PHASE_VACUUM_HEAP ,
  VACUUM_ERRCB_PHASE_INDEX_CLEANUP , VACUUM_ERRCB_PHASE_TRUNCATE
}
 

Functions

static void lazy_scan_heap (LVRelState *vacrel)
 
static void heap_vacuum_eager_scan_setup (LVRelState *vacrel, const VacuumParams params)
 
static BlockNumber heap_vac_scan_next_block (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 
static void find_next_unskippable_block (LVRelState *vacrel, bool *skipsallvis)
 
static bool lazy_scan_new_or_empty (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
 
static void identify_and_fix_vm_corruption (Relation rel, Buffer heap_buffer, BlockNumber heap_blk, Page heap_page, int nlpdead_items, Buffer vmbuffer, uint8 *vmbits)
 
static int lazy_scan_prune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool *has_lpdead_items, bool *vm_page_frozen)
 
static bool lazy_scan_noprune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
 
static void lazy_vacuum (LVRelState *vacrel)
 
static bool lazy_vacuum_all_indexes (LVRelState *vacrel)
 
static void lazy_vacuum_heap_rel (LVRelState *vacrel)
 
static void lazy_vacuum_heap_page (LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
 
static bool lazy_check_wraparound_failsafe (LVRelState *vacrel)
 
static void lazy_cleanup_all_indexes (LVRelState *vacrel)
 
static IndexBulkDeleteResult * lazy_vacuum_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
 
static IndexBulkDeleteResult * lazy_cleanup_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
 
static bool should_attempt_truncation (LVRelState *vacrel)
 
static void lazy_truncate_heap (LVRelState *vacrel)
 
static BlockNumber count_nondeletable_pages (LVRelState *vacrel, bool *lock_waiter_detected)
 
static void dead_items_alloc (LVRelState *vacrel, int nworkers)
 
static void dead_items_add (LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
 
static void dead_items_reset (LVRelState *vacrel)
 
static void dead_items_cleanup (LVRelState *vacrel)
 
static bool heap_page_would_be_all_visible (Relation rel, Buffer buf, TransactionId OldestXmin, OffsetNumber *deadoffsets, int ndeadoffsets, bool *all_frozen, TransactionId *visibility_cutoff_xid, OffsetNumber *logging_offnum)
 
static void update_relstats_all_indexes (LVRelState *vacrel)
 
static void vacuum_error_callback (void *arg)
 
static void update_vacuum_error_info (LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
 
static void restore_vacuum_error_info (LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
static int cmpOffsetNumbers (const void *a, const void *b)
 
static BlockNumber vacuum_reap_lp_read_stream_next (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 

Macro Definition Documentation

◆ BYPASS_THRESHOLD_PAGES

#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */

Definition at line 185 of file vacuumlazy.c.

◆ EAGER_SCAN_REGION_SIZE

#define EAGER_SCAN_REGION_SIZE   4096

Definition at line 248 of file vacuumlazy.c.

◆ FAILSAFE_EVERY_PAGES

#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 191 of file vacuumlazy.c.

222{
230
231/*
232 * An eager scan of a page that is set all-frozen in the VM is considered
233 * "successful". To spread out freezing overhead across multiple normal
234 * vacuums, we limit the number of successful eager page freezes. The maximum
235 * number of eager page freezes is calculated as a ratio of the all-visible
236 * but not all-frozen pages at the beginning of the vacuum.
237 */
238#define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
239
240/*
241 * On the assumption that different regions of the table tend to have
242 * similarly aged data, once vacuum fails to freeze
243 * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
244 * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
245 * to another region of the table with potentially older data.
246 */
247#define EAGER_SCAN_REGION_SIZE 4096
248
249typedef struct LVRelState
250{
251 /* Target heap relation and its indexes */
254 int nindexes;
255
256 /* Buffer access strategy and parallel vacuum state */
259
260 /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
261 bool aggressive;
262 /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
263 bool skipwithvm;
264 /* Consider index vacuuming bypass optimization? */
266
267 /* Doing index vacuuming, index cleanup, rel truncation? */
269 bool do_index_cleanup;
270 bool do_rel_truncate;
271
272 /* VACUUM operation's cutoffs for freezing and pruning */
273 struct VacuumCutoffs cutoffs;
275 /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
278 bool skippedallvis;
279
280 /* Error reporting state */
281 char *dbname;
282 char *relnamespace;
283 char *relname;
284 char *indname; /* Current index name */
285 BlockNumber blkno; /* used only for heap operations */
286 OffsetNumber offnum; /* used only for heap operations */
288 bool verbose; /* VACUUM VERBOSE? */
289
290 /*
291 * dead_items stores TIDs whose index tuples are deleted by index
292 * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
293 * that has been processed by lazy_scan_prune. Also needed by
294 * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
295 * LP_UNUSED during second heap pass.
296 *
297 * Both dead_items and dead_items_info are allocated in shared memory in
298 * parallel vacuum cases.
299 */
300 TidStore *dead_items; /* TIDs whose index tuples we'll delete */
302
303 BlockNumber rel_pages; /* total number of pages */
304 BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
305
306 /*
307 * Count of all-visible blocks eagerly scanned (for logging only). This
308 * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
309 */
311
312 BlockNumber removed_pages; /* # pages removed by relation truncation */
313 BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
314
315 /* # pages newly set all-visible in the VM */
317
318 /*
319 * # pages newly set all-visible and all-frozen in the VM. This is a
320 * subset of new_all_visible_pages. That is, new_all_visible_pages
321 * includes all pages set all-visible, but
322 * new_all_visible_all_frozen_pages includes only those which were also
323 * set all-frozen.
324 */
326
327 /* # all-visible pages newly set all-frozen in the VM */
329
330 BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
331 BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
332 BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
333
334 /* Statistics output by us, for table */
335 double new_rel_tuples; /* new estimated total # of tuples */
336 double new_live_tuples; /* new estimated total # of live tuples */
337 /* Statistics output by index AMs */
339
340 /* Instrumentation counters */
341 int num_index_scans;
344 /* Counters that follow are only for scanned_pages */
345 int64 tuples_deleted; /* # deleted from table */
346 int64 tuples_frozen; /* # newly frozen */
347 int64 lpdead_items; /* # deleted from indexes */
348 int64 live_tuples; /* # live tuples remaining */
349 int64 recently_dead_tuples; /* # dead, but not yet removable */
350 int64 missed_dead_tuples; /* # removable, but not removed */
351
352 /* State maintained by heap_vac_scan_next_block() */
353 BlockNumber current_block; /* last block returned */
354 BlockNumber next_unskippable_block; /* next unskippable block */
355 bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
356 Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
357
358 /* State related to managing eager scanning of all-visible pages */
359
360 /*
361 * A normal vacuum that has failed to freeze too many eagerly scanned
362 * blocks in a region suspends eager scanning.
363 * next_eager_scan_region_start is the block number of the first block
364 * eligible for resumed eager scanning.
365 *
366 * When eager scanning is permanently disabled, either initially
367 * (including for aggressive vacuum) or due to hitting the success cap,
368 * this is set to InvalidBlockNumber.
369 */
371
372 /*
373 * The remaining number of blocks a normal vacuum will consider eager
374 * scanning when it is successful. When eager scanning is enabled, this is
375 * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
376 * all-visible but not all-frozen pages. For each eager freeze success,
377 * this is decremented. Once it hits 0, eager scanning is permanently
378 * disabled. It is initialized to 0 if eager scanning starts out disabled
379 * (including for aggressive vacuum).
380 */
382
383 /*
384 * The maximum number of blocks which may be eagerly scanned and not
385 * frozen before eager scanning is temporarily suspended. This is
386 * configurable both globally, via the
387 * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
388 * storage parameter of the same name. It is calculated as
389 * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
390 * It is 0 when eager scanning is disabled.
391 */
393
394 /*
395 * The number of eagerly scanned blocks vacuum failed to freeze (due to
396 * age) in the current eager scan region. Vacuum resets it to
397 * eager_scan_max_fails_per_region each time it enters a new region of the
398 * relation. If eager_scan_remaining_fails hits 0, eager scanning is
399 * suspended until the next region. It is also 0 if eager scanning has
400 * been permanently disabled.
401 */
403} LVRelState;
404
405
406/* Struct for saving and restoring vacuum error information. */
407typedef struct LVSavedErrInfo
408{
413
414
415/* non-export function prototypes */
416static void lazy_scan_heap(LVRelState *vacrel);
418 const VacuumParams params);
420 void *callback_private_data,
421 void *per_buffer_data);
424 BlockNumber blkno, Page page,
425 bool sharelock, Buffer vmbuffer);
428 int nlpdead_items,
429 Buffer vmbuffer,
430 uint8 *vmbits);
432 BlockNumber blkno, Page page,
433 Buffer vmbuffer,
434 bool *has_lpdead_items, bool *vm_page_frozen);
436 BlockNumber blkno, Page page,
437 bool *has_lpdead_items);
438static void lazy_vacuum(LVRelState *vacrel);
442 Buffer buffer, OffsetNumber *deadoffsets,
443 int num_offsets, Buffer vmbuffer);
448 double reltuples,
452 double reltuples,
453 bool estimated_count,
459static void dead_items_alloc(LVRelState *vacrel, int nworkers);
460static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
461 int num_offsets);
464
465#ifdef USE_ASSERT_CHECKING
467 TransactionId OldestXmin,
468 bool *all_frozen,
469 TransactionId *visibility_cutoff_xid,
471#endif
473 TransactionId OldestXmin,
474 OffsetNumber *deadoffsets,
475 int ndeadoffsets,
476 bool *all_frozen,
477 TransactionId *visibility_cutoff_xid,
480static void vacuum_error_callback(void *arg);
483 int phase, BlockNumber blkno,
484 OffsetNumber offnum);
487
488
489
490/*
491 * Helper to set up the eager scanning state for vacuuming a single relation.
492 * Initializes the eager scan management related members of the LVRelState.
493 *
494 * Caller provides whether or not an aggressive vacuum is required due to
495 * vacuum options or for relfrozenxid/relminmxid advancement.
496 */
497static void
499{
503 float first_region_ratio;
505
506 /*
507 * Initialize eager scan management fields to their disabled values.
508 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
509 * of tables without sufficiently old tuples disable eager scanning.
510 */
511 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
512 vacrel->eager_scan_max_fails_per_region = 0;
513 vacrel->eager_scan_remaining_fails = 0;
514 vacrel->eager_scan_remaining_successes = 0;
515
516 /* If eager scanning is explicitly disabled, just return. */
517 if (params.max_eager_freeze_failure_rate == 0)
518 return;
519
520 /*
521 * The caller will have determined whether or not an aggressive vacuum is
522 * required by either the vacuum parameters or the relative age of the
523 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
524 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
525 * so scans of all-visible pages are not considered eager.
526 */
527 if (vacrel->aggressive)
528 return;
529
530 /*
531 * Aggressively vacuuming a small relation shouldn't take long, so it
532 * isn't worth amortizing. We use two times the region size as the size
533 * cutoff because the eager scan start block is a random spot somewhere in
534 * the first region, making the second region the first to be eager
535 * scanned normally.
536 */
537 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
538 return;
539
540 /*
541 * We only want to enable eager scanning if we are likely to be able to
542 * freeze some of the pages in the relation.
543 *
544 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
545 * are technically freezable, but we won't freeze them unless the criteria
546 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
547 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
548 *
549 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
550 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
551 * enable eager scanning.
552 */
553 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
554 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
555 vacrel->cutoffs.FreezeLimit))
557
559 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
560 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
561 vacrel->cutoffs.MultiXactCutoff))
563
565 return;
566
567 /* We have met the criteria to eagerly scan some pages. */
568
569 /*
570 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
571 * all-visible but not all-frozen blocks in the relation.
572 */
574
575 vacrel->eager_scan_remaining_successes =
578
579 /* If every all-visible page is frozen, eager scanning is disabled. */
580 if (vacrel->eager_scan_remaining_successes == 0)
581 return;
582
583 /*
584 * Now calculate the bounds of the first eager scan region. Its end block
585 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
586 * blocks. This affects the bounds of all subsequent regions and avoids
587 * eager scanning and failing to freeze the same blocks each vacuum of the
588 * relation.
589 */
591
592 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
593
596
597 vacrel->eager_scan_max_fails_per_region =
600
601 /*
602 * The first region will be smaller than subsequent regions. As such,
603 * adjust the eager freeze failures tolerated for this region.
604 */
605 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
607
608 vacrel->eager_scan_remaining_fails =
609 vacrel->eager_scan_max_fails_per_region *
611}
612
613/*
614 * heap_vacuum_rel() -- perform VACUUM for one heap relation
615 *
616 * This routine sets things up for and then calls lazy_scan_heap, where
617 * almost all work actually takes place. Finalizes everything after call
618 * returns by managing relation truncation and updating rel's pg_class
619 * entry. (Also updates pg_class entries for any indexes that need it.)
620 *
621 * At entry, we have already established a transaction and opened
622 * and locked the relation.
623 */
624void
625heap_vacuum_rel(Relation rel, const VacuumParams params,
626 BufferAccessStrategy bstrategy)
627{
629 bool verbose,
630 instrument,
631 skipwithvm,
639 TimestampTz starttime = 0;
641 startwritetime = 0;
644 ErrorContextCallback errcallback;
645 char **indnames = NULL;
647
648 verbose = (params.options & VACOPT_VERBOSE) != 0;
649 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
650 params.log_vacuum_min_duration >= 0));
651 if (instrument)
652 {
654 if (track_io_timing)
655 {
658 }
659 }
660
661 /* Used for instrumentation and stats report */
662 starttime = GetCurrentTimestamp();
663
665 RelationGetRelid(rel));
668 params.is_wraparound
671 else
674
675 /*
676 * Setup error traceback support for ereport() first. The idea is to set
677 * up an error context callback to display additional information on any
678 * error during a vacuum. During different phases of vacuum, we update
679 * the state so that the error context callback always display current
680 * information.
681 *
682 * Copy the names of heap rel into local memory for error reporting
683 * purposes, too. It isn't always safe to assume that we can get the name
684 * of each rel. It's convenient for code in lazy_scan_heap to always use
685 * these temp copies.
686 */
689 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
690 vacrel->relname = pstrdup(RelationGetRelationName(rel));
691 vacrel->indname = NULL;
693 vacrel->verbose = verbose;
694 errcallback.callback = vacuum_error_callback;
695 errcallback.arg = vacrel;
696 errcallback.previous = error_context_stack;
697 error_context_stack = &errcallback;
698
699 /* Set up high level stuff about rel and its indexes */
700 vacrel->rel = rel;
702 &vacrel->indrels);
703 vacrel->bstrategy = bstrategy;
704 if (instrument && vacrel->nindexes > 0)
705 {
706 /* Copy index names used by instrumentation (not error reporting) */
707 indnames = palloc_array(char *, vacrel->nindexes);
708 for (int i = 0; i < vacrel->nindexes; i++)
710 }
711
712 /*
713 * The index_cleanup param either disables index vacuuming and cleanup or
714 * forces it to go ahead when we would otherwise apply the index bypass
715 * optimization. The default is 'auto', which leaves the final decision
716 * up to lazy_vacuum().
717 *
718 * The truncate param allows user to avoid attempting relation truncation,
719 * though it can't force truncation to happen.
720 */
723 params.truncate != VACOPTVALUE_AUTO);
724
725 /*
726 * While VacuumFailSafeActive is reset to false before calling this, we
727 * still need to reset it here due to recursive calls.
728 */
729 VacuumFailsafeActive = false;
730 vacrel->consider_bypass_optimization = true;
731 vacrel->do_index_vacuuming = true;
732 vacrel->do_index_cleanup = true;
733 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
735 {
736 /* Force disable index vacuuming up-front */
737 vacrel->do_index_vacuuming = false;
738 vacrel->do_index_cleanup = false;
739 }
740 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
741 {
742 /* Force index vacuuming. Note that failsafe can still bypass. */
743 vacrel->consider_bypass_optimization = false;
744 }
745 else
746 {
747 /* Default/auto, make all decisions dynamically */
749 }
750
751 /* Initialize page counters explicitly (be tidy) */
752 vacrel->scanned_pages = 0;
753 vacrel->eager_scanned_pages = 0;
754 vacrel->removed_pages = 0;
755 vacrel->new_frozen_tuple_pages = 0;
756 vacrel->lpdead_item_pages = 0;
757 vacrel->missed_dead_pages = 0;
758 vacrel->nonempty_pages = 0;
759 /* dead_items_alloc allocates vacrel->dead_items later on */
760
761 /* Allocate/initialize output statistics state */
762 vacrel->new_rel_tuples = 0;
763 vacrel->new_live_tuples = 0;
764 vacrel->indstats = (IndexBulkDeleteResult **)
765 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
766
767 /* Initialize remaining counters (be tidy) */
768 vacrel->num_index_scans = 0;
769 vacrel->num_dead_items_resets = 0;
770 vacrel->total_dead_items_bytes = 0;
771 vacrel->tuples_deleted = 0;
772 vacrel->tuples_frozen = 0;
773 vacrel->lpdead_items = 0;
774 vacrel->live_tuples = 0;
775 vacrel->recently_dead_tuples = 0;
776 vacrel->missed_dead_tuples = 0;
777
778 vacrel->new_all_visible_pages = 0;
779 vacrel->new_all_visible_all_frozen_pages = 0;
780 vacrel->new_all_frozen_pages = 0;
781
782 /*
783 * Get cutoffs that determine which deleted tuples are considered DEAD,
784 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
785 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
786 * happen in this order to ensure that the OldestXmin cutoff field works
787 * as an upper bound on the XIDs stored in the pages we'll actually scan
788 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
789 *
790 * Next acquire vistest, a related cutoff that's used in pruning. We use
791 * vistest in combination with OldestXmin to ensure that
792 * heap_page_prune_and_freeze() always removes any deleted tuple whose
793 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
794 * whether a tuple should be frozen or removed. (In the future we might
795 * want to teach lazy_scan_prune to recompute vistest from time to time,
796 * to increase the number of dead tuples it can prune away.)
797 */
798 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
800 vacrel->vistest = GlobalVisTestFor(rel);
801
802 /* Initialize state used to track oldest extant XID/MXID */
803 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
804 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
805
806 /*
807 * Initialize state related to tracking all-visible page skipping. This is
808 * very important to determine whether or not it is safe to advance the
809 * relfrozenxid/relminmxid.
810 */
811 vacrel->skippedallvis = false;
812 skipwithvm = true;
814 {
815 /*
816 * Force aggressive mode, and disable skipping blocks using the
817 * visibility map (even those set all-frozen)
818 */
819 vacrel->aggressive = true;
820 skipwithvm = false;
821 }
822
823 vacrel->skipwithvm = skipwithvm;
824
825 /*
826 * Set up eager scan tracking state. This must happen after determining
827 * whether or not the vacuum must be aggressive, because only normal
828 * vacuums use the eager scan algorithm.
829 */
831
832 /* Report the vacuum mode: 'normal' or 'aggressive' */
834 vacrel->aggressive
837
838 if (verbose)
839 {
840 if (vacrel->aggressive)
842 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
843 vacrel->dbname, vacrel->relnamespace,
844 vacrel->relname)));
845 else
847 (errmsg("vacuuming \"%s.%s.%s\"",
848 vacrel->dbname, vacrel->relnamespace,
849 vacrel->relname)));
850 }
851
852 /*
853 * Allocate dead_items memory using dead_items_alloc. This handles
854 * parallel VACUUM initialization as part of allocating shared memory
855 * space used for dead_items. (But do a failsafe precheck first, to
856 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
857 * is already dangerously old.)
858 */
861
862 /*
863 * Call lazy_scan_heap to perform all required heap pruning, index
864 * vacuuming, and heap vacuuming (plus related processing)
865 */
867
868 /*
869 * Save dead items max_bytes and update the memory usage statistics before
870 * cleanup, they are freed in parallel vacuum cases during
871 * dead_items_cleanup().
872 */
873 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
874 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
875
876 /*
877 * Free resources managed by dead_items_alloc. This ends parallel mode in
878 * passing when necessary.
879 */
882
883 /*
884 * Update pg_class entries for each of rel's indexes where appropriate.
885 *
886 * Unlike the later update to rel's pg_class entry, this is not critical.
887 * Maintains relpages/reltuples statistics used by the planner only.
888 */
889 if (vacrel->do_index_cleanup)
891
892 /* Done with rel's indexes */
893 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
894
895 /* Optionally truncate rel */
898
899 /* Pop the error context stack */
900 error_context_stack = errcallback.previous;
901
902 /* Report that we are now doing final cleanup */
905
906 /*
907 * Prepare to update rel's pg_class entry.
908 *
909 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
910 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
911 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
912 */
913 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
914 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
915 vacrel->cutoffs.relfrozenxid,
916 vacrel->NewRelfrozenXid));
917 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
918 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
919 vacrel->cutoffs.relminmxid,
920 vacrel->NewRelminMxid));
921 if (vacrel->skippedallvis)
922 {
923 /*
924 * Must keep original relfrozenxid in a non-aggressive VACUUM that
925 * chose to skip an all-visible page range. The state that tracks new
926 * values will have missed unfrozen XIDs from the pages we skipped.
927 */
928 Assert(!vacrel->aggressive);
929 vacrel->NewRelfrozenXid = InvalidTransactionId;
930 vacrel->NewRelminMxid = InvalidMultiXactId;
931 }
932
933 /*
934 * For safety, clamp relallvisible to be not more than what we're setting
935 * pg_class.relpages to
936 */
937 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
941
942 /*
943 * An all-frozen block _must_ be all-visible. As such, clamp the count of
944 * all-frozen blocks to the count of all-visible blocks. This matches the
945 * clamping of relallvisible above.
946 */
949
950 /*
951 * Now actually update rel's pg_class entry.
952 *
953 * In principle new_live_tuples could be -1 indicating that we (still)
954 * don't know the tuple count. In practice that can't happen, since we
955 * scan every page that isn't skipped using the visibility map.
956 */
957 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
959 vacrel->nindexes > 0,
960 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
962
963 /*
964 * Report results to the cumulative stats system, too.
965 *
966 * Deliberately avoid telling the stats system about LP_DEAD items that
967 * remain in the table due to VACUUM bypassing index and heap vacuuming.
968 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
969 * It seems like a good idea to err on the side of not vacuuming again too
970 * soon in cases where the failsafe prevented significant amounts of heap
971 * vacuuming.
972 */
974 Max(vacrel->new_live_tuples, 0),
975 vacrel->recently_dead_tuples +
976 vacrel->missed_dead_tuples,
977 starttime);
979
980 if (instrument)
981 {
983
984 if (verbose || params.log_vacuum_min_duration == 0 ||
987 {
988 long secs_dur;
989 int usecs_dur;
990 WalUsage walusage;
991 BufferUsage bufferusage;
993 char *msgfmt;
994 int32 diff;
995 double read_rate = 0,
996 write_rate = 0;
1000
1002 memset(&walusage, 0, sizeof(WalUsage));
1004 memset(&bufferusage, 0, sizeof(BufferUsage));
1006
1007 total_blks_hit = bufferusage.shared_blks_hit +
1008 bufferusage.local_blks_hit;
1009 total_blks_read = bufferusage.shared_blks_read +
1010 bufferusage.local_blks_read;
1012 bufferusage.local_blks_dirtied;
1013
1015 if (verbose)
1016 {
1017 /*
1018 * Aggressiveness already reported earlier, in dedicated
1019 * VACUUM VERBOSE ereport
1020 */
1021 Assert(!params.is_wraparound);
1022 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1023 }
1024 else if (params.is_wraparound)
1025 {
1026 /*
1027 * While it's possible for a VACUUM to be both is_wraparound
1028 * and !aggressive, that's just a corner-case -- is_wraparound
1029 * implies aggressive. Produce distinct output for the corner
1030 * case all the same, just in case.
1031 */
1032 if (vacrel->aggressive)
1033 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1034 else
1035 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1036 }
1037 else
1038 {
1039 if (vacrel->aggressive)
1040 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1041 else
1042 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1043 }
1045 vacrel->dbname,
1046 vacrel->relnamespace,
1047 vacrel->relname,
1048 vacrel->num_index_scans);
1049 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1050 vacrel->removed_pages,
1052 vacrel->scanned_pages,
1053 orig_rel_pages == 0 ? 100.0 :
1054 100.0 * vacrel->scanned_pages /
1056 vacrel->eager_scanned_pages);
1058 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1059 vacrel->tuples_deleted,
1060 (int64) vacrel->new_rel_tuples,
1061 vacrel->recently_dead_tuples);
1062 if (vacrel->missed_dead_tuples > 0)
1064 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1065 vacrel->missed_dead_tuples,
1066 vacrel->missed_dead_pages);
1068 vacrel->cutoffs.OldestXmin);
1070 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1071 vacrel->cutoffs.OldestXmin, diff);
1073 {
1074 diff = (int32) (vacrel->NewRelfrozenXid -
1075 vacrel->cutoffs.relfrozenxid);
1077 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1078 vacrel->NewRelfrozenXid, diff);
1079 }
1080 if (minmulti_updated)
1081 {
1082 diff = (int32) (vacrel->NewRelminMxid -
1083 vacrel->cutoffs.relminmxid);
1085 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1086 vacrel->NewRelminMxid, diff);
1087 }
1088 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1089 vacrel->new_frozen_tuple_pages,
1090 orig_rel_pages == 0 ? 100.0 :
1091 100.0 * vacrel->new_frozen_tuple_pages /
1093 vacrel->tuples_frozen);
1094
1096 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1097 vacrel->new_all_visible_pages,
1098 vacrel->new_all_visible_all_frozen_pages +
1099 vacrel->new_all_frozen_pages,
1100 vacrel->new_all_frozen_pages);
1101 if (vacrel->do_index_vacuuming)
1102 {
1103 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1104 appendStringInfoString(&buf, _("index scan not needed: "));
1105 else
1106 appendStringInfoString(&buf, _("index scan needed: "));
1107
1108 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1109 }
1110 else
1111 {
1113 appendStringInfoString(&buf, _("index scan bypassed: "));
1114 else
1115 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1116
1117 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1118 }
1120 vacrel->lpdead_item_pages,
1121 orig_rel_pages == 0 ? 100.0 :
1122 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1123 vacrel->lpdead_items);
1124 for (int i = 0; i < vacrel->nindexes; i++)
1125 {
1126 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1127
1128 if (!istat)
1129 continue;
1130
1132 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1133 indnames[i],
1134 istat->num_pages,
1135 istat->pages_newly_deleted,
1136 istat->pages_deleted,
1137 istat->pages_free);
1138 }
1140 {
1141 /*
1142 * We bypass the changecount mechanism because this value is
1143 * only updated by the calling process. We also rely on the
1144 * above call to pgstat_progress_end_command() to not clear
1145 * the st_progress_param array.
1146 */
1147 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1149 }
1150 if (track_io_timing)
1151 {
1152 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1153 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1154
1155 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1156 read_ms, write_ms);
1157 }
1158 if (secs_dur > 0 || usecs_dur > 0)
1159 {
1161 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1163 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1164 }
1165 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1168 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1173 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1174 walusage.wal_records,
1175 walusage.wal_fpi,
1176 walusage.wal_bytes,
1177 walusage.wal_fpi_bytes,
1178 walusage.wal_buffers_full);
1179
1180 /*
1181 * Report the dead items memory usage.
1182 *
1183 * The num_dead_items_resets counter increases when we reset the
1184 * collected dead items, so the counter is non-zero if at least
1185 * one dead items are collected, even if index vacuuming is
1186 * disabled.
1187 */
1189 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1190 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1191 vacrel->num_dead_items_resets),
1192 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1193 vacrel->num_dead_items_resets,
1194 (double) dead_items_max_bytes / (1024 * 1024));
1195 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1196
1197 ereport(verbose ? INFO : LOG,
1198 (errmsg_internal("%s", buf.data)));
1199 pfree(buf.data);
1200 }
1201 }
1202
1203 /* Cleanup index statistics and index names */
1204 for (int i = 0; i < vacrel->nindexes; i++)
1205 {
1206 if (vacrel->indstats[i])
1207 pfree(vacrel->indstats[i]);
1208
1209 if (instrument)
1210 pfree(indnames[i]);
1211 }
1212}
1213
1214/*
1215 * lazy_scan_heap() -- workhorse function for VACUUM
1216 *
1217 * This routine prunes each page in the heap, and considers the need to
1218 * freeze remaining tuples with storage (not including pages that can be
1219 * skipped using the visibility map). Also performs related maintenance
1220 * of the FSM and visibility map. These steps all take place during an
1221 * initial pass over the target heap relation.
1222 *
1223 * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1224 * consists of deleting index tuples that point to LP_DEAD items left in
1225 * heap pages following pruning. Earlier initial pass over the heap will
1226 * have collected the TIDs whose index tuples need to be removed.
1227 *
1228 * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1229 * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1230 * as LP_UNUSED. This has to happen in a second, final pass over the
1231 * heap, to preserve a basic invariant that all index AMs rely on: no
1232 * extant index tuple can ever be allowed to contain a TID that points to
1233 * an LP_UNUSED line pointer in the heap. We must disallow premature
1234 * recycling of line pointers to avoid index scans that get confused
1235 * about which TID points to which tuple immediately after recycling.
1236 * (Actually, this isn't a concern when target heap relation happens to
1237 * have no indexes, which allows us to safely apply the one-pass strategy
1238 * as an optimization).
1239 *
1240 * In practice we often have enough space to fit all TIDs, and so won't
1241 * need to call lazy_vacuum more than once, after our initial pass over
1242 * the heap has totally finished. Otherwise things are slightly more
1243 * complicated: our "initial pass" over the heap applies only to those
1244 * pages that were pruned before we needed to call lazy_vacuum, and our
1245 * "final pass" over the heap only vacuums these same heap pages.
1246 * However, we process indexes in full every time lazy_vacuum is called,
1247 * which makes index processing very inefficient when memory is in short
1248 * supply.
1249 */
1250static void
1252{
1253 ReadStream *stream;
1254 BlockNumber rel_pages = vacrel->rel_pages,
1255 blkno = 0,
1258 vacrel->eager_scan_remaining_successes; /* for logging */
1259 Buffer vmbuffer = InvalidBuffer;
1260 const int initprog_index[] = {
1264 };
1266
1267 /* Report that we're scanning the heap, advertising total # of blocks */
1269 initprog_val[1] = rel_pages;
1270 initprog_val[2] = vacrel->dead_items_info->max_bytes;
1272
1273 /* Initialize for the first heap_vac_scan_next_block() call */
1274 vacrel->current_block = InvalidBlockNumber;
1275 vacrel->next_unskippable_block = InvalidBlockNumber;
1276 vacrel->next_unskippable_eager_scanned = false;
1277 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1278
1279 /*
1280 * Set up the read stream for vacuum's first pass through the heap.
1281 *
1282 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1283 * explicit work in heap_vac_scan_next_block.
1284 */
1286 vacrel->bstrategy,
1287 vacrel->rel,
1290 vacrel,
1291 sizeof(bool));
1292
1293 while (true)
1294 {
1295 Buffer buf;
1296 Page page;
1297 bool was_eager_scanned = false;
1298 int ndeleted = 0;
1299 bool has_lpdead_items;
1300 void *per_buffer_data = NULL;
1301 bool vm_page_frozen = false;
1302 bool got_cleanup_lock = false;
 /*
 * The stream's per-buffer data is a single bool (see the sizeof(bool)
 * passed at stream setup) recording whether the block was chosen by
 * eager scanning in heap_vac_scan_next_block().
 */
1303
1304 vacuum_delay_point(false);
1305
1306 /*
1307 * Regularly check if wraparound failsafe should trigger.
1308 *
1309 * There is a similar check inside lazy_vacuum_all_indexes(), but
1310 * relfrozenxid might start to look dangerously old before we reach
1311 * that point. This check also provides failsafe coverage for the
1312 * one-pass strategy, and the two-pass strategy with the index_cleanup
1313 * param set to 'off'.
1314 */
1315 if (vacrel->scanned_pages > 0 &&
1316 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1318
1319 /*
1320 * Consider if we definitely have enough space to process TIDs on page
1321 * already. If we are close to overrunning the available space for
1322 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1323 * this page. However, let's force at least one page-worth of tuples
1324 * to be stored as to ensure we do at least some work when the memory
1325 * configured is so low that we run out before storing anything.
1326 */
1327 if (vacrel->dead_items_info->num_items > 0 &&
1328 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1329 {
1330 /*
1331 * Before beginning index vacuuming, we release any pin we may
1332 * hold on the visibility map page. This isn't necessary for
1333 * correctness, but we do it anyway to avoid holding the pin
1334 * across a lengthy, unrelated operation.
1335 */
1336 if (BufferIsValid(vmbuffer))
1337 {
1338 ReleaseBuffer(vmbuffer);
1339 vmbuffer = InvalidBuffer;
1340 }
1341
1342 /* Perform a round of index and heap vacuuming */
1343 vacrel->consider_bypass_optimization = false;
1345
1346 /*
1347 * Vacuum the Free Space Map to make newly-freed space visible on
1348 * upper-level FSM pages. Note that blkno is the previously
1349 * processed block.
1350 */
1352 blkno + 1);
1354
1355 /* Report that we are once again scanning the heap */
1358 }
1359
1360 buf = read_stream_next_buffer(stream, &per_buffer_data);
1361
1362 /* The relation is exhausted. */
1363 if (!BufferIsValid(buf))
1364 break;
1365
1366 was_eager_scanned = *((bool *) per_buffer_data);
1368 page = BufferGetPage(buf);
1369 blkno = BufferGetBlockNumber(buf);
1370
1371 vacrel->scanned_pages++;
1373 vacrel->eager_scanned_pages++;
1374
1375 /* Report as block scanned, update error traceback information */
1378 blkno, InvalidOffsetNumber);
1379
1380 /*
1381 * Pin the visibility map page in case we need to mark the page
1382 * all-visible. In most cases this will be very cheap, because we'll
1383 * already have the correct page pinned anyway.
1384 */
1385 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1386
1387 /*
1388 * We need a buffer cleanup lock to prune HOT chains and defragment
1389 * the page in lazy_scan_prune. But when it's not possible to acquire
1390 * a cleanup lock right away, we may be able to settle for reduced
1391 * processing using lazy_scan_noprune.
1392 */
1394
1395 if (!got_cleanup_lock)
1397
1398 /* Check for new or empty pages before lazy_scan_[no]prune call */
1400 vmbuffer))
1401 {
1402 /* Processed as new/empty page (lock and pin released) */
1403 continue;
1404 }
1405
1406 /*
1407 * If we didn't get the cleanup lock, we can still collect LP_DEAD
1408 * items in the dead_items area for later vacuuming, count live and
1409 * recently dead tuples for vacuum logging, and determine if this
1410 * block could later be truncated. If we encounter any xid/mxids that
1411 * require advancing the relfrozenxid/relminxid, we'll have to wait
1412 * for a cleanup lock and call lazy_scan_prune().
1413 */
1414 if (!got_cleanup_lock &&
1415 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1416 {
1417 /*
1418 * lazy_scan_noprune could not do all required processing. Wait
1419 * for a cleanup lock, and call lazy_scan_prune in the usual way.
1420 */
1421 Assert(vacrel->aggressive);
1424 got_cleanup_lock = true;
1425 }
1426
1427 /*
1428 * If we have a cleanup lock, we must now prune, freeze, and count
1429 * tuples. We may have acquired the cleanup lock originally, or we may
1430 * have gone back and acquired it after lazy_scan_noprune() returned
1431 * false. Either way, the page hasn't been processed yet.
1432 *
1433 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1434 * recently_dead_tuples and live tuples for vacuum logging, determine
1435 * if the block can later be truncated, and accumulate the details of
1436 * remaining LP_DEAD line pointers on the page into dead_items. These
1437 * dead items include those pruned by lazy_scan_prune() as well as
1438 * line pointers previously marked LP_DEAD.
1439 */
1440 if (got_cleanup_lock)
1441 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1442 vmbuffer,
1444
1445 /*
1446 * Count an eagerly scanned page as a failure or a success.
1447 *
1448 * Only lazy_scan_prune() freezes pages, so if we didn't get the
1449 * cleanup lock, we won't have frozen the page. However, we only count
1450 * pages that were too new to require freezing as eager freeze
1451 * failures.
1452 *
1453 * We could gather more information from lazy_scan_noprune() about
1454 * whether or not there were tuples with XIDs or MXIDs older than the
1455 * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1456 * exclude pages skipped due to cleanup lock contention from eager
1457 * freeze algorithm caps.
1458 */
 /*
 * vm_page_frozen was set by lazy_scan_prune() if it newly marked the
 * page all-frozen in the visibility map.
 */
1460 {
1461 /* Aggressive vacuums do not eager scan. */
1462 Assert(!vacrel->aggressive);
1463
1464 if (vm_page_frozen)
1465 {
1466 if (vacrel->eager_scan_remaining_successes > 0)
1467 vacrel->eager_scan_remaining_successes--;
1468
1469 if (vacrel->eager_scan_remaining_successes == 0)
1470 {
1471 /*
1472 * Report only once that we disabled eager scanning. We
1473 * may eagerly read ahead blocks in excess of the success
1474 * or failure caps before attempting to freeze them, so we
1475 * could reach here even after disabling additional eager
1476 * scanning.
1477 */
1478 if (vacrel->eager_scan_max_fails_per_region > 0)
1479 ereport(vacrel->verbose ? INFO : DEBUG2,
1480 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1482 vacrel->dbname, vacrel->relnamespace,
1483 vacrel->relname)));
1484
1485 /*
1486 * If we hit our success cap, permanently disable eager
1487 * scanning by setting the other eager scan management
1488 * fields to their disabled values.
1489 */
1490 vacrel->eager_scan_remaining_fails = 0;
1491 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1492 vacrel->eager_scan_max_fails_per_region = 0;
1493 }
1494 }
1495 else if (vacrel->eager_scan_remaining_fails > 0)
1496 vacrel->eager_scan_remaining_fails--;
1497 }
1498
1499 /*
1500 * Now drop the buffer lock and, potentially, update the FSM.
1501 *
1502 * Our goal is to update the freespace map the last time we touch the
1503 * page. If we'll process a block in the second pass, we may free up
1504 * additional space on the page, so it is better to update the FSM
1505 * after the second pass. If the relation has no indexes, or if index
1506 * vacuuming is disabled, there will be no second heap pass; if this
1507 * particular page has no dead items, the second heap pass will not
1508 * touch this page. So, in those cases, update the FSM now.
1509 *
1510 * Note: In corner cases, it's possible to miss updating the FSM
1511 * entirely. If index vacuuming is currently enabled, we'll skip the
1512 * FSM update now. But if failsafe mode is later activated, or there
1513 * are so few dead tuples that index vacuuming is bypassed, there will
1514 * also be no opportunity to update the FSM later, because we'll never
1515 * revisit this page. Since updating the FSM is desirable but not
1516 * absolutely required, that's OK.
1517 */
1518 if (vacrel->nindexes == 0
1519 || !vacrel->do_index_vacuuming
1520 || !has_lpdead_items)
1521 {
1522 Size freespace = PageGetHeapFreeSpace(page);
1523
1525 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1526
1527 /*
1528 * Periodically perform FSM vacuuming to make newly-freed space
1529 * visible on upper FSM pages. This is done after vacuuming if the
1530 * table has indexes. There will only be newly-freed space if we
1531 * held the cleanup lock and lazy_scan_prune() was called.
1532 */
1533 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1535 {
1537 blkno);
1539 }
1540 }
1541 else
1543 }
1544
1545 vacrel->blkno = InvalidBlockNumber;
1546 if (BufferIsValid(vmbuffer))
1547 ReleaseBuffer(vmbuffer);
1548
1549 /*
1550 * Report that everything is now scanned. We never skip scanning the last
1551 * block in the relation, so we can pass rel_pages here.
1552 */
1554 rel_pages);
1555
1556 /* now we can compute the new value for pg_class.reltuples */
1557 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1558 vacrel->scanned_pages,
1559 vacrel->live_tuples);
1560
1561 /*
1562 * Also compute the total number of surviving heap entries. In the
1563 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1564 */
1565 vacrel->new_rel_tuples =
1566 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1567 vacrel->missed_dead_tuples;
1568
1569 read_stream_end(stream);
1570
1571 /*
1572 * Do index vacuuming (call each index's ambulkdelete routine), then do
1573 * related heap vacuuming
1574 */
1575 if (vacrel->dead_items_info->num_items > 0)
1577
1578 /*
1579 * Vacuum the remainder of the Free Space Map. We must do this whether or
1580 * not there were indexes, and whether or not we bypassed index vacuuming.
1581 * We can pass rel_pages here because we never skip scanning the last
1582 * block of the relation.
1583 */
1584 if (rel_pages > next_fsm_block_to_vacuum)
1586
1587 /* report all blocks vacuumed */
1589
1590 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1591 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1593}
1594
1595/*
1596 * heap_vac_scan_next_block() -- read stream callback to get the next block
1597 * for vacuum to process
1598 *
1599 * Every time lazy_scan_heap() needs a new block to process during its first
1600 * phase, it invokes read_stream_next_buffer() with a stream set up to call
1601 * heap_vac_scan_next_block() to get the next block.
1602 *
1603 * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1604 * various thresholds to skip blocks which do not need to be processed and
1605 * returns the next block to process or InvalidBlockNumber if there are no
1606 * remaining blocks.
1607 *
1608 * The visibility status of the next block to process and whether or not it
1609 * was eager scanned is set in the per_buffer_data.
1610 *
1611 * callback_private_data contains a reference to the LVRelState, passed to the
1612 * read stream API during stream setup. The LVRelState is an in/out parameter
1613 * here (locally named `vacrel`). Vacuum options and information about the
1614 * relation are read from it. vacrel->skippedallvis is set if we skip a block
1615 * that's all-visible but not all-frozen (to ensure that we don't update
1616 * relfrozenxid in that case). vacrel also holds information about the next
1617 * unskippable block -- as bookkeeping for this function.
1618 */
1619static BlockNumber
1621 void *callback_private_data,
1622 void *per_buffer_data)
1623{
1625 LVRelState *vacrel = callback_private_data;
1626
1627 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1629
1630 /* Have we reached the end of the relation? */
1631 if (next_block >= vacrel->rel_pages)
1632 {
1633 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1634 {
1635 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1636 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1637 }
1638 return InvalidBlockNumber;
1639 }
1640
1641 /*
1642 * We must be in one of the three following states:
1643 */
1644 if (next_block > vacrel->next_unskippable_block ||
1645 vacrel->next_unskippable_block == InvalidBlockNumber)
1646 {
1647 /*
1648 * 1. We have just processed an unskippable block (or we're at the
1649 * beginning of the scan). Find the next unskippable block using the
1650 * visibility map.
1651 */
1652 bool skipsallvis;
 /*
 * skipsallvis is set by find_next_unskippable_block() when the range
 * it scanned past included all-visible (but not all-frozen) pages.
 */
1653
1655
1656 /*
1657 * We now know the next block that we must process. It can be the
1658 * next block after the one we just processed, or something further
1659 * ahead. If it's further ahead, we can jump to it, but we choose to
1660 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1661 * pages. Since we're reading sequentially, the OS should be doing
1662 * readahead for us, so there's no gain in skipping a page now and
1663 * then. Skipping such a range might even discourage sequential
1664 * detection.
1665 *
1666 * This test also enables more frequent relfrozenxid advancement
1667 * during non-aggressive VACUUMs. If the range has any all-visible
1668 * pages then skipping makes updating relfrozenxid unsafe, which is a
1669 * real downside.
1670 */
1671 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1672 {
1673 next_block = vacrel->next_unskippable_block;
1674 if (skipsallvis)
1675 vacrel->skippedallvis = true;
1676 }
1677 }
1678
1679 /* Now we must be in one of the two remaining states: */
1680 if (next_block < vacrel->next_unskippable_block)
1681 {
1682 /*
1683 * 2. We are processing a range of blocks that we could have skipped
1684 * but chose not to. We know that they are all-visible in the VM,
1685 * otherwise they would've been unskippable.
1686 */
1687 vacrel->current_block = next_block;
1688 /* Block was not eager scanned */
1689 *((bool *) per_buffer_data) = false;
1690 return vacrel->current_block;
1691 }
1692 else
1693 {
1694 /*
1695 * 3. We reached the next unskippable block. Process it. On next
1696 * iteration, we will be back in state 1.
1697 */
1698 Assert(next_block == vacrel->next_unskippable_block);
1699
1700 vacrel->current_block = next_block;
1701 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1702 return vacrel->current_block;
1703 }
1704}
1705
1706/*
1707 * Find the next unskippable block in a vacuum scan using the visibility map.
1708 * The next unskippable block and its visibility information is updated in
1709 * vacrel.
1710 *
1711 * Note: our opinion of which blocks can be skipped can go stale immediately.
1712 * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1713 * was concurrently cleared, though. All that matters is that caller scan all
1714 * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1715 * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1716 * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1717 * to skip such a range is actually made, making everything safe.)
1718 */
1719static void
1721{
1722 BlockNumber rel_pages = vacrel->rel_pages;
1723 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1724 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1725 bool next_unskippable_eager_scanned = false;
1726
1727 *skipsallvis = false;
1728
 /* Advance one block at a time, starting just past the previous unskippable block */
1729 for (;; next_unskippable_block++)
1730 {
1732 next_unskippable_block,
1733 &next_unskippable_vmbuffer);
1734
1735
1736 /*
1737 * At the start of each eager scan region, normal vacuums with eager
1738 * scanning enabled reset the failure counter, allowing vacuum to
1739 * resume eager scanning if it had been suspended in the previous
1740 * region.
1741 */
1742 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1743 {
1744 vacrel->eager_scan_remaining_fails =
1745 vacrel->eager_scan_max_fails_per_region;
1746 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1747 }
1748
1749 /*
1750 * A block is unskippable if it is not all visible according to the
1751 * visibility map.
1752 */
1754 {
1756 break;
1757 }
1758
1759 /*
1760 * Caller must scan the last page to determine whether it has tuples
1761 * (caller must have the opportunity to set vacrel->nonempty_pages).
1762 * This rule avoids having lazy_truncate_heap() take access-exclusive
1763 * lock on rel to attempt a truncation that fails anyway, just because
1764 * there are tuples on the last page (it is likely that there will be
1765 * tuples on other nearby pages as well, but those can be skipped).
1766 *
1767 * Implement this by always treating the last block as unsafe to skip.
1768 */
1769 if (next_unskippable_block == rel_pages - 1)
1770 break;
1771
1772 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1773 if (!vacrel->skipwithvm)
1774 break;
1775
1776 /*
1777 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1778 * already frozen by now), so this page can be skipped.
1779 */
1780 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1781 continue;
1782
1783 /*
1784 * Aggressive vacuums cannot skip any all-visible pages that are not
1785 * also all-frozen.
1786 */
1787 if (vacrel->aggressive)
1788 break;
1789
1790 /*
1791 * Normal vacuums with eager scanning enabled only skip all-visible
1792 * but not all-frozen pages if they have hit the failure limit for the
1793 * current eager scan region.
1794 */
1795 if (vacrel->eager_scan_remaining_fails > 0)
1796 {
1797 next_unskippable_eager_scanned = true;
1798 break;
1799 }
1800
1801 /*
1802 * All-visible blocks are safe to skip in a normal vacuum. But
1803 * remember that the final range contains such a block for later.
1804 */
1805 *skipsallvis = true;
1806 }
1807
1808 /* write the local variables back to vacrel */
1809 vacrel->next_unskippable_block = next_unskippable_block;
1810 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1811 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1812}
1813
1814/*
1815 * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1816 *
1817 * Must call here to handle both new and empty pages before calling
1818 * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1819 * with new or empty pages.
1820 *
1821 * It's necessary to consider new pages as a special case, since the rules for
1822 * maintaining the visibility map and FSM with empty pages are a little
1823 * different (though new pages can be truncated away during rel truncation).
1824 *
1825 * Empty pages are not really a special case -- they're just heap pages that
1826 * have no allocated tuples (including even LP_UNUSED items). You might
1827 * wonder why we need to handle them here all the same. It's only necessary
1828 * because of a corner-case involving a hard crash during heap relation
1829 * extension. If we ever make relation-extension crash safe, then it should
1830 * no longer be necessary to deal with empty pages here (or new pages, for
1831 * that matter).
1832 *
1833 * Caller must hold at least a shared lock. We might need to escalate the
1834 * lock in that case, so the type of lock caller holds needs to be specified
1835 * using 'sharelock' argument.
1836 *
1837 * Returns false in common case where caller should go on to call
1838 * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1839 * that lazy_scan_heap is done processing the page, releasing lock on caller's
1840 * behalf.
1841 *
1842 * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1843 * is passed here because neither empty nor new pages can be eagerly frozen.
1844 * New pages are never frozen. Empty pages are always set frozen in the VM at
1845 * the same time that they are set all-visible, and we don't eagerly scan
1846 * frozen pages.
1847 */
1848static bool
1850 Page page, bool sharelock, Buffer vmbuffer)
1851{
1852 Size freespace;
1853
1854 if (PageIsNew(page))
1855 {
1856 /*
1857 * All-zeroes pages can be left over if either a backend extends the
1858 * relation by a single page, but crashes before the newly initialized
1859 * page has been written out, or when bulk-extending the relation
1860 * (which creates a number of empty pages at the tail end of the
1861 * relation), and then enters them into the FSM.
1862 *
1863 * Note we do not enter the page into the visibilitymap. That has the
1864 * downside that we repeatedly visit this page in subsequent vacuums,
1865 * but otherwise we'll never discover the space on a promoted standby.
1866 * The harm of repeated checking ought to normally not be too bad. The
1867 * space usually should be used at some point, otherwise there
1868 * wouldn't be any regular vacuums.
1869 *
1870 * Make sure these pages are in the FSM, to ensure they can be reused.
1871 * Do that by testing if there's any space recorded for the page. If
1872 * not, enter it. We do so after releasing the lock on the heap page,
1873 * the FSM is approximate, after all.
1874 */
1876
1877 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1878 {
 /* Whole page minus the header counts as free space on an all-zeroes page */
1879 freespace = BLCKSZ - SizeOfPageHeaderData;
1880
1881 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1882 }
1883
1884 return true;
1885 }
1886
1887 if (PageIsEmpty(page))
1888 {
1889 /*
1890 * It seems likely that caller will always be able to get a cleanup
1891 * lock on an empty page. But don't take any chances -- escalate to
1892 * an exclusive lock (still don't need a cleanup lock, though).
1893 */
1894 if (sharelock)
1895 {
1898
 /* Re-check after lock escalation: the page may have been filled meanwhile */
1899 if (!PageIsEmpty(page))
1900 {
1901 /* page isn't new or empty -- keep lock and pin for now */
1902 return false;
1903 }
1904 }
1905 else
1906 {
1907 /* Already have a full cleanup lock (which is more than enough) */
1908 }
1909
1910 /*
1911 * Unlike new pages, empty pages are always set all-visible and
1912 * all-frozen.
1913 */
1914 if (!PageIsAllVisible(page))
1915 {
1917
1918 /* mark buffer dirty before writing a WAL record */
1920
1921 /*
1922 * It's possible that another backend has extended the heap,
1923 * initialized the page, and then failed to WAL-log the page due
1924 * to an ERROR. Since heap extension is not WAL-logged, recovery
1925 * might try to replay our record setting the page all-visible and
1926 * find that the page isn't initialized, which will cause a PANIC.
1927 * To prevent that, check whether the page has been previously
1928 * WAL-logged, and if not, do that now.
1929 */
1930 if (RelationNeedsWAL(vacrel->rel) &&
1932 log_newpage_buffer(buf, true);
1933
1934 PageSetAllVisible(page);
1935 PageClearPrunable(page);
1936 visibilitymap_set(vacrel->rel, blkno, buf,
1938 vmbuffer, InvalidTransactionId,
1942
1943 /* Count the newly all-frozen pages for logging */
1944 vacrel->new_all_visible_pages++;
1945 vacrel->new_all_visible_all_frozen_pages++;
1946 }
1947
1948 freespace = PageGetHeapFreeSpace(page);
1950 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1951 return true;
1952 }
1953
1954 /* page isn't new or empty -- keep lock and pin */
1955 return false;
1956}
1957
1958/* qsort comparator for sorting OffsetNumbers */
1959static int
1960cmpOffsetNumbers(const void *a, const void *b)
1961{
1962 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1963}
1964
1965/*
1966 * Helper to correct any corruption detected on a heap page and its
1967 * corresponding visibility map page after pruning but before setting the
1968 * visibility map. It examines the heap page, the associated VM page, and the
1969 * number of dead items previously identified.
1970 *
1971 * This function must be called while holding an exclusive lock on the heap
1972 * buffer, and the dead items must have been discovered under that same lock.
1973 *
1974 * The provided vmbits must reflect the current state of the VM block
1975 * referenced by vmbuffer. Although we do not hold a lock on the VM buffer, it
1976 * is pinned, and the heap buffer is exclusively locked, ensuring that no
1977 * other backend can update the VM bits corresponding to this heap page.
1978 *
1979 * If it clears corruption, it will zero out vmbits.
1980 */
1981static void
1984 int nlpdead_items,
1985 Buffer vmbuffer,
1986 uint8 *vmbits)
1987{
 /* Caller must pass the current VM status bits for heap_blk (asserted here) */
1988 Assert(visibilitymap_get_status(rel, heap_blk, &vmbuffer) == *vmbits);
1989
1991
1992 /*
1993 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1994 * page-level bit is clear. However, it's possible that the bit got
1995 * cleared after heap_vac_scan_next_block() was called, so we must recheck
1996 * with buffer lock before concluding that the VM is corrupt.
1997 */
1999 ((*vmbits & VISIBILITYMAP_VALID_BITS) != 0))
2000 {
2003 errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2005
2006 visibilitymap_clear(rel, heap_blk, vmbuffer,
2008 *vmbits = 0;
2009 }
2010
2011 /*
2012 * It's possible for the value returned by
2013 * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2014 * wrong for us to see tuples that appear to not be visible to everyone
2015 * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2016 * never moves backwards, but GetOldestNonRemovableTransactionId() is
2017 * conservative and sometimes returns a value that's unnecessarily small,
2018 * so if we see that contradiction it just means that the tuples that we
2019 * think are not visible to everyone yet actually are, and the
2020 * PD_ALL_VISIBLE flag is correct.
2021 *
2022 * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2023 * however.
2024 */
2025 else if (PageIsAllVisible(heap_page) && nlpdead_items > 0)
2026 {
2029 errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2031
2034 visibilitymap_clear(rel, heap_blk, vmbuffer,
2036 *vmbits = 0;
2037 }
2038}
2039
2040/*
2041 * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
2042 *
2043 * Caller must hold pin and buffer cleanup lock on the buffer.
2044 *
2045 * vmbuffer is the buffer containing the VM block with visibility information
2046 * for the heap block, blkno.
2047 *
2048 * *has_lpdead_items is set to true or false depending on whether, upon return
2049 * from this function, any LP_DEAD items are still present on the page.
2050 *
2051 * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2052 * VM. The caller currently only uses this for determining whether an eagerly
2053 * scanned page was successfully set all-frozen.
2054 *
2055 * Returns the number of tuples deleted from the page during HOT pruning.
2056 */
/*
 * NOTE(review): extraction of this listing dropped hyperlinked tokens (the
 * function-name line, the heap_page_prune_and_freeze() call line, the
 * PruneFreezeResult declaration, the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
 * line, and the VISIBILITYMAP_* constants assigned to new_vmbits). Skipped
 * embedded line numbers mark the gaps; verify against upstream vacuumlazy.c.
 */
2057static int
2059 Buffer buf,
2060 BlockNumber blkno,
2061 Page page,
2062 Buffer vmbuffer,
2063 bool *has_lpdead_items,
2064 bool *vm_page_frozen)
2065{
2066 Relation rel = vacrel->rel;
2068 PruneFreezeParams params = {
2069 .relation = rel,
2070 .buffer = buf,
2071 .reason = PRUNE_VACUUM_SCAN,
2072 .options = HEAP_PAGE_PRUNE_FREEZE,
2073 .vistest = vacrel->vistest,
2074 .cutoffs = &vacrel->cutoffs,
2075 };
2076 uint8 old_vmbits = 0;
2077 uint8 new_vmbits = 0;
2078
2079 Assert(BufferGetBlockNumber(buf) == blkno);
2080
2081 /*
2082 * Prune all HOT-update chains and potentially freeze tuples on this page.
2083 *
2084 * If the relation has no indexes, we can immediately mark would-be dead
2085 * items LP_UNUSED.
2086 *
2087 * The number of tuples removed from the page is returned in
2088 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2089 * presult.lpdead_items's final value can be thought of as the number of
2090 * tuples that were deleted from indexes.
2091 *
2092 * We will update the VM after collecting LP_DEAD items and freezing
2093 * tuples. Pruning will have determined whether or not the page is
2094 * all-visible.
2095 */
2096 if (vacrel->nindexes == 0)
2098
2100 &presult,
2101 &vacrel->offnum,
2102 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2103
2104 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2105 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2106
2107 if (presult.nfrozen > 0)
2108 {
2109 /*
2110 * We don't increment the new_frozen_tuple_pages instrumentation
2111 * counter when nfrozen == 0, since it only counts pages with newly
2112 * frozen tuples (don't confuse that with pages newly set all-frozen
2113 * in VM).
2114 */
2115 vacrel->new_frozen_tuple_pages++;
2116 }
2117
2118 /*
2119 * VACUUM will call heap_page_is_all_visible() during the second pass over
2120 * the heap to determine all_visible and all_frozen for the page -- this
2121 * is a specialized version of the logic from this function. Now that
2122 * we've finished pruning and freezing, make sure that we're in total
2123 * agreement with heap_page_is_all_visible() using an assertion.
2124 */
2125#ifdef USE_ASSERT_CHECKING
2126 if (presult.all_visible)
2127 {
2129 bool debug_all_frozen;
2130
2131 Assert(presult.lpdead_items == 0);
2132
2134 vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2135 &debug_cutoff, &vacrel->offnum));
2136
2137 Assert(presult.all_frozen == debug_all_frozen);
2138
2140 debug_cutoff == presult.vm_conflict_horizon);
2141 }
2142#endif
2143
2144 /*
2145 * Now save details of the LP_DEAD items from the page in vacrel
2146 */
2147 if (presult.lpdead_items > 0)
2148 {
2149 vacrel->lpdead_item_pages++;
2150
2151 /*
2152 * deadoffsets are collected incrementally in
2153 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2154 * with an indeterminate order, but dead_items_add requires them to be
2155 * sorted.
2156 */
2157 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2159
2160 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2161 }
2162
2163 /* Finally, add page-local counts to whole-VACUUM counts */
2164 vacrel->tuples_deleted += presult.ndeleted;
2165 vacrel->tuples_frozen += presult.nfrozen;
2166 vacrel->lpdead_items += presult.lpdead_items;
2167 vacrel->live_tuples += presult.live_tuples;
2168 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2169
2170 /* Can't truncate this page */
2171 if (presult.hastup)
2172 vacrel->nonempty_pages = blkno + 1;
2173
2174 /* Did we find LP_DEAD items? */
2175 *has_lpdead_items = (presult.lpdead_items > 0);
2176
2177 Assert(!presult.all_visible || !(*has_lpdead_items));
2178 Assert(!presult.all_frozen || presult.all_visible);
2179
/* Read current VM bits, then repair any heap/VM inconsistency found. */
2180 old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
2181
2182 identify_and_fix_vm_corruption(vacrel->rel, buf, blkno, page,
2183 presult.lpdead_items, vmbuffer,
2184 &old_vmbits);
2185
2186 if (!presult.all_visible)
2187 return presult.ndeleted;
2188
2189 /* Set the visibility map and page visibility hint */
2191
2192 if (presult.all_frozen)
2194
2195 /* Nothing to do */
2196 if (old_vmbits == new_vmbits)
2197 return presult.ndeleted;
2198
2199 /*
2200 * It should never be the case that the visibility map page is set while
2201 * the page-level bit is clear (and if so, we cleared it above), but the
2202 * reverse is allowed (if checksums are not enabled). Regardless, set both
2203 * bits so that we get back in sync.
2204 *
2205 * The heap buffer must be marked dirty before adding it to the WAL chain
2206 * when setting the VM. We don't worry about unnecessarily dirtying the
2207 * heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
2208 * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
2209 * the VM bits clear, so there is no point in optimizing it.
2210 */
2211 PageSetAllVisible(page);
2212 PageClearPrunable(page);
2214
2215 /*
2216 * If the page is being set all-frozen, we pass InvalidTransactionId as
2217 * the cutoff_xid, since a snapshot conflict horizon sufficient to make
2218 * everything safe for REDO was logged when the page's tuples were frozen.
2219 */
2220 Assert(!presult.all_frozen ||
2221 !TransactionIdIsValid(presult.vm_conflict_horizon));
2222
2223 visibilitymap_set(vacrel->rel, blkno, buf,
2225 vmbuffer, presult.vm_conflict_horizon,
2226 new_vmbits);
2227
2228 /*
2229 * If the page wasn't already set all-visible and/or all-frozen in the VM,
2230 * count it as newly set for logging.
2231 */
2233 {
2234 vacrel->new_all_visible_pages++;
2235 if (presult.all_frozen)
2236 {
2237 vacrel->new_all_visible_all_frozen_pages++;
2238 *vm_page_frozen = true;
2239 }
2240 }
2241 else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2242 presult.all_frozen)
2243 {
2244 vacrel->new_all_frozen_pages++;
2245 *vm_page_frozen = true;
2246 }
2247
2248 return presult.ndeleted;
2249}
2250
2251/*
2252 * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2253 *
2254 * Caller need only hold a pin and share lock on the buffer, unlike
2255 * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2256 * performed here, it's quite possible that an earlier opportunistic pruning
2257 * operation left LP_DEAD items behind. We'll at least collect any such items
2258 * in dead_items for removal from indexes.
2259 *
2260 * For aggressive VACUUM callers, we may return false to indicate that a full
2261 * cleanup lock is required for processing by lazy_scan_prune. This is only
2262 * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2263 * one or more tuples on the page. We always return true for non-aggressive
2264 * callers.
2265 *
2266 * If this function returns true, *has_lpdead_items gets set to true or false
2267 * depending on whether, upon return from this function, any LP_DEAD items are
2268 * present on the page. If this function returns false, *has_lpdead_items
2269 * is not updated.
2270 */
/*
 * NOTE(review): extraction dropped hyperlinked tokens here (the function-name
 * line, the deadoffsets/tupleheader declarations, the
 * heap_tuple_should_freeze() call line, and several HEAPTUPLE_* case labels
 * in the switch — bare line numbers 2372/2389/2396 mark where labels were
 * lost). Verify against upstream vacuumlazy.c.
 */
2271static bool
2273 Buffer buf,
2274 BlockNumber blkno,
2275 Page page,
2276 bool *has_lpdead_items)
2277{
2278 OffsetNumber offnum,
2279 maxoff;
2280 int lpdead_items,
2281 live_tuples,
2282 recently_dead_tuples,
2283 missed_dead_tuples;
2284 bool hastup;
2286 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2287 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2289
2290 Assert(BufferGetBlockNumber(buf) == blkno);
2291
2292 hastup = false; /* for now */
2293
2294 lpdead_items = 0;
2295 live_tuples = 0;
2296 recently_dead_tuples = 0;
2297 missed_dead_tuples = 0;
2298
/* Examine every used line pointer on the page under the share lock. */
2299 maxoff = PageGetMaxOffsetNumber(page);
2300 for (offnum = FirstOffsetNumber;
2301 offnum <= maxoff;
2302 offnum = OffsetNumberNext(offnum))
2303 {
2304 ItemId itemid;
2305 HeapTupleData tuple;
2306
2307 vacrel->offnum = offnum;
2308 itemid = PageGetItemId(page, offnum);
2309
2310 if (!ItemIdIsUsed(itemid))
2311 continue;
2312
2313 if (ItemIdIsRedirected(itemid))
2314 {
2315 hastup = true;
2316 continue;
2317 }
2318
2319 if (ItemIdIsDead(itemid))
2320 {
2321 /*
2322 * Deliberately don't set hastup=true here. See same point in
2323 * lazy_scan_prune for an explanation.
2324 */
2325 deadoffsets[lpdead_items++] = offnum;
2326 continue;
2327 }
2328
2329 hastup = true; /* page prevents rel truncation */
2330 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2332 &NoFreezePageRelfrozenXid,
2333 &NoFreezePageRelminMxid))
2334 {
2335 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2336 if (vacrel->aggressive)
2337 {
2338 /*
2339 * Aggressive VACUUMs must always be able to advance rel's
2340 * relfrozenxid to a value >= FreezeLimit (and be able to
2341 * advance rel's relminmxid to a value >= MultiXactCutoff).
2342 * The ongoing aggressive VACUUM won't be able to do that
2343 * unless it can freeze an XID (or MXID) from this tuple now.
2344 *
2345 * The only safe option is to have caller perform processing
2346 * of this page using lazy_scan_prune. Caller might have to
2347 * wait a while for a cleanup lock, but it can't be helped.
2348 */
2349 vacrel->offnum = InvalidOffsetNumber;
2350 return false;
2351 }
2352
2353 /*
2354 * Non-aggressive VACUUMs are under no obligation to advance
2355 * relfrozenxid (even by one XID). We can be much laxer here.
2356 *
2357 * Currently we always just accept an older final relfrozenxid
2358 * and/or relminmxid value. We never make caller wait or work a
2359 * little harder, even when it likely makes sense to do so.
2360 */
2361 }
2362
2363 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2364 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2365 tuple.t_len = ItemIdGetLength(itemid);
2366 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2367
2368 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2369 buf))
2370 {
2372 case HEAPTUPLE_LIVE:
2373
2374 /*
2375 * Count both cases as live, just like lazy_scan_prune
2376 */
2377 live_tuples++;
2378
2379 break;
2380 case HEAPTUPLE_DEAD:
2381
2382 /*
2383 * There is some useful work for pruning to do, that won't be
2384 * done due to failure to get a cleanup lock.
2385 */
2386 missed_dead_tuples++;
2387 break;
2389
2390 /*
2391 * Count in recently_dead_tuples, just like lazy_scan_prune
2392 */
2393 recently_dead_tuples++;
2394 break;
2396
2397 /*
2398 * Do not count these rows as live, just like lazy_scan_prune
2399 */
2400 break;
2401 default:
2402 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2403 break;
2404 }
2405 }
2406
2407 vacrel->offnum = InvalidOffsetNumber;
2408
2409 /*
2410 * By here we know for sure that caller can put off freezing and pruning
2411 * this particular page until the next VACUUM. Remember its details now.
2412 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2413 */
2414 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2415 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2416
2417 /* Save any LP_DEAD items found on the page in dead_items */
2418 if (vacrel->nindexes == 0)
2419 {
2420 /* Using one-pass strategy (since table has no indexes) */
2421 if (lpdead_items > 0)
2422 {
2423 /*
2424 * Perfunctory handling for the corner case where a single pass
2425 * strategy VACUUM cannot get a cleanup lock, and it turns out
2426 * that there is one or more LP_DEAD items: just count the LP_DEAD
2427 * items as missed_dead_tuples instead. (This is a bit dishonest,
2428 * but it beats having to maintain specialized heap vacuuming code
2429 * forever, for vanishingly little benefit.)
2430 */
2431 hastup = true;
2432 missed_dead_tuples += lpdead_items;
2433 }
2434 }
2435 else if (lpdead_items > 0)
2436 {
2437 /*
2438 * Page has LP_DEAD items, and so any references/TIDs that remain in
2439 * indexes will be deleted during index vacuuming (and then marked
2440 * LP_UNUSED in the heap)
2441 */
2442 vacrel->lpdead_item_pages++;
2443
2444 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2445
2446 vacrel->lpdead_items += lpdead_items;
2447 }
2448
2449 /*
2450 * Finally, add relevant page-local counts to whole-VACUUM counts
2451 */
2452 vacrel->live_tuples += live_tuples;
2453 vacrel->recently_dead_tuples += recently_dead_tuples;
2454 vacrel->missed_dead_tuples += missed_dead_tuples;
2455 if (missed_dead_tuples > 0)
2456 vacrel->missed_dead_pages++;
2457
2458 /* Can't truncate this page */
2459 if (hastup)
2460 vacrel->nonempty_pages = blkno + 1;
2461
2462 /* Did we find LP_DEAD items? */
2463 *has_lpdead_items = (lpdead_items > 0);
2464
2465 /* Caller won't need to call lazy_scan_prune with same page */
2466 return true;
2467}
2468
2469/*
2470 * Main entry point for index vacuuming and heap vacuuming.
2471 *
2472 * Removes items collected in dead_items from table's indexes, then marks the
2473 * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2474 * for full details.
2475 *
2476 * Also empties dead_items, freeing up space for later TIDs.
2477 *
2478 * We may choose to bypass index vacuuming at this point, though only when the
2479 * ongoing VACUUM operation will definitely only have one index scan/round of
2480 * index vacuuming.
2481 */
/*
 * NOTE(review): extraction dropped hyperlinked tokens here (the function-name
 * line, the threshold declaration/computation around BYPASS_THRESHOLD_PAGES,
 * the dead_items_reset() call, and the "else if (lazy_vacuum_all_indexes())"
 * condition at the bare line number 2569). Verify against upstream
 * vacuumlazy.c.
 */
2482static void
2484{
2485 bool bypass;
2486
2487 /* Should not end up here with no indexes */
2488 Assert(vacrel->nindexes > 0);
2489 Assert(vacrel->lpdead_item_pages > 0);
2490
2491 if (!vacrel->do_index_vacuuming)
2492 {
2493 Assert(!vacrel->do_index_cleanup);
2495 return;
2496 }
2497
2498 /*
2499 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2500 *
2501 * We currently only do this in cases where the number of LP_DEAD items
2502 * for the entire VACUUM operation is close to zero. This avoids sharp
2503 * discontinuities in the duration and overhead of successive VACUUM
2504 * operations that run against the same table with a fixed workload.
2505 * Ideally, successive VACUUM operations will behave as if there are
2506 * exactly zero LP_DEAD items in cases where there are close to zero.
2507 *
2508 * This is likely to be helpful with a table that is continually affected
2509 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2510 * have small aberrations that lead to just a few heap pages retaining
2511 * only one or two LP_DEAD items. This is pretty common; even when the
2512 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2513 * impossible to predict whether HOT will be applied in 100% of cases.
2514 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2515 * HOT through careful tuning.
2516 */
2517 bypass = false;
2518 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2519 {
2521
2522 Assert(vacrel->num_index_scans == 0);
2523 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2524 Assert(vacrel->do_index_vacuuming);
2525 Assert(vacrel->do_index_cleanup);
2526
2527 /*
2528 * This crossover point at which we'll start to do index vacuuming is
2529 * expressed as a percentage of the total number of heap pages in the
2530 * table that are known to have at least one LP_DEAD item. This is
2531 * much more important than the total number of LP_DEAD items, since
2532 * it's a proxy for the number of heap pages whose visibility map bits
2533 * cannot be set on account of bypassing index and heap vacuuming.
2534 *
2535 * We apply one further precautionary test: the space currently used
2536 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2537 * not exceed 32MB. This limits the risk that we will bypass index
2538 * vacuuming again and again until eventually there is a VACUUM whose
2539 * dead_items space is not CPU cache resident.
2540 *
2541 * We don't take any special steps to remember the LP_DEAD items (such
2542 * as counting them in our final update to the stats system) when the
2543 * optimization is applied. Though the accounting used in analyze.c's
2544 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2545 * rows in its own stats report, that's okay. The discrepancy should
2546 * be negligible. If this optimization is ever expanded to cover more
2547 * cases then this may need to be reconsidered.
2548 */
2550 bypass = (vacrel->lpdead_item_pages < threshold &&
2551 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2552 }
2553
2554 if (bypass)
2555 {
2556 /*
2557 * There are almost zero TIDs. Behave as if there were precisely
2558 * zero: bypass index vacuuming, but do index cleanup.
2559 *
2560 * We expect that the ongoing VACUUM operation will finish very
2561 * quickly, so there is no point in considering speeding up as a
2562 * failsafe against wraparound failure. (Index cleanup is expected to
2563 * finish very quickly in cases where there were no ambulkdelete()
2564 * calls.)
2565 */
2566 vacrel->do_index_vacuuming = false;
2567 }
2569 {
2570 /*
2571 * We successfully completed a round of index vacuuming. Do related
2572 * heap vacuuming now.
2573 */
2575 }
2576 else
2577 {
2578 /*
2579 * Failsafe case.
2580 *
2581 * We attempted index vacuuming, but didn't finish a full round/full
2582 * index scan. This happens when relfrozenxid or relminmxid is too
2583 * far in the past.
2584 *
2585 * From this point on the VACUUM operation will do no further index
2586 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2587 * back here again.
2588 */
2590 }
2591
2592 /*
2593 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2594 * vacuum)
2595 */
2597}
2598
2599/*
2600 * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2601 *
2602 * Returns true in the common case when all indexes were successfully
2603 * vacuumed. Returns false in rare cases where we determined that the ongoing
2604 * VACUUM operation is at risk of taking too long to finish, leading to
2605 * wraparound failure.
2606 */
/*
 * NOTE(review): extraction dropped hyperlinked tokens here (the function-name
 * line, PROGRESS_VACUUM_* constants in the progress-index arrays, the
 * failsafe precheck/postcheck conditions, the ParallelVacuumIsActive() test,
 * and the pgstat progress-report calls). Verify against upstream
 * vacuumlazy.c.
 */
2607static bool
2609{
2610 bool allindexes = true;
2611 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2612 const int progress_start_index[] = {
2615 };
2616 const int progress_end_index[] = {
2620 };
2623
2624 Assert(vacrel->nindexes > 0);
2625 Assert(vacrel->do_index_vacuuming);
2626 Assert(vacrel->do_index_cleanup);
2627
2628 /* Precheck for XID wraparound emergencies */
2630 {
2631 /* Wraparound emergency -- don't even start an index scan */
2632 return false;
2633 }
2634
2635 /*
2636 * Report that we are now vacuuming indexes and the number of indexes to
2637 * vacuum.
2638 */
2640 progress_start_val[1] = vacrel->nindexes;
2642
2644 {
2645 for (int idx = 0; idx < vacrel->nindexes; idx++)
2646 {
2647 Relation indrel = vacrel->indrels[idx];
2648 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2649
2650 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2652 vacrel);
2653
2654 /* Report the number of indexes vacuumed */
2656 idx + 1);
2657
2659 {
2660 /* Wraparound emergency -- end current index scan */
2661 allindexes = false;
2662 break;
2663 }
2664 }
2665 }
2666 else
2667 {
2668 /* Outsource everything to parallel variant */
2670 vacrel->num_index_scans);
2671
2672 /*
2673 * Do a postcheck to consider applying wraparound failsafe now. Note
2674 * that parallel VACUUM only gets the precheck and this postcheck.
2675 */
2677 allindexes = false;
2678 }
2679
2680 /*
2681 * We delete all LP_DEAD items from the first heap pass in all indexes on
2682 * each call here (except calls where we choose to do the failsafe). This
2683 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2684 * of the failsafe triggering, which prevents the next call from taking
2685 * place).
2686 */
2687 Assert(vacrel->num_index_scans > 0 ||
2688 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2690
2691 /*
2692 * Increase and report the number of index scans. Also, we reset
2693 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2694 *
2695 * We deliberately include the case where we started a round of bulk
2696 * deletes that we weren't able to finish due to the failsafe triggering.
2697 */
2698 vacrel->num_index_scans++;
2699 progress_end_val[0] = 0;
2700 progress_end_val[1] = 0;
2701 progress_end_val[2] = vacrel->num_index_scans;
2703
2704 return allindexes;
2705}
2706
2707/*
2708 * Read stream callback for vacuum's third phase (second pass over the heap).
2709 * Gets the next block from the TID store and returns it or InvalidBlockNumber
2710 * if there are no further blocks to vacuum.
2711 *
2712 * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2713 */
/*
 * NOTE(review): extraction dropped the function-name/ReadStream-parameter
 * line and the TidStoreIterateNext() call line (skipped numbers 2715, 2720,
 * 2722). Verify against upstream vacuumlazy.c.
 */
2714static BlockNumber
2716 void *callback_private_data,
2717 void *per_buffer_data)
2718{
2719 TidStoreIter *iter = callback_private_data;
2721
2723 if (iter_result == NULL)
2724 return InvalidBlockNumber;
2725
2726 /*
2727 * Save the TidStoreIterResult for later, so we can extract the offsets.
2728 * It is safe to copy the result, according to TidStoreIterateNext().
2729 */
2730 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2731
2732 return iter_result->blkno;
2733}
2734
2735/*
2736 * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2737 *
2738 * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2739 * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2740 *
2741 * We may also be able to truncate the line pointer array of the heap pages we
2742 * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2743 * array, it can be reclaimed as free space. These LP_UNUSED items usually
2744 * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2745 * each page to LP_UNUSED, and then consider if it's possible to truncate the
2746 * page's line pointer array).
2747 *
2748 * Note: the reason for doing this as a second pass is we cannot remove the
2749 * tuples until we've removed their index entries, and we want to process
2750 * index entry removal in batches as large as possible.
2751 */
/*
 * NOTE(review): extraction dropped hyperlinked tokens here (the function-name
 * line, the vacuumed_pages counter and saved-error-info declarations, the
 * progress/error-callback setup calls, the read_stream_begin_relation() call
 * line, the TidStoreGetBlockOffsets() extraction line, the LockBuffer() call,
 * and the UnlockReleaseBuffer()/counter-increment lines). Verify against
 * upstream vacuumlazy.c.
 */
2752static void
2754{
2755 ReadStream *stream;
2757 Buffer vmbuffer = InvalidBuffer;
2759 TidStoreIter *iter;
2760
2761 Assert(vacrel->do_index_vacuuming);
2762 Assert(vacrel->do_index_cleanup);
2763 Assert(vacrel->num_index_scans > 0);
2764
2765 /* Report that we are now vacuuming the heap */
2768
2769 /* Update error traceback information */
2773
2774 iter = TidStoreBeginIterate(vacrel->dead_items);
2775
2776 /*
2777 * Set up the read stream for vacuum's second pass through the heap.
2778 *
2779 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2780 * not need to wait for IO and does not perform locking. Once we support
2781 * parallelism it should still be fine, as presumably the holder of locks
2782 * would never be blocked by IO while holding the lock.
2783 */
2786 vacrel->bstrategy,
2787 vacrel->rel,
2790 iter,
2791 sizeof(TidStoreIterResult));
2792
2793 while (true)
2794 {
2795 BlockNumber blkno;
2796 Buffer buf;
2797 Page page;
2799 Size freespace;
2801 int num_offsets;
2802
2803 vacuum_delay_point(false);
2804
2805 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2806
2807 /* The relation is exhausted */
2808 if (!BufferIsValid(buf))
2809 break;
2810
2811 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2812
2815 Assert(num_offsets <= lengthof(offsets));
2816
2817 /*
2818 * Pin the visibility map page in case we need to mark the page
2819 * all-visible. In most cases this will be very cheap, because we'll
2820 * already have the correct page pinned anyway.
2821 */
2822 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2823
2824 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2826 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2827 num_offsets, vmbuffer);
2828
2829 /* Now that we've vacuumed the page, record its available space */
2830 page = BufferGetPage(buf);
2831 freespace = PageGetHeapFreeSpace(page);
2832
2834 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2836 }
2837
2838 read_stream_end(stream);
2839 TidStoreEndIterate(iter);
2840
2841 vacrel->blkno = InvalidBlockNumber;
2842 if (BufferIsValid(vmbuffer))
2843 ReleaseBuffer(vmbuffer);
2844
2845 /*
2846 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2847 * the second heap pass. No more, no less.
2848 */
2849 Assert(vacrel->num_index_scans > 1 ||
2850 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2851 vacuumed_pages == vacrel->lpdead_item_pages));
2852
2854 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2855 vacrel->relname, vacrel->dead_items_info->num_items,
2856 vacuumed_pages)));
2857
2858 /* Revert to the previous phase information for error traceback */
2860}
2861
2862/*
2863 * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2864 * vacrel->dead_items store.
2865 *
2866 * Caller must have an exclusive buffer lock on the buffer (though a full
2867 * cleanup lock is also acceptable). vmbuffer must be valid and already have
2868 * a pin on blkno's visibility map page.
2869 */
/*
 * NOTE(review): extraction dropped hyperlinked tokens here (the
 * function-name line, the unused[] array and conflict_xid/saved-error-info
 * declarations, the VM-flag assignments inside the all-visible branch, the
 * vmbuffer LockBuffer() call, the START/END_CRIT_SECTION() calls, the
 * PageTruncateLinePointerArray() call, and the visibilitymap_set_vmbits()
 * call line). Verify against upstream vacuumlazy.c.
 */
2870static void
2872 OffsetNumber *deadoffsets, int num_offsets,
2873 Buffer vmbuffer)
2874{
2875 Page page = BufferGetPage(buffer);
2877 int nunused = 0;
2878 TransactionId visibility_cutoff_xid;
2880 bool all_frozen;
2882 uint8 vmflags = 0;
2883
2884 Assert(vacrel->do_index_vacuuming);
2885
2887
2888 /* Update error traceback information */
2892
2893 /*
2894 * Before marking dead items unused, check whether the page will become
2895 * all-visible once that change is applied. This lets us reap the tuples
2896 * and mark the page all-visible within the same critical section,
2897 * enabling both changes to be emitted in a single WAL record. Since the
2898 * visibility checks may perform I/O and allocate memory, they must be
2899 * done outside the critical section.
2900 */
2901 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2902 vacrel->cutoffs.OldestXmin,
2903 deadoffsets, num_offsets,
2904 &all_frozen, &visibility_cutoff_xid,
2905 &vacrel->offnum))
2906 {
2908 if (all_frozen)
2909 {
2911 Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2912 }
2913
2914 /*
2915 * Take the lock on the vmbuffer before entering a critical section.
2916 * The heap page lock must also be held while updating the VM to
2917 * ensure consistency.
2918 */
2920 }
2921
2923
/* Mark each recorded dead line pointer LP_UNUSED, remembering offsets for WAL. */
2924 for (int i = 0; i < num_offsets; i++)
2925 {
2926 ItemId itemid;
2927 OffsetNumber toff = deadoffsets[i];
2928
2929 itemid = PageGetItemId(page, toff);
2930
2931 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2932 ItemIdSetUnused(itemid);
2933 unused[nunused++] = toff;
2934 }
2935
2936 Assert(nunused > 0);
2937
2938 /* Attempt to truncate line pointer array now */
2940
2941 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2942 {
2943 /*
2944 * The page is guaranteed to have had dead line pointers, so we always
2945 * set PD_ALL_VISIBLE.
2946 */
2947 PageSetAllVisible(page);
2948 PageClearPrunable(page);
2950 vmbuffer, vmflags,
2951 vacrel->rel->rd_locator);
2952 conflict_xid = visibility_cutoff_xid;
2953 }
2954
2955 /*
2956 * Mark buffer dirty before we write WAL.
2957 */
2958 MarkBufferDirty(buffer);
2959
2960 /* XLOG stuff */
2961 if (RelationNeedsWAL(vacrel->rel))
2962 {
2963 log_heap_prune_and_freeze(vacrel->rel, buffer,
2964 vmflags != 0 ? vmbuffer : InvalidBuffer,
2965 vmflags,
2967 false, /* no cleanup lock required */
2969 NULL, 0, /* frozen */
2970 NULL, 0, /* redirected */
2971 NULL, 0, /* dead */
2972 unused, nunused);
2973 }
2974
2976
2978 {
2979 /* Count the newly set VM page for logging */
2980 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2981 vacrel->new_all_visible_pages++;
2982 if (all_frozen)
2983 vacrel->new_all_visible_all_frozen_pages++;
2984 }
2985
2986 /* Revert to the previous phase information for error traceback */
2988}
2989
2990/*
2991 * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2992 * relfrozenxid and/or relminmxid that is dangerously far in the past.
2993 * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2994 * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2995 *
2996 * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2997 * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2998 * that it started out with.
2999 *
3000 * Returns true when failsafe has been triggered.
3001 */
/*
 * NOTE(review): extraction dropped hyperlinked tokens here (the
 * function-name line, the early "if (VacuumFailsafeActive)" guard, the
 * vacuum_xid_failsafe_check() condition, the PROGRESS_VACUUM_* constants in
 * progress_index[], and the progress-update/cost-balance lines). Verify
 * against upstream vacuumlazy.c.
 */
3002static bool
3004{
3005 /* Don't warn more than once per VACUUM */
3007 return true;
3008
3010 {
3011 const int progress_index[] = {
3015 };
3017
3018 VacuumFailsafeActive = true;
3019
3020 /*
3021 * Abandon use of a buffer access strategy to allow use of all of
3022 * shared buffers. We assume the caller who allocated the memory for
3023 * the BufferAccessStrategy will free it.
3024 */
3025 vacrel->bstrategy = NULL;
3026
3027 /* Disable index vacuuming, index cleanup, and heap rel truncation */
3028 vacrel->do_index_vacuuming = false;
3029 vacrel->do_index_cleanup = false;
3030 vacrel->do_rel_truncate = false;
3031
3032 /* Reset the progress counters and set the failsafe mode */
3034
3036 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3037 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3038 vacrel->num_index_scans),
3039 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3040 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3041 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3042
3043 /* Stop applying cost limits from this point on */
3044 VacuumCostActive = false;
3046
3047 return true;
3048 }
3049
3050 return false;
3051}
3052
3053/*
3054 * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3055 */
/*
 * NOTE(review): extraction dropped hyperlinked tokens here (the
 * function-name line, PROGRESS_VACUUM_* constants in the progress-index
 * arrays, the progress_start_val declaration, the ParallelVacuumIsActive()
 * test, and the pgstat progress-report calls). Verify against upstream
 * vacuumlazy.c.
 */
3056static void
3058{
3059 double reltuples = vacrel->new_rel_tuples;
3060 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3061 const int progress_start_index[] = {
3064 };
3065 const int progress_end_index[] = {
3068 };
3070 int64 progress_end_val[2] = {0, 0};
3071
3072 Assert(vacrel->do_index_cleanup);
3073 Assert(vacrel->nindexes > 0);
3074
3075 /*
3076 * Report that we are now cleaning up indexes and the number of indexes to
3077 * cleanup.
3078 */
3080 progress_start_val[1] = vacrel->nindexes;
3082
3084 {
3085 for (int idx = 0; idx < vacrel->nindexes; idx++)
3086 {
3087 Relation indrel = vacrel->indrels[idx];
3088 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3089
3090 vacrel->indstats[idx] =
3091 lazy_cleanup_one_index(indrel, istat, reltuples,
3092 estimated_count, vacrel);
3093
3094 /* Report the number of indexes cleaned up */
3096 idx + 1);
3097 }
3098 }
3099 else
3100 {
3101 /* Outsource everything to parallel variant */
3103 vacrel->num_index_scans,
3104 estimated_count);
3105 }
3106
3107 /* Reset the progress counters */
3109}
3110
3111/*
3112 * lazy_vacuum_one_index() -- vacuum index relation.
3113 *
3114 * Delete all the index tuples containing a TID collected in
3115 * vacrel->dead_items. Also update running statistics. Exact
3116 * details depend on index AM's ambulkdelete routine.
3117 *
3118 * reltuples is the number of heap tuples to be passed to the
3119 * bulkdelete callback. It's always assumed to be estimated.
3120 * See indexam.sgml for more info.
3121 *
3122 * Returns bulk delete stats derived from input stats
 *
 * NOTE(review): the signature's first line and local declarations
 * (presumably "IndexVacuumInfo ivinfo;" and the istat parameter line) were
 * elided by the extraction, as were the indname save and the error-info
 * save/restore calls -- confirm against upstream vacuumlazy.c.
3123 */
3124static IndexBulkDeleteResult *
3126 double reltuples, LVRelState *vacrel)
3127{
3130
3131 ivinfo.index = indrel;
3132 ivinfo.heaprel = vacrel->rel;
3133 ivinfo.analyze_only = false;
3134 ivinfo.report_progress = false;
 /* always true here, per the header comment above */
3135 ivinfo.estimated_count = true;
3136 ivinfo.message_level = DEBUG2;
3137 ivinfo.num_heap_tuples = reltuples;
3138 ivinfo.strategy = vacrel->bstrategy;
3139
3140 /*
3141 * Update error traceback information.
3142 *
3143 * The index name is saved during this phase and restored immediately
3144 * after this phase. See vacuum_error_callback.
3145 */
3146 Assert(vacrel->indname == NULL);
3151
3152 /* Do bulk deletion */
3153 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3154 vacrel->dead_items_info);
3155
3156 /* Revert to the previous phase information for error traceback */
3158 pfree(vacrel->indname);
3159 vacrel->indname = NULL;
3160
3161 return istat;
3162}
3163
3164/*
3165 * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3166 *
3167 * Calls index AM's amvacuumcleanup routine. reltuples is the number
3168 * of heap tuples and estimated_count is true if reltuples is an
3169 * estimated value. See indexam.sgml for more info.
3170 *
3171 * Returns bulk delete stats derived from input stats
 *
 * NOTE(review): signature first line, the "IndexVacuumInfo ivinfo;" style
 * local declarations, and the indname/error-info save-restore calls appear
 * to have been elided by the extraction -- verify against upstream.
3172 */
3173static IndexBulkDeleteResult *
3175 double reltuples, bool estimated_count,
3177{
3180
3181 ivinfo.index = indrel;
3182 ivinfo.heaprel = vacrel->rel;
3183 ivinfo.analyze_only = false;
3184 ivinfo.report_progress = false;
 /* unlike bulk delete, cleanup passes the caller's estimated_count through */
3185 ivinfo.estimated_count = estimated_count;
3186 ivinfo.message_level = DEBUG2;
3187
3188 ivinfo.num_heap_tuples = reltuples;
3189 ivinfo.strategy = vacrel->bstrategy;
3190
3191 /*
3192 * Update error traceback information.
3193 *
3194 * The index name is saved during this phase and restored immediately
3195 * after this phase. See vacuum_error_callback.
3196 */
3197 Assert(vacrel->indname == NULL);
3202
3203 istat = vac_cleanup_one_index(&ivinfo, istat);
3204
3205 /* Revert to the previous phase information for error traceback */
3207 pfree(vacrel->indname);
3208 vacrel->indname = NULL;
3209
3210 return istat;
3211}
3212
3213/*
3214 * should_attempt_truncation - should we attempt to truncate the heap?
3215 *
3216 * Don't even think about it unless we have a shot at releasing a goodly
3217 * number of pages. Otherwise, the time taken isn't worth it, mainly because
3218 * an AccessExclusive lock must be replayed on any hot standby, where it can
3219 * be particularly disruptive.
3220 *
3221 * Also don't attempt it if wraparound failsafe is in effect. The entire
3222 * system might be refusing to allocate new XIDs at this point. The system
3223 * definitely won't return to normal unless and until VACUUM actually advances
3224 * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3225 * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3226 * truncate the table under these circumstances, an XID exhaustion error might
3227 * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3228 * There is very little chance of truncation working out when the failsafe is
3229 * in effect in any case. lazy_scan_prune makes the optimistic assumption
3230 * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3231 * we're called.
3232 */
3233static bool
3235{
 /* NOTE(review): the "BlockNumber possibly_freeable;" declaration and the
  * size-threshold half of the condition below (presumably testing
  * REL_TRUNCATE_MINIMUM / REL_TRUNCATE_FRACTION, defined at the top of this
  * file) were elided by the extraction -- confirm against upstream. */
3237
3238 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3239 return false;
3240
 /* pages past the last page known to contain tuples are candidates */
3241 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3242 if (possibly_freeable > 0 &&
3245 return true;
3246
3247 return false;
3248}
3249
3250/*
3251 * lazy_truncate_heap - try to truncate off any empty pages at the end
 *
 * Repeatedly: conditionally acquire AccessExclusiveLock (giving up rather
 * than blocking), re-verify via count_nondeletable_pages() that the tail
 * pages are still empty, truncate, and release the lock.  Loops because a
 * suspended verification pass (lock waiter detected) may leave more
 * truncatable pages behind.
 *
 * NOTE(review): several linked-identifier lines were elided by the
 * extraction: the signature line, declarations of new_rel_pages and
 * lock_waiter_detected (both are used below but not visibly declared), the
 * ConditionalLockRelation / WaitLatch retry body, the
 * RelationGetNumberOfBlocks re-check, the count_nondeletable_pages call,
 * the RelationTruncate call, and UnlockRelation -- verify against upstream.
3252 */
3253static void
3255{
3256 BlockNumber orig_rel_pages = vacrel->rel_pages;
3259 int lock_retry;
3260
3261 /* Report that we are now truncating */
3264
3265 /* Update error traceback information one last time */
3267 vacrel->nonempty_pages, InvalidOffsetNumber);
3268
3269 /*
3270 * Loop until no more truncating can be done.
3271 */
3272 do
3273 {
3274 /*
3275 * We need full exclusive lock on the relation in order to do
3276 * truncation. If we can't get it, give up rather than waiting --- we
3277 * don't want to block other backends, and we don't want to deadlock
3278 * (which is quite possible considering we already hold a lower-grade
3279 * lock).
3280 */
3281 lock_waiter_detected = false;
3282 lock_retry = 0;
3283 while (true)
3284 {
3286 break;
3287
3288 /*
3289 * Check for interrupts while trying to (re-)acquire the exclusive
3290 * lock.
3291 */
3293
3296 {
3297 /*
3298 * We failed to establish the lock in the specified number of
3299 * retries. This means we give up truncating.
3300 */
3301 ereport(vacrel->verbose ? INFO : DEBUG2,
3302 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3303 vacrel->relname)));
3304 return;
3305 }
3306
3312 }
3313
3314 /*
3315 * Now that we have exclusive lock, look to see if the rel has grown
3316 * whilst we were vacuuming with non-exclusive lock. If so, give up;
3317 * the newly added pages presumably contain non-deletable tuples.
3318 */
3321 {
3322 /*
3323 * Note: we intentionally don't update vacrel->rel_pages with the
3324 * new rel size here. If we did, it would amount to assuming that
3325 * the new pages are empty, which is unlikely. Leaving the numbers
3326 * alone amounts to assuming that the new pages have the same
3327 * tuple density as existing ones, which is less unlikely.
3328 */
3330 return;
3331 }
3332
3333 /*
3334 * Scan backwards from the end to verify that the end pages actually
3335 * contain no tuples. This is *necessary*, not optional, because
3336 * other backends could have added tuples to these pages whilst we
3337 * were vacuuming.
3338 */
3340 vacrel->blkno = new_rel_pages;
3341
3343 {
3344 /* can't do anything after all */
3346 return;
3347 }
3348
3349 /*
3350 * Okay to truncate.
3351 */
3353
3354 /*
3355 * We can release the exclusive lock as soon as we have truncated.
3356 * Other backends can't safely access the relation until they have
3357 * processed the smgr invalidation that smgrtruncate sent out ... but
3358 * that should happen as part of standard invalidation processing once
3359 * they acquire lock on the relation.
3360 */
3362
3363 /*
3364 * Update statistics. Here, it *is* correct to adjust rel_pages
3365 * without also touching reltuples, since the tuple count wasn't
3366 * changed by the truncation.
3367 */
3368 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3369 vacrel->rel_pages = new_rel_pages;
3370
3371 ereport(vacrel->verbose ? INFO : DEBUG2,
3372 (errmsg("table \"%s\": truncated %u to %u pages",
3373 vacrel->relname,
 /* retry only when the verification pass was cut short by a lock waiter */
3376 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3377}
3378
3379/*
3380 * Rescan end pages to verify that they are (still) empty of tuples.
3381 *
3382 * Returns number of nondeletable pages (last nonempty page + 1).
 *
 * Walks backwards from vacrel->rel_pages toward vacrel->nonempty_pages,
 * reading each page (shared lock only) and stopping at the first page with
 * any used line pointer.  Periodically (every 32 blocks, rate-limited by a
 * wall-clock interval) checks for waiters on our AccessExclusiveLock and,
 * if one is found, suspends and reports the current position via
 * *lock_waiter_detected so lazy_truncate_heap can retry later.
 *
 * NOTE(review): the extraction elided the signature line, the StaticAssert
 * operand, several declarations (prefetchedUntil, currenttime, elapsed,
 * prefetchStart, pblkno), the interval/lock-waiter conditions, the
 * PrefetchBuffer and ReadBufferExtended calls, and the buffer lock/release
 * calls -- verify against upstream vacuumlazy.c.
3383 */
3384static BlockNumber
3386{
3388 "prefetch size must be power of 2");
3389
3390 BlockNumber blkno;
3392 instr_time starttime;
3393
3394 /* Initialize the starttime if we check for conflicting lock requests */
3395 INSTR_TIME_SET_CURRENT(starttime);
3396
3397 /*
3398 * Start checking blocks at what we believe relation end to be and move
3399 * backwards. (Strange coding of loop control is needed because blkno is
3400 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3401 * in forward direction, so that OS-level readahead can kick in.
3402 */
3403 blkno = vacrel->rel_pages;
3405 while (blkno > vacrel->nonempty_pages)
3406 {
3407 Buffer buf;
3408 Page page;
3409 OffsetNumber offnum,
3410 maxoff;
3411 bool hastup;
3412
3413 /*
3414 * Check if another process requests a lock on our relation. We are
3415 * holding an AccessExclusiveLock here, so they will be waiting. We
3416 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3417 * only check if that interval has elapsed once every 32 blocks to
3418 * keep the number of system calls and actual shared lock table
3419 * lookups to a minimum.
3420 */
3421 if ((blkno % 32) == 0)
3422 {
3425
3428 INSTR_TIME_SUBTRACT(elapsed, starttime);
3429 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3431 {
3433 {
3434 ereport(vacrel->verbose ? INFO : DEBUG2,
3435 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3436 vacrel->relname)));
3437
3438 *lock_waiter_detected = true;
3439 return blkno;
3440 }
3441 starttime = currenttime;
3442 }
3443 }
3444
3445 /*
3446 * We don't insert a vacuum delay point here, because we have an
3447 * exclusive lock on the table which we want to hold for as short a
3448 * time as possible. We still need to check for interrupts however.
3449 */
3451
3452 blkno--;
3453
3454 /* If we haven't prefetched this lot yet, do so now. */
3455 if (prefetchedUntil > blkno)
3456 {
3459
 /* round down to the start of the PREFETCH_SIZE-aligned group */
3460 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3461 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3462 {
3465 }
3467 }
3468
3470 vacrel->bstrategy);
3471
3472 /* In this phase we only need shared access to the buffer */
3474
3475 page = BufferGetPage(buf);
3476
3477 if (PageIsNew(page) || PageIsEmpty(page))
3478 {
3480 continue;
3481 }
3482
3483 hastup = false;
3484 maxoff = PageGetMaxOffsetNumber(page);
3485 for (offnum = FirstOffsetNumber;
3486 offnum <= maxoff;
3487 offnum = OffsetNumberNext(offnum))
3488 {
3489 ItemId itemid;
3490
3491 itemid = PageGetItemId(page, offnum);
3492
3493 /*
3494 * Note: any non-unused item should be taken as a reason to keep
3495 * this page. Even an LP_DEAD item makes truncation unsafe, since
3496 * we must not have cleaned out its index entries.
3497 */
3498 if (ItemIdIsUsed(itemid))
3499 {
3500 hastup = true;
3501 break; /* can stop scanning */
3502 }
3503 } /* scan along page */
3504
3506
3507 /* Done scanning if we found a tuple here */
3508 if (hastup)
3509 return blkno + 1;
3510 }
3511
3512 /*
3513 * If we fall out of the loop, all the previously-thought-to-be-empty
3514 * pages still are; we need not bother to look at the last known-nonempty
3515 * page.
3516 */
3517 return vacrel->nonempty_pages;
3518}
3519
3520/*
3521 * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3522 * shared memory). Sets both in vacrel for caller.
3523 *
3524 * Also handles parallel initialization as part of allocating dead_items in
3525 * DSM when required.
 *
 * nworkers: parallel-worker request; >= 0 enables consideration of parallel
 * vacuum (with > 0 meaning the user asked for it explicitly, see the
 * temporary-table warning below).
 *
 * NOTE(review): the extraction elided the "int vac_work_mem = ..." ternary
 * (choosing autovacuum_work_mem vs. maintenance_work_mem -- confirm), the
 * temp-table test, and the ParallelVacuumIsActive branch around the DSM
 * dead-items fetch.
3526 */
3527static void
3528dead_items_alloc(LVRelState *vacrel, int nworkers)
3529{
3530 VacDeadItemsInfo *dead_items_info;
3532 autovacuum_work_mem != -1 ?
3534
3535 /*
3536 * Initialize state for a parallel vacuum. As of now, only one worker can
3537 * be used for an index, so we invoke parallelism only if there are at
3538 * least two indexes on a table.
3539 */
3540 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3541 {
3542 /*
3543 * Since parallel workers cannot access data in temporary tables, we
3544 * can't perform parallel vacuum on them.
3545 */
3547 {
3548 /*
3549 * Give warning only if the user explicitly tries to perform a
3550 * parallel vacuum on the temporary table.
3551 */
3552 if (nworkers > 0)
3554 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3555 vacrel->relname)));
3556 }
3557 else
3558 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3559 vacrel->nindexes, nworkers,
3561 vacrel->verbose ? INFO : DEBUG2,
3562 vacrel->bstrategy);
3563
3564 /*
3565 * If parallel mode started, dead_items and dead_items_info spaces are
3566 * allocated in DSM.
3567 */
3569 {
3571 &vacrel->dead_items_info);
3572 return;
3573 }
3574 }
3575
3576 /*
3577 * Serial VACUUM case. Allocate both dead_items and dead_items_info
3578 * locally.
3579 */
3580
3581 dead_items_info = palloc_object(VacDeadItemsInfo);
 /* vac_work_mem is in kilobytes; convert to bytes for the TID store cap */
3582 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3583 dead_items_info->num_items = 0;
3584 vacrel->dead_items_info = dead_items_info;
3585
3586 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3587}
3588
3589/*
3590 * Add the given block number and offset numbers to dead_items.
 *
 * Stores the (blkno, offsets[0..num_offsets-1]) TIDs in the shared or local
 * TID store, bumps the running item count, and pushes the new count and
 * memory usage to the command-progress counters.
 *
 * NOTE(review): the signature line and the progress-constant initializers
 * plus the final pgstat_progress_update_multi_param call were elided by the
 * extraction -- verify against upstream.
3591 */
3592static void
3594 int num_offsets)
3595{
3596 const int prog_index[2] = {
3599 };
3600 int64 prog_val[2];
3601
3602 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3603 vacrel->dead_items_info->num_items += num_offsets;
3604
3605 /* update the progress information */
3606 prog_val[0] = vacrel->dead_items_info->num_items;
3607 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3609}
3610
3611/*
3612 * Forget all collected dead items.
 *
 * Accumulates usage statistics for the store being discarded, then either
 * re-fetches a fresh DSM-backed store (parallel case; the elided condition
 * presumably tests ParallelVacuumIsActive -- confirm) or destroys and
 * recreates the local TID store with the same memory cap.
3613 */
3614static void
3616{
3617 /* Update statistics for dead items */
3618 vacrel->num_dead_items_resets++;
3619 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3620
3622 {
3624 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3625 &vacrel->dead_items_info);
3626 return;
3627 }
3628
3629 /* Recreate the tidstore with the same max_bytes limitation */
3630 TidStoreDestroy(vacrel->dead_items);
3631 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3632
3633 /* Reset the counter */
3634 vacrel->dead_items_info->num_items = 0;
3635}
3636
3637/*
3638 * Perform cleanup for resources allocated in dead_items_alloc
 *
 * In the serial case there is nothing to do (memory-context reset will
 * reclaim the palloc'd structures).  In the parallel case, ends parallel
 * mode, which also hands back the index stats.  The elided guard below
 * presumably tests !ParallelVacuumIsActive(vacrel) -- confirm upstream.
3639 */
3640static void
3642{
3644 {
3645 /* Don't bother with pfree here */
3646 return;
3647 }
3648
3649 /* End parallel mode */
3650 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3651 vacrel->pvs = NULL;
3652}
3653
3654#ifdef USE_ASSERT_CHECKING
3655
3656/*
3657 * Wrapper for heap_page_would_be_all_visible() which can be used for callers
3658 * that expect no LP_DEAD on the page. Currently assert-only, but there is no
3659 * reason not to use it outside of asserts.
 *
 * Simply forwards all arguments with an empty deadoffsets array (NULL, 0).
 * NOTE(review): the signature's first line and the "return
 * heap_page_would_be_all_visible(..." line were elided by the extraction.
3660 */
3661static bool
3663 TransactionId OldestXmin,
3664 bool *all_frozen,
3665 TransactionId *visibility_cutoff_xid,
3667{
3668
3670 OldestXmin,
3671 NULL, 0,
3672 all_frozen,
3673 visibility_cutoff_xid,
3675}
3676#endif
3677
3678/*
3679 * Check whether the heap page in buf is all-visible except for the dead
3680 * tuples referenced in the deadoffsets array.
3681 *
3682 * Vacuum uses this to check if a page would become all-visible after reaping
3683 * known dead tuples. This function does not remove the dead items.
3684 *
3685 * This cannot be called in a critical section, as the visibility checks may
3686 * perform IO and allocate memory.
3687 *
3688 * Returns true if the page is all-visible other than the provided
3689 * deadoffsets and false otherwise.
3690 *
3691 * OldestXmin is used to determine visibility.
3692 *
3693 * Output parameters:
3694 *
3695 * - *all_frozen: true if every tuple on the page is frozen
3696 * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
3697 * - *logging_offnum: OffsetNumber of current tuple being processed;
3698 * used by vacuum's error callback system.
3699 *
3700 * Callers looking to verify that the page is already all-visible can call
3701 * heap_page_is_all_visible().
3702 *
3703 * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3704 * If you modify this function, ensure consistency with that code. An
3705 * assertion cross-checks that both remain in agreement. Do not introduce new
3706 * side-effects.
 *
 * NOTE(review): the extraction elided several linked-identifier lines:
 * the signature's first line, the blockno/logging_offnum declarations, the
 * HeapTupleSatisfiesVacuum switch header, the HeapTupleHeaderXminCommitted
 * test, the matched_dead_count bounds check and increment, and the
 * RECENTLY_DEAD/INSERT/DELETE-in-progress case labels -- verify against
 * upstream vacuumlazy.c before relying on this text.
3707 */
3708static bool
3710 TransactionId OldestXmin,
3711 OffsetNumber *deadoffsets,
3712 int ndeadoffsets,
3713 bool *all_frozen,
3714 TransactionId *visibility_cutoff_xid,
3716{
3717 Page page = BufferGetPage(buf);
3719 OffsetNumber offnum,
3720 maxoff;
3721 bool all_visible = true;
 /* counts entries of deadoffsets[] matched so far (array must be sorted) */
3722 int matched_dead_count = 0;
3723
3724 *visibility_cutoff_xid = InvalidTransactionId;
3725 *all_frozen = true;
3726
3727 Assert(ndeadoffsets == 0 || deadoffsets);
3728
3729#ifdef USE_ASSERT_CHECKING
3730 /* Confirm input deadoffsets[] is strictly sorted */
3731 if (ndeadoffsets > 1)
3732 {
3733 for (int i = 1; i < ndeadoffsets; i++)
3734 Assert(deadoffsets[i - 1] < deadoffsets[i]);
3735 }
3736#endif
3737
3738 maxoff = PageGetMaxOffsetNumber(page);
3739 for (offnum = FirstOffsetNumber;
3740 offnum <= maxoff && all_visible;
3741 offnum = OffsetNumberNext(offnum))
3742 {
3743 ItemId itemid;
3744 HeapTupleData tuple;
3746
3747 /*
3748 * Set the offset number so that we can display it along with any
3749 * error that occurred while processing this tuple.
3750 */
3751 *logging_offnum = offnum;
3752 itemid = PageGetItemId(page, offnum);
3753
3754 /* Unused or redirect line pointers are of no interest */
3755 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3756 continue;
3757
3758 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3759
3760 /*
3761 * Dead line pointers can have index pointers pointing to them. So
3762 * they can't be treated as visible
3763 */
3764 if (ItemIdIsDead(itemid))
3765 {
 /* an LP_DEAD item is tolerated only if the caller listed it */
3766 if (!deadoffsets ||
3768 deadoffsets[matched_dead_count] != offnum)
3769 {
3770 *all_frozen = all_visible = false;
3771 break;
3772 }
3774 continue;
3775 }
3776
3777 Assert(ItemIdIsNormal(itemid));
3778
3779 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3780 tuple.t_len = ItemIdGetLength(itemid);
3781 tuple.t_tableOid = RelationGetRelid(rel);
3782
3783 /* Visibility checks may do IO or allocate memory */
3786 {
3787 case HEAPTUPLE_LIVE:
3788 {
3789 TransactionId xmin;
3790
3791 /* Check comments in lazy_scan_prune. */
3793 {
3794 all_visible = false;
3795 *all_frozen = false;
3796 break;
3797 }
3798
3799 /*
3800 * The inserter definitely committed. But is it old enough
3801 * that everyone sees it as committed?
3802 */
3803 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3804 if (!TransactionIdPrecedes(xmin, OldestXmin))
3805 {
3806 all_visible = false;
3807 *all_frozen = false;
3808 break;
3809 }
3810
3811 /* Track newest xmin on page. */
3812 if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3814 *visibility_cutoff_xid = xmin;
3815
3816 /* Check whether this tuple is already frozen or not */
3817 if (all_visible && *all_frozen &&
3819 *all_frozen = false;
3820 }
3821 break;
3822
3823 case HEAPTUPLE_DEAD:
3827 {
3828 all_visible = false;
3829 *all_frozen = false;
3830 break;
3831 }
3832 default:
3833 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3834 break;
3835 }
3836 } /* scan along page */
3837
3838 /* Clear the offset information once we have processed the given page. */
3840
3841 return all_visible;
3842}
3843
3844/*
3845 * Update index statistics in pg_class if the statistics are accurate.
 *
 * Skips indexes with no stats or only estimated stats; for the rest,
 * writes num_pages/num_index_tuples to pg_class (the elided call below is
 * presumably vac_update_relstats -- confirm against upstream; its first
 * argument line, carrying indrel, was also elided).
3846 */
3847static void
3849{
3850 Relation *indrels = vacrel->indrels;
3851 int nindexes = vacrel->nindexes;
3852 IndexBulkDeleteResult **indstats = vacrel->indstats;
3853
3854 Assert(vacrel->do_index_cleanup);
3855
3856 for (int idx = 0; idx < nindexes; idx++)
3857 {
3858 Relation indrel = indrels[idx];
3859 IndexBulkDeleteResult *istat = indstats[idx];
3860
 /* only accurate (non-estimated) counts are written back */
3861 if (istat == NULL || istat->estimated_count)
3862 continue;
3863
3864 /* Update index statistics */
3866 istat->num_pages,
3867 istat->num_index_tuples,
3868 0, 0,
3869 false,
3872 NULL, NULL, false);
3873 }
3874}
3875
3876/*
3877 * Error context callback for errors occurring during vacuum. The error
3878 * context messages for index phases should match the messages set in parallel
3879 * vacuum. If you change this function for those phases, change
3880 * parallel_vacuum_error_callback() as well.
 *
 * NOTE(review): the signature line, the "LVRelState *errinfo = arg;" style
 * cast, and all VACUUM_ERRCB_PHASE_* case labels were elided by the
 * extraction; each errcontext branch below belongs to one phase of the
 * VacErrPhase enum declared earlier in this file -- verify against upstream.
3881 */
3882static void
3884{
3886
3887 switch (errinfo->phase)
3888 {
 /* scan-heap phase: most specific message the saved blkno/offnum allow */
3890 if (BlockNumberIsValid(errinfo->blkno))
3891 {
3892 if (OffsetNumberIsValid(errinfo->offnum))
3893 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3894 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3895 else
3896 errcontext("while scanning block %u of relation \"%s.%s\"",
3897 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3898 }
3899 else
3900 errcontext("while scanning relation \"%s.%s\"",
3901 errinfo->relnamespace, errinfo->relname);
3902 break;
3903
 /* vacuum-heap phase */
3905 if (BlockNumberIsValid(errinfo->blkno))
3906 {
3907 if (OffsetNumberIsValid(errinfo->offnum))
3908 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3909 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3910 else
3911 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3912 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3913 }
3914 else
3915 errcontext("while vacuuming relation \"%s.%s\"",
3916 errinfo->relnamespace, errinfo->relname);
3917 break;
3918
 /* vacuum-index phase (message must match parallel_vacuum_error_callback) */
3920 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3921 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3922 break;
3923
 /* index-cleanup phase (message must match parallel_vacuum_error_callback) */
3925 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3926 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3927 break;
3928
 /* truncate phase */
3930 if (BlockNumberIsValid(errinfo->blkno))
3931 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3932 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3933 break;
3934
3936 default:
3937 return; /* do nothing; the errinfo may not be
3938 * initialized */
3939 }
3940}
3941
3942/*
3943 * Updates the information required for vacuum error callback. This also saves
3944 * the current information which can be later restored via restore_vacuum_error_info.
 *
 * saved_vacrel may be NULL when the caller does not need the previous
 * state preserved.  NOTE(review): the signature's first line (carrying the
 * vacrel and saved_vacrel parameters) was elided by the extraction.
3945 */
3946static void
3948 int phase, BlockNumber blkno, OffsetNumber offnum)
3949{
3950 if (saved_vacrel)
3951 {
3952 saved_vacrel->offnum = vacrel->offnum;
3953 saved_vacrel->blkno = vacrel->blkno;
3954 saved_vacrel->phase = vacrel->phase;
3955 }
3956
3957 vacrel->blkno = blkno;
3958 vacrel->offnum = offnum;
3959 vacrel->phase = phase;
3960}
3961
3962/*
3963 * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
 *
 * Copies blkno/offnum/phase back from the saved snapshot into vacrel.
 * NOTE(review): the signature lines were elided by the extraction.
3964 */
3965static void
3968{
3969 vacrel->blkno = saved_vacrel->blkno;
3970 vacrel->offnum = saved_vacrel->offnum;
3971 vacrel->phase = saved_vacrel->phase;
3972}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
int autovacuum_work_mem
Definition autovacuum.c:120
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1719
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1779
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1643
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
bool track_io_timing
Definition bufmgr.c:177
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition bufmgr.c:6495
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:773
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2998
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5502
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5519
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3057
void LockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6528
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:912
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6701
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328
@ RBM_NORMAL
Definition bufmgr.h:46
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:223
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:428
static void PageClearAllVisible(Page page)
Definition bufpage.h:438
static bool PageIsNew(const PageData *page)
Definition bufpage.h:233
#define SizeOfPageHeaderData
Definition bufpage.h:216
static void PageSetAllVisible(Page page)
Definition bufpage.h:433
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:353
PageData * Page
Definition bufpage.h:81
#define PageClearPrunable(page)
Definition bufpage.h:453
static XLogRecPtr PageGetLSN(const PageData *page)
Definition bufpage.h:385
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:371
uint8_t uint8
Definition c.h:577
#define ngettext(s, p, n)
Definition c.h:1219
#define Max(x, y)
Definition c.h:1034
#define Assert(condition)
Definition c.h:906
int64_t int64
Definition c.h:576
TransactionId MultiXactId
Definition c.h:709
int32_t int32
Definition c.h:575
#define unlikely(x)
Definition c.h:424
uint32_t uint32
Definition c.h:579
#define lengthof(array)
Definition c.h:836
#define StaticAssertDecl(condition, errmessage)
Definition c.h:971
uint32 TransactionId
Definition c.h:699
size_t Size
Definition c.h:652
int64 TimestampTz
Definition timestamp.h:39
Datum arg
Definition elog.c:1322
ErrorContextCallback * error_context_stack
Definition elog.c:99
int errcode(int sqlerrcode)
Definition elog.c:874
int errmsg(const char *fmt,...)
Definition elog.c:1093
#define _(x)
Definition elog.c:95
#define LOG
Definition elog.h:31
#define errcontext
Definition elog.h:198
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:36
#define DEBUG2
Definition elog.h:29
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define INFO
Definition elog.h:34
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition freespace.c:377
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition freespace.c:244
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition freespace.c:194
bool VacuumCostActive
Definition globals.c:158
int VacuumCostBalance
Definition globals.c:157
int maintenance_work_mem
Definition globals.c:133
volatile uint32 CritSectionCount
Definition globals.c:45
struct Latch * MyLatch
Definition globals.c:63
Oid MyDatabaseId
Definition globals.c:94
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition heapam.c:7892
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7947
#define HEAP_PAGE_PRUNE_FREEZE
Definition heapam.h:44
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:129
@ HEAPTUPLE_LIVE
Definition heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:130
@ HEAPTUPLE_DEAD
Definition heapam.h:126
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:230
@ PRUNE_VACUUM_SCAN
Definition heapam.h:229
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition heapam.h:43
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define false
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:177
#define INSTR_TIME_GET_MICROSEC(t)
Definition instr_time.h:192
WalUsage pgWalUsage
Definition instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:289
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:249
static int pg_cmp_u16(uint16 a, uint16 b)
Definition int.h:707
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:314
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:278
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:367
#define NoLock
Definition lockdefs.h:34
#define AccessExclusiveLock
Definition lockdefs.h:43
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1242
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3518
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:383
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define END_CRIT_SECTION()
Definition miscadmin.h:152
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2818
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2832
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define InvalidMultiXactId
Definition multixact.h:25
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition off.h:39
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
#define MaxOffsetNumber
Definition off.h:28
static int verbose
#define ERRCODE_DATA_CORRUPTED
NameData relname
Definition pg_class.h:40
const void * data
uint32 pg_prng_uint32(pg_prng_state *state)
Definition pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition pg_prng.c:34
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
static char buf[DEFAULT_XLOG_SEG_SIZE]
int64 PgStat_Counter
Definition pgstat.h:71
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define qsort(a, b, c, d)
Definition port.h:495
static int fb(int x)
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4118
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_DEAD_TUPLE_BYTES
Definition progress.h:27
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition progress.h:36
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition progress.h:38
#define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS
Definition progress.h:28
#define PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
Definition progress.h:26
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition progress.h:23
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition progress.h:39
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition progress.h:37
#define PROGRESS_VACUUM_MODE_FAILSAFE
Definition progress.h:46
#define PROGRESS_VACUUM_INDEXES_PROCESSED
Definition progress.h:30
#define PROGRESS_VACUUM_INDEXES_TOTAL
Definition progress.h:29
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition progress.h:40
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:819
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2167
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
void read_stream_end(ReadStream *stream)
#define READ_STREAM_MAINTENANCE
Definition read_stream.h:28
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationNeedsWAL(relation)
Definition rel.h:637
#define RelationUsesLocalBuffers(relation)
Definition rel.h:646
#define RelationGetNamespace(relation)
Definition rel.h:555
@ MAIN_FORKNUM
Definition relpath.h:58
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition storage.c:289
char * dbname
Definition streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
BlockNumber pages_deleted
Definition genam.h:88
BlockNumber pages_newly_deleted
Definition genam.h:87
BlockNumber pages_free
Definition genam.h:89
BlockNumber num_pages
Definition genam.h:83
double num_index_tuples
Definition genam.h:85
BlockNumber next_eager_scan_region_start
Definition vacuumlazy.c:371
ParallelVacuumState * pvs
Definition vacuumlazy.c:259
bool next_unskippable_eager_scanned
Definition vacuumlazy.c:356
VacDeadItemsInfo * dead_items_info
Definition vacuumlazy.c:302
Buffer next_unskippable_vmbuffer
Definition vacuumlazy.c:357
OffsetNumber offnum
Definition vacuumlazy.c:287
TidStore * dead_items
Definition vacuumlazy.c:301
int64 tuples_deleted
Definition vacuumlazy.c:346
BlockNumber nonempty_pages
Definition vacuumlazy.c:333
BlockNumber eager_scan_remaining_fails
Definition vacuumlazy.c:403
bool do_rel_truncate
Definition vacuumlazy.c:271
BlockNumber scanned_pages
Definition vacuumlazy.c:305
int num_dead_items_resets
Definition vacuumlazy.c:343
bool aggressive
Definition vacuumlazy.c:262
BlockNumber new_frozen_tuple_pages
Definition vacuumlazy.c:314
GlobalVisState * vistest
Definition vacuumlazy.c:275
BlockNumber removed_pages
Definition vacuumlazy.c:313
int num_index_scans
Definition vacuumlazy.c:342
IndexBulkDeleteResult ** indstats
Definition vacuumlazy.c:339
BlockNumber new_all_frozen_pages
Definition vacuumlazy.c:329
double new_live_tuples
Definition vacuumlazy.c:337
double new_rel_tuples
Definition vacuumlazy.c:336
BlockNumber new_all_visible_all_frozen_pages
Definition vacuumlazy.c:326
BlockNumber new_all_visible_pages
Definition vacuumlazy.c:317
TransactionId NewRelfrozenXid
Definition vacuumlazy.c:277
Relation rel
Definition vacuumlazy.c:253
bool consider_bypass_optimization
Definition vacuumlazy.c:266
BlockNumber rel_pages
Definition vacuumlazy.c:304
Size total_dead_items_bytes
Definition vacuumlazy.c:344
BlockNumber next_unskippable_block
Definition vacuumlazy.c:355
int64 recently_dead_tuples
Definition vacuumlazy.c:350
int64 tuples_frozen
Definition vacuumlazy.c:347
char * dbname
Definition vacuumlazy.c:282
BlockNumber missed_dead_pages
Definition vacuumlazy.c:332
BlockNumber current_block
Definition vacuumlazy.c:354
char * relnamespace
Definition vacuumlazy.c:283
int64 live_tuples
Definition vacuumlazy.c:349
int64 lpdead_items
Definition vacuumlazy.c:348
BufferAccessStrategy bstrategy
Definition vacuumlazy.c:258
BlockNumber eager_scan_remaining_successes
Definition vacuumlazy.c:382
bool skippedallvis
Definition vacuumlazy.c:279
BlockNumber lpdead_item_pages
Definition vacuumlazy.c:331
BlockNumber eager_scanned_pages
Definition vacuumlazy.c:311
Relation * indrels
Definition vacuumlazy.c:254
bool skipwithvm
Definition vacuumlazy.c:264
bool do_index_cleanup
Definition vacuumlazy.c:270
MultiXactId NewRelminMxid
Definition vacuumlazy.c:278
int64 missed_dead_tuples
Definition vacuumlazy.c:351
BlockNumber blkno
Definition vacuumlazy.c:286
struct VacuumCutoffs cutoffs
Definition vacuumlazy.c:274
char * relname
Definition vacuumlazy.c:284
BlockNumber eager_scan_max_fails_per_region
Definition vacuumlazy.c:393
VacErrPhase phase
Definition vacuumlazy.c:288
char * indname
Definition vacuumlazy.c:285
bool do_index_vacuuming
Definition vacuumlazy.c:269
BlockNumber blkno
Definition vacuumlazy.c:410
VacErrPhase phase
Definition vacuumlazy.c:412
OffsetNumber offnum
Definition vacuumlazy.c:411
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
size_t max_bytes
Definition vacuum.h:299
int64 num_items
Definition vacuum.h:300
int nworkers
Definition vacuum.h:251
VacOptValue truncate
Definition vacuum.h:236
bits32 options
Definition vacuum.h:219
int log_vacuum_min_duration
Definition vacuum.h:227
bool is_wraparound
Definition vacuum.h:226
VacOptValue index_cleanup
Definition vacuum.h:235
double max_eager_freeze_failure_rate
Definition vacuum.h:244
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
TidStoreIter * TidStoreBeginIterate(TidStore *ts)
Definition tidstore.c:471
void TidStoreEndIterate(TidStoreIter *iter)
Definition tidstore.c:518
TidStoreIterResult * TidStoreIterateNext(TidStoreIter *iter)
Definition tidstore.c:493
TidStore * TidStoreCreateLocal(size_t max_bytes, bool insert_only)
Definition tidstore.c:162
void TidStoreDestroy(TidStore *ts)
Definition tidstore.c:317
int TidStoreGetBlockOffsets(TidStoreIterResult *result, OffsetNumber *offsets, int max_offsets)
Definition tidstore.c:566
void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
Definition tidstore.c:345
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
static TransactionId ReadNextTransactionId(void)
Definition transam.h:377
#define InvalidTransactionId
Definition transam.h:31
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool track_cost_delay_timing
Definition vacuum.c:82
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2366
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition vacuum.c:2658
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2409
void vacuum_delay_point(bool is_analyze)
Definition vacuum.c:2430
bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1268
bool VacuumFailsafeActive
Definition vacuum.c:110
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition vacuum.c:1330
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
Definition vacuum.c:2637
#define VACOPT_VERBOSE
Definition vacuum.h:182
@ VACOPTVALUE_AUTO
Definition vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:188
static int lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool *has_lpdead_items, bool *vm_page_frozen)
static void dead_items_cleanup(LVRelState *vacrel)
static void identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer, BlockNumber heap_blk, Page heap_page, int nlpdead_items, Buffer vmbuffer, uint8 *vmbits)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
void heap_vacuum_rel(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
Definition vacuumlazy.c:626
static BlockNumber heap_vac_scan_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition vacuumlazy.c:499
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition vacuumlazy.c:178
static void vacuum_error_callback(void *arg)
static bool heap_page_would_be_all_visible(Relation rel, Buffer buf, TransactionId OldestXmin, OffsetNumber *deadoffsets, int ndeadoffsets, bool *all_frozen, TransactionId *visibility_cutoff_xid, OffsetNumber *logging_offnum)
#define EAGER_SCAN_REGION_SIZE
Definition vacuumlazy.c:248
static void lazy_truncate_heap(LVRelState *vacrel)
static void lazy_vacuum(LVRelState *vacrel)
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
#define MAX_EAGER_FREEZE_SUCCESS_RATE
Definition vacuumlazy.c:239
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
static BlockNumber vacuum_reap_lp_read_stream_next(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define REL_TRUNCATE_MINIMUM
Definition vacuumlazy.c:167
static bool should_attempt_truncation(LVRelState *vacrel)
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
VacErrPhase
Definition vacuumlazy.c:223
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition vacuumlazy.c:225
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition vacuumlazy.c:226
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition vacuumlazy.c:229
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition vacuumlazy.c:228
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition vacuumlazy.c:227
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:224
static void lazy_scan_heap(LVRelState *vacrel)
#define ParallelVacuumIsActive(vacrel)
Definition vacuumlazy.c:219
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
static void dead_items_reset(LVRelState *vacrel)
#define REL_TRUNCATE_FRACTION
Definition vacuumlazy.c:168
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
#define PREFETCH_SIZE
Definition vacuumlazy.c:213
static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
#define BYPASS_THRESHOLD_PAGES
Definition vacuumlazy.c:185
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition vacuumlazy.c:179
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
#define SKIP_PAGES_THRESHOLD
Definition vacuumlazy.c:207
#define FAILSAFE_EVERY_PAGES
Definition vacuumlazy.c:191
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition vacuumlazy.c:177
static int cmpOffsetNumbers(const void *a, const void *b)
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
#define VACUUM_FSM_EVERY_PAGES
Definition vacuumlazy.c:200
TidStore * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs, VacDeadItemsInfo **dead_items_info_p)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int vac_work_mem, int elevel, BufferAccessStrategy bstrategy)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans)
void parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
void visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
bool IsInParallelMode(void)
Definition xact.c:1090
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
#define InvalidXLogRecPtr
Definition xlogdefs.h:28
XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std)

◆ MAX_EAGER_FREEZE_SUCCESS_RATE

#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2

Definition at line 239 of file vacuumlazy.c.

◆ ParallelVacuumIsActive

#define ParallelVacuumIsActive (   vacrel)    ((vacrel)->pvs != NULL)

Definition at line 219 of file vacuumlazy.c.

◆ PREFETCH_SIZE

#define PREFETCH_SIZE   ((BlockNumber) 32)

Definition at line 213 of file vacuumlazy.c.

◆ REL_TRUNCATE_FRACTION

#define REL_TRUNCATE_FRACTION   16

Definition at line 168 of file vacuumlazy.c.

◆ REL_TRUNCATE_MINIMUM

#define REL_TRUNCATE_MINIMUM   1000

Definition at line 167 of file vacuumlazy.c.

◆ SKIP_PAGES_THRESHOLD

#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)

Definition at line 207 of file vacuumlazy.c.

◆ VACUUM_FSM_EVERY_PAGES

#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 200 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL

#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */

Definition at line 177 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_TIMEOUT

#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */

Definition at line 179 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL

#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */

Definition at line 178 of file vacuumlazy.c.

Typedef Documentation

◆ LVRelState

◆ LVSavedErrInfo

Enumeration Type Documentation

◆ VacErrPhase

Enumerator
VACUUM_ERRCB_PHASE_UNKNOWN 
VACUUM_ERRCB_PHASE_SCAN_HEAP 
VACUUM_ERRCB_PHASE_VACUUM_INDEX 
VACUUM_ERRCB_PHASE_VACUUM_HEAP 
VACUUM_ERRCB_PHASE_INDEX_CLEANUP 
VACUUM_ERRCB_PHASE_TRUNCATE 

Definition at line 222 of file vacuumlazy.c.

Function Documentation

◆ cmpOffsetNumbers()

static int cmpOffsetNumbers ( const void a,
const void b 
)
static

Definition at line 1961 of file vacuumlazy.c.

1962{
1963 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1964}

References a, b, and pg_cmp_u16().

Referenced by lazy_scan_prune().

◆ count_nondeletable_pages()

static BlockNumber count_nondeletable_pages ( LVRelState vacrel,
bool lock_waiter_detected 
)
static

Definition at line 3386 of file vacuumlazy.c.

3387{
3389 "prefetch size must be power of 2");
3390
3391 BlockNumber blkno;
3393 instr_time starttime;
3394
3395 /* Initialize the starttime if we check for conflicting lock requests */
3396 INSTR_TIME_SET_CURRENT(starttime);
3397
3398 /*
3399 * Start checking blocks at what we believe relation end to be and move
3400 * backwards. (Strange coding of loop control is needed because blkno is
3401 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3402 * in forward direction, so that OS-level readahead can kick in.
3403 */
3404 blkno = vacrel->rel_pages;
3406 while (blkno > vacrel->nonempty_pages)
3407 {
3408 Buffer buf;
3409 Page page;
3410 OffsetNumber offnum,
3411 maxoff;
3412 bool hastup;
3413
3414 /*
3415 * Check if another process requests a lock on our relation. We are
3416 * holding an AccessExclusiveLock here, so they will be waiting. We
3417 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3418 * only check if that interval has elapsed once every 32 blocks to
3419 * keep the number of system calls and actual shared lock table
3420 * lookups to a minimum.
3421 */
3422 if ((blkno % 32) == 0)
3423 {
3426
3429 INSTR_TIME_SUBTRACT(elapsed, starttime);
3430 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3432 {
3434 {
3435 ereport(vacrel->verbose ? INFO : DEBUG2,
3436 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3437 vacrel->relname)));
3438
3439 *lock_waiter_detected = true;
3440 return blkno;
3441 }
3442 starttime = currenttime;
3443 }
3444 }
3445
3446 /*
3447 * We don't insert a vacuum delay point here, because we have an
3448 * exclusive lock on the table which we want to hold for as short a
3449 * time as possible. We still need to check for interrupts however.
3450 */
3452
3453 blkno--;
3454
3455 /* If we haven't prefetched this lot yet, do so now. */
3456 if (prefetchedUntil > blkno)
3457 {
3460
3461 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3462 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3463 {
3466 }
3468 }
3469
3471 vacrel->bstrategy);
3472
3473 /* In this phase we only need shared access to the buffer */
3475
3476 page = BufferGetPage(buf);
3477
3478 if (PageIsNew(page) || PageIsEmpty(page))
3479 {
3481 continue;
3482 }
3483
3484 hastup = false;
3485 maxoff = PageGetMaxOffsetNumber(page);
3486 for (offnum = FirstOffsetNumber;
3487 offnum <= maxoff;
3488 offnum = OffsetNumberNext(offnum))
3489 {
3490 ItemId itemid;
3491
3492 itemid = PageGetItemId(page, offnum);
3493
3494 /*
3495 * Note: any non-unused item should be taken as a reason to keep
3496 * this page. Even an LP_DEAD item makes truncation unsafe, since
3497 * we must not have cleaned out its index entries.
3498 */
3499 if (ItemIdIsUsed(itemid))
3500 {
3501 hastup = true;
3502 break; /* can stop scanning */
3503 }
3504 } /* scan along page */
3505
3507
3508 /* Done scanning if we found a tuple here */
3509 if (hastup)
3510 return blkno + 1;
3511 }
3512
3513 /*
3514 * If we fall out of the loop, all the previously-thought-to-be-empty
3515 * pages still are; we need not bother to look at the last known-nonempty
3516 * page.
3517 */
3518 return vacrel->nonempty_pages;
3519}

References AccessExclusiveLock, buf, BUFFER_LOCK_SHARE, BufferGetPage(), CHECK_FOR_INTERRUPTS, DEBUG2, ereport, errmsg(), fb(), FirstOffsetNumber, INFO, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBlockNumber, ItemIdIsUsed, LockBuffer(), LockHasWaitersRelation(), MAIN_FORKNUM, OffsetNumberNext, PageGetItemId(), PageGetMaxOffsetNumber(), PageIsEmpty(), PageIsNew(), PREFETCH_SIZE, PrefetchBuffer(), RBM_NORMAL, ReadBufferExtended(), StaticAssertDecl, UnlockReleaseBuffer(), and VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL.

Referenced by lazy_truncate_heap().

◆ dead_items_add()

static void dead_items_add ( LVRelState vacrel,
BlockNumber  blkno,
OffsetNumber offsets,
int  num_offsets 
)
static

Definition at line 3594 of file vacuumlazy.c.

3596{
3597 const int prog_index[2] = {
3600 };
3601 int64 prog_val[2];
3602
3603 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3604 vacrel->dead_items_info->num_items += num_offsets;
3605
3606 /* update the progress information */
3607 prog_val[0] = vacrel->dead_items_info->num_items;
3608 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3610}

References fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS, TidStoreMemoryUsage(), and TidStoreSetBlockOffsets().

Referenced by lazy_scan_noprune(), and lazy_scan_prune().

◆ dead_items_alloc()

static void dead_items_alloc ( LVRelState vacrel,
int  nworkers 
)
static

Definition at line 3529 of file vacuumlazy.c.

3530{
3531 VacDeadItemsInfo *dead_items_info;
3533 autovacuum_work_mem != -1 ?
3535
3536 /*
3537 * Initialize state for a parallel vacuum. As of now, only one worker can
3538 * be used for an index, so we invoke parallelism only if there are at
3539 * least two indexes on a table.
3540 */
3541 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3542 {
3543 /*
3544 * Since parallel workers cannot access data in temporary tables, we
3545 * can't perform parallel vacuum on them.
3546 */
3548 {
3549 /*
3550 * Give warning only if the user explicitly tries to perform a
3551 * parallel vacuum on the temporary table.
3552 */
3553 if (nworkers > 0)
3555 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3556 vacrel->relname)));
3557 }
3558 else
3559 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3560 vacrel->nindexes, nworkers,
3562 vacrel->verbose ? INFO : DEBUG2,
3563 vacrel->bstrategy);
3564
3565 /*
3566 * If parallel mode started, dead_items and dead_items_info spaces are
3567 * allocated in DSM.
3568 */
3570 {
3572 &vacrel->dead_items_info);
3573 return;
3574 }
3575 }
3576
3577 /*
3578 * Serial VACUUM case. Allocate both dead_items and dead_items_info
3579 * locally.
3580 */
3581
3582 dead_items_info = palloc_object(VacDeadItemsInfo);
3583 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3584 dead_items_info->num_items = 0;
3585 vacrel->dead_items_info = dead_items_info;
3586
3587 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3588}

References AmAutoVacuumWorkerProcess, autovacuum_work_mem, ParallelVacuumState::dead_items, DEBUG2, ereport, errmsg(), fb(), INFO, maintenance_work_mem, VacDeadItemsInfo::max_bytes, VacDeadItemsInfo::num_items, palloc_object, parallel_vacuum_get_dead_items(), parallel_vacuum_init(), ParallelVacuumIsActive, RelationUsesLocalBuffers, TidStoreCreateLocal(), and WARNING.

Referenced by heap_vacuum_rel().

◆ dead_items_cleanup()

static void dead_items_cleanup ( LVRelState vacrel)
static

Definition at line 3642 of file vacuumlazy.c.

3643{
3645 {
3646 /* Don't bother with pfree here */
3647 return;
3648 }
3649
3650 /* End parallel mode */
3651 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3652 vacrel->pvs = NULL;
3653}

References fb(), parallel_vacuum_end(), and ParallelVacuumIsActive.

Referenced by heap_vacuum_rel().

◆ dead_items_reset()

static void dead_items_reset ( LVRelState vacrel)
static

Definition at line 3616 of file vacuumlazy.c.

3617{
3618 /* Update statistics for dead items */
3619 vacrel->num_dead_items_resets++;
3620 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3621
3623 {
3625 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3626 &vacrel->dead_items_info);
3627 return;
3628 }
3629
3630 /* Recreate the tidstore with the same max_bytes limitation */
3631 TidStoreDestroy(vacrel->dead_items);
3632 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3633
3634 /* Reset the counter */
3635 vacrel->dead_items_info->num_items = 0;
3636}

References fb(), parallel_vacuum_get_dead_items(), parallel_vacuum_reset_dead_items(), ParallelVacuumIsActive, TidStoreCreateLocal(), TidStoreDestroy(), and TidStoreMemoryUsage().

Referenced by lazy_vacuum().

◆ find_next_unskippable_block()

static void find_next_unskippable_block ( LVRelState vacrel,
bool skipsallvis 
)
static

Definition at line 1721 of file vacuumlazy.c.

1722{
1723 BlockNumber rel_pages = vacrel->rel_pages;
1724 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1725 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1726 bool next_unskippable_eager_scanned = false;
1727
1728 *skipsallvis = false;
1729
1730 for (;; next_unskippable_block++)
1731 {
1733 next_unskippable_block,
1734 &next_unskippable_vmbuffer);
1735
1736
1737 /*
1738 * At the start of each eager scan region, normal vacuums with eager
1739 * scanning enabled reset the failure counter, allowing vacuum to
1740 * resume eager scanning if it had been suspended in the previous
1741 * region.
1742 */
1743 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1744 {
1745 vacrel->eager_scan_remaining_fails =
1746 vacrel->eager_scan_max_fails_per_region;
1747 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1748 }
1749
1750 /*
1751 * A block is unskippable if it is not all visible according to the
1752 * visibility map.
1753 */
1755 {
1757 break;
1758 }
1759
1760 /*
1761 * Caller must scan the last page to determine whether it has tuples
1762 * (caller must have the opportunity to set vacrel->nonempty_pages).
1763 * This rule avoids having lazy_truncate_heap() take access-exclusive
1764 * lock on rel to attempt a truncation that fails anyway, just because
1765 * there are tuples on the last page (it is likely that there will be
1766 * tuples on other nearby pages as well, but those can be skipped).
1767 *
1768 * Implement this by always treating the last block as unsafe to skip.
1769 */
1770 if (next_unskippable_block == rel_pages - 1)
1771 break;
1772
1773 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1774 if (!vacrel->skipwithvm)
1775 break;
1776
1777 /*
1778 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1779 * already frozen by now), so this page can be skipped.
1780 */
1781 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1782 continue;
1783
1784 /*
1785 * Aggressive vacuums cannot skip any all-visible pages that are not
1786 * also all-frozen.
1787 */
1788 if (vacrel->aggressive)
1789 break;
1790
1791 /*
1792 * Normal vacuums with eager scanning enabled only skip all-visible
1793 * but not all-frozen pages if they have hit the failure limit for the
1794 * current eager scan region.
1795 */
1796 if (vacrel->eager_scan_remaining_fails > 0)
1797 {
1798 next_unskippable_eager_scanned = true;
1799 break;
1800 }
1801
1802 /*
1803 * All-visible blocks are safe to skip in a normal vacuum. But
1804 * remember that the final range contains such a block for later.
1805 */
1806 *skipsallvis = true;
1807 }
1808
1809 /* write the local variables back to vacrel */
1810 vacrel->next_unskippable_block = next_unskippable_block;
1811 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1812 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1813}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, and visibilitymap_get_status().

Referenced by heap_vac_scan_next_block().

◆ heap_page_would_be_all_visible()

static bool heap_page_would_be_all_visible ( Relation  rel,
Buffer  buf,
TransactionId  OldestXmin,
OffsetNumber deadoffsets,
int  ndeadoffsets,
bool all_frozen,
TransactionId visibility_cutoff_xid,
OffsetNumber logging_offnum 
)
static

Definition at line 3710 of file vacuumlazy.c.

3717{
3718 Page page = BufferGetPage(buf);
3720 OffsetNumber offnum,
3721 maxoff;
3722 bool all_visible = true;
3723 int matched_dead_count = 0;
3724
3725 *visibility_cutoff_xid = InvalidTransactionId;
3726 *all_frozen = true;
3727
3728 Assert(ndeadoffsets == 0 || deadoffsets);
3729
3730#ifdef USE_ASSERT_CHECKING
3731 /* Confirm input deadoffsets[] is strictly sorted */
3732 if (ndeadoffsets > 1)
3733 {
3734 for (int i = 1; i < ndeadoffsets; i++)
3735 Assert(deadoffsets[i - 1] < deadoffsets[i]);
3736 }
3737#endif
3738
3739 maxoff = PageGetMaxOffsetNumber(page);
3740 for (offnum = FirstOffsetNumber;
3741 offnum <= maxoff && all_visible;
3742 offnum = OffsetNumberNext(offnum))
3743 {
3744 ItemId itemid;
3745 HeapTupleData tuple;
3747
3748 /*
3749 * Set the offset number so that we can display it along with any
3750 * error that occurred while processing this tuple.
3751 */
3752 *logging_offnum = offnum;
3753 itemid = PageGetItemId(page, offnum);
3754
3755 /* Unused or redirect line pointers are of no interest */
3756 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3757 continue;
3758
3759 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3760
3761 /*
3762 * Dead line pointers can have index pointers pointing to them. So
 3763 * they can't be treated as visible.
3764 */
3765 if (ItemIdIsDead(itemid))
3766 {
3767 if (!deadoffsets ||
3769 deadoffsets[matched_dead_count] != offnum)
3770 {
3771 *all_frozen = all_visible = false;
3772 break;
3773 }
3775 continue;
3776 }
3777
3778 Assert(ItemIdIsNormal(itemid));
3779
3780 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3781 tuple.t_len = ItemIdGetLength(itemid);
3782 tuple.t_tableOid = RelationGetRelid(rel);
3783
3784 /* Visibility checks may do IO or allocate memory */
3787 {
3788 case HEAPTUPLE_LIVE:
3789 {
3790 TransactionId xmin;
3791
3792 /* Check comments in lazy_scan_prune. */
3794 {
3795 all_visible = false;
3796 *all_frozen = false;
3797 break;
3798 }
3799
3800 /*
3801 * The inserter definitely committed. But is it old enough
3802 * that everyone sees it as committed?
3803 */
3804 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3805 if (!TransactionIdPrecedes(xmin, OldestXmin))
3806 {
3807 all_visible = false;
3808 *all_frozen = false;
3809 break;
3810 }
3811
3812 /* Track newest xmin on page. */
3813 if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3815 *visibility_cutoff_xid = xmin;
3816
3817 /* Check whether this tuple is already frozen or not */
3818 if (all_visible && *all_frozen &&
3820 *all_frozen = false;
3821 }
3822 break;
3823
3824 case HEAPTUPLE_DEAD:
3828 {
3829 all_visible = false;
3830 *all_frozen = false;
3831 break;
3832 }
3833 default:
3834 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3835 break;
3836 }
3837 } /* scan along page */
3838
3839 /* Clear the offset information once we have processed the given page. */
3841
3842 return all_visible;
3843}

References Assert, buf, BufferGetBlockNumber(), BufferGetPage(), CritSectionCount, elog, ERROR, fb(), FirstOffsetNumber, heap_tuple_needs_eventual_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetXmin(), HeapTupleHeaderXminCommitted(), HeapTupleSatisfiesVacuumHorizon(), i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdIsNormal, and TransactionIdPrecedes().

Referenced by lazy_vacuum_heap_page().

◆ heap_vac_scan_next_block()

static BlockNumber heap_vac_scan_next_block ( ReadStream stream,
void callback_private_data,
void per_buffer_data 
)
static

Definition at line 1621 of file vacuumlazy.c.

1624{
1626 LVRelState *vacrel = callback_private_data;
1627
1628 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1630
1631 /* Have we reached the end of the relation? */
1632 if (next_block >= vacrel->rel_pages)
1633 {
1634 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1635 {
1636 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1637 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1638 }
1639 return InvalidBlockNumber;
1640 }
1641
1642 /*
1643 * We must be in one of the three following states:
1644 */
1645 if (next_block > vacrel->next_unskippable_block ||
1646 vacrel->next_unskippable_block == InvalidBlockNumber)
1647 {
1648 /*
1649 * 1. We have just processed an unskippable block (or we're at the
1650 * beginning of the scan). Find the next unskippable block using the
1651 * visibility map.
1652 */
1653 bool skipsallvis;
1654
1656
1657 /*
1658 * We now know the next block that we must process. It can be the
1659 * next block after the one we just processed, or something further
1660 * ahead. If it's further ahead, we can jump to it, but we choose to
1661 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1662 * pages. Since we're reading sequentially, the OS should be doing
1663 * readahead for us, so there's no gain in skipping a page now and
1664 * then. Skipping such a range might even discourage sequential
1665 * detection.
1666 *
1667 * This test also enables more frequent relfrozenxid advancement
1668 * during non-aggressive VACUUMs. If the range has any all-visible
1669 * pages then skipping makes updating relfrozenxid unsafe, which is a
1670 * real downside.
1671 */
1672 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1673 {
1674 next_block = vacrel->next_unskippable_block;
1675 if (skipsallvis)
1676 vacrel->skippedallvis = true;
1677 }
1678 }
1679
1680 /* Now we must be in one of the two remaining states: */
1681 if (next_block < vacrel->next_unskippable_block)
1682 {
1683 /*
1684 * 2. We are processing a range of blocks that we could have skipped
1685 * but chose not to. We know that they are all-visible in the VM,
1686 * otherwise they would've been unskippable.
1687 */
1688 vacrel->current_block = next_block;
1689 /* Block was not eager scanned */
1690 *((bool *) per_buffer_data) = false;
1691 return vacrel->current_block;
1692 }
1693 else
1694 {
1695 /*
1696 * 3. We reached the next unskippable block. Process it. On next
1697 * iteration, we will be back in state 1.
1698 */
1699 Assert(next_block == vacrel->next_unskippable_block);
1700
1701 vacrel->current_block = next_block;
1702 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1703 return vacrel->current_block;
1704 }
1705}

References Assert, BufferIsValid(), LVRelState::current_block, fb(), find_next_unskippable_block(), InvalidBlockNumber, InvalidBuffer, ReleaseBuffer(), and SKIP_PAGES_THRESHOLD.

Referenced by lazy_scan_heap().

◆ heap_vacuum_eager_scan_setup()

static void heap_vacuum_eager_scan_setup ( LVRelState vacrel,
const VacuumParams  params 
)
static

Definition at line 499 of file vacuumlazy.c.

500{
504 float first_region_ratio;
506
507 /*
508 * Initialize eager scan management fields to their disabled values.
509 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
510 * of tables without sufficiently old tuples disable eager scanning.
511 */
512 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
513 vacrel->eager_scan_max_fails_per_region = 0;
514 vacrel->eager_scan_remaining_fails = 0;
515 vacrel->eager_scan_remaining_successes = 0;
516
517 /* If eager scanning is explicitly disabled, just return. */
518 if (params.max_eager_freeze_failure_rate == 0)
519 return;
520
521 /*
522 * The caller will have determined whether or not an aggressive vacuum is
523 * required by either the vacuum parameters or the relative age of the
524 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
525 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
526 * so scans of all-visible pages are not considered eager.
527 */
528 if (vacrel->aggressive)
529 return;
530
531 /*
532 * Aggressively vacuuming a small relation shouldn't take long, so it
533 * isn't worth amortizing. We use two times the region size as the size
534 * cutoff because the eager scan start block is a random spot somewhere in
535 * the first region, making the second region the first to be eager
536 * scanned normally.
537 */
538 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
539 return;
540
541 /*
542 * We only want to enable eager scanning if we are likely to be able to
543 * freeze some of the pages in the relation.
544 *
545 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
546 * are technically freezable, but we won't freeze them unless the criteria
547 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
548 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
549 *
550 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
551 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
552 * enable eager scanning.
553 */
554 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
555 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
556 vacrel->cutoffs.FreezeLimit))
558
560 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
561 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
562 vacrel->cutoffs.MultiXactCutoff))
564
566 return;
567
568 /* We have met the criteria to eagerly scan some pages. */
569
570 /*
571 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
572 * all-visible but not all-frozen blocks in the relation.
573 */
575
576 vacrel->eager_scan_remaining_successes =
579
580 /* If every all-visible page is frozen, eager scanning is disabled. */
581 if (vacrel->eager_scan_remaining_successes == 0)
582 return;
583
584 /*
585 * Now calculate the bounds of the first eager scan region. Its end block
586 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
587 * blocks. This affects the bounds of all subsequent regions and avoids
588 * eager scanning and failing to freeze the same blocks each vacuum of the
589 * relation.
590 */
592
593 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
594
597
598 vacrel->eager_scan_max_fails_per_region =
601
602 /*
603 * The first region will be smaller than subsequent regions. As such,
604 * adjust the eager freeze failures tolerated for this region.
605 */
606 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
608
609 vacrel->eager_scan_remaining_fails =
610 vacrel->eager_scan_max_fails_per_region *
612}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), InvalidBlockNumber, VacuumParams::max_eager_freeze_failure_rate, MAX_EAGER_FREEZE_SUCCESS_RATE, MultiXactIdIsValid, MultiXactIdPrecedes(), pg_global_prng_state, pg_prng_uint32(), TransactionIdIsNormal, TransactionIdPrecedes(), and visibilitymap_count().

Referenced by heap_vacuum_rel().

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)

Definition at line 626 of file vacuumlazy.c.

628{
630 bool verbose,
631 instrument,
632 skipwithvm,
640 TimestampTz starttime = 0;
642 startwritetime = 0;
645 ErrorContextCallback errcallback;
646 char **indnames = NULL;
648
649 verbose = (params.options & VACOPT_VERBOSE) != 0;
650 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
651 params.log_vacuum_min_duration >= 0));
652 if (instrument)
653 {
655 if (track_io_timing)
656 {
659 }
660 }
661
662 /* Used for instrumentation and stats report */
663 starttime = GetCurrentTimestamp();
664
666 RelationGetRelid(rel));
669 params.is_wraparound
672 else
675
676 /*
677 * Setup error traceback support for ereport() first. The idea is to set
678 * up an error context callback to display additional information on any
679 * error during a vacuum. During different phases of vacuum, we update
680 * the state so that the error context callback always display current
681 * information.
682 *
683 * Copy the names of heap rel into local memory for error reporting
684 * purposes, too. It isn't always safe to assume that we can get the name
685 * of each rel. It's convenient for code in lazy_scan_heap to always use
686 * these temp copies.
687 */
690 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
691 vacrel->relname = pstrdup(RelationGetRelationName(rel));
692 vacrel->indname = NULL;
694 vacrel->verbose = verbose;
695 errcallback.callback = vacuum_error_callback;
696 errcallback.arg = vacrel;
697 errcallback.previous = error_context_stack;
698 error_context_stack = &errcallback;
699
700 /* Set up high level stuff about rel and its indexes */
701 vacrel->rel = rel;
703 &vacrel->indrels);
704 vacrel->bstrategy = bstrategy;
705 if (instrument && vacrel->nindexes > 0)
706 {
707 /* Copy index names used by instrumentation (not error reporting) */
708 indnames = palloc_array(char *, vacrel->nindexes);
709 for (int i = 0; i < vacrel->nindexes; i++)
711 }
712
713 /*
714 * The index_cleanup param either disables index vacuuming and cleanup or
715 * forces it to go ahead when we would otherwise apply the index bypass
716 * optimization. The default is 'auto', which leaves the final decision
717 * up to lazy_vacuum().
718 *
719 * The truncate param allows user to avoid attempting relation truncation,
720 * though it can't force truncation to happen.
721 */
724 params.truncate != VACOPTVALUE_AUTO);
725
726 /*
 727 * While VacuumFailsafeActive is reset to false before calling this, we
728 * still need to reset it here due to recursive calls.
729 */
730 VacuumFailsafeActive = false;
731 vacrel->consider_bypass_optimization = true;
732 vacrel->do_index_vacuuming = true;
733 vacrel->do_index_cleanup = true;
734 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
736 {
737 /* Force disable index vacuuming up-front */
738 vacrel->do_index_vacuuming = false;
739 vacrel->do_index_cleanup = false;
740 }
741 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
742 {
743 /* Force index vacuuming. Note that failsafe can still bypass. */
744 vacrel->consider_bypass_optimization = false;
745 }
746 else
747 {
748 /* Default/auto, make all decisions dynamically */
750 }
751
752 /* Initialize page counters explicitly (be tidy) */
753 vacrel->scanned_pages = 0;
754 vacrel->eager_scanned_pages = 0;
755 vacrel->removed_pages = 0;
756 vacrel->new_frozen_tuple_pages = 0;
757 vacrel->lpdead_item_pages = 0;
758 vacrel->missed_dead_pages = 0;
759 vacrel->nonempty_pages = 0;
760 /* dead_items_alloc allocates vacrel->dead_items later on */
761
762 /* Allocate/initialize output statistics state */
763 vacrel->new_rel_tuples = 0;
764 vacrel->new_live_tuples = 0;
765 vacrel->indstats = (IndexBulkDeleteResult **)
766 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
767
768 /* Initialize remaining counters (be tidy) */
769 vacrel->num_index_scans = 0;
770 vacrel->num_dead_items_resets = 0;
771 vacrel->total_dead_items_bytes = 0;
772 vacrel->tuples_deleted = 0;
773 vacrel->tuples_frozen = 0;
774 vacrel->lpdead_items = 0;
775 vacrel->live_tuples = 0;
776 vacrel->recently_dead_tuples = 0;
777 vacrel->missed_dead_tuples = 0;
778
779 vacrel->new_all_visible_pages = 0;
780 vacrel->new_all_visible_all_frozen_pages = 0;
781 vacrel->new_all_frozen_pages = 0;
782
783 /*
784 * Get cutoffs that determine which deleted tuples are considered DEAD,
785 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
786 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
787 * happen in this order to ensure that the OldestXmin cutoff field works
788 * as an upper bound on the XIDs stored in the pages we'll actually scan
789 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
790 *
791 * Next acquire vistest, a related cutoff that's used in pruning. We use
792 * vistest in combination with OldestXmin to ensure that
793 * heap_page_prune_and_freeze() always removes any deleted tuple whose
794 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
795 * whether a tuple should be frozen or removed. (In the future we might
796 * want to teach lazy_scan_prune to recompute vistest from time to time,
797 * to increase the number of dead tuples it can prune away.)
798 */
799 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
801 vacrel->vistest = GlobalVisTestFor(rel);
802
803 /* Initialize state used to track oldest extant XID/MXID */
804 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
805 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
806
807 /*
808 * Initialize state related to tracking all-visible page skipping. This is
809 * very important to determine whether or not it is safe to advance the
810 * relfrozenxid/relminmxid.
811 */
812 vacrel->skippedallvis = false;
813 skipwithvm = true;
815 {
816 /*
817 * Force aggressive mode, and disable skipping blocks using the
818 * visibility map (even those set all-frozen)
819 */
820 vacrel->aggressive = true;
821 skipwithvm = false;
822 }
823
824 vacrel->skipwithvm = skipwithvm;
825
826 /*
827 * Set up eager scan tracking state. This must happen after determining
828 * whether or not the vacuum must be aggressive, because only normal
829 * vacuums use the eager scan algorithm.
830 */
832
833 /* Report the vacuum mode: 'normal' or 'aggressive' */
835 vacrel->aggressive
838
839 if (verbose)
840 {
841 if (vacrel->aggressive)
843 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
844 vacrel->dbname, vacrel->relnamespace,
845 vacrel->relname)));
846 else
848 (errmsg("vacuuming \"%s.%s.%s\"",
849 vacrel->dbname, vacrel->relnamespace,
850 vacrel->relname)));
851 }
852
853 /*
854 * Allocate dead_items memory using dead_items_alloc. This handles
855 * parallel VACUUM initialization as part of allocating shared memory
856 * space used for dead_items. (But do a failsafe precheck first, to
857 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
858 * is already dangerously old.)
859 */
862
863 /*
864 * Call lazy_scan_heap to perform all required heap pruning, index
865 * vacuuming, and heap vacuuming (plus related processing)
866 */
868
869 /*
870 * Save dead items max_bytes and update the memory usage statistics before
871 * cleanup, they are freed in parallel vacuum cases during
872 * dead_items_cleanup().
873 */
874 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
875 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
876
877 /*
878 * Free resources managed by dead_items_alloc. This ends parallel mode in
879 * passing when necessary.
880 */
883
884 /*
885 * Update pg_class entries for each of rel's indexes where appropriate.
886 *
887 * Unlike the later update to rel's pg_class entry, this is not critical.
888 * Maintains relpages/reltuples statistics used by the planner only.
889 */
890 if (vacrel->do_index_cleanup)
892
893 /* Done with rel's indexes */
894 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
895
896 /* Optionally truncate rel */
899
900 /* Pop the error context stack */
901 error_context_stack = errcallback.previous;
902
903 /* Report that we are now doing final cleanup */
906
907 /*
908 * Prepare to update rel's pg_class entry.
909 *
910 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
911 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
912 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
913 */
914 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
915 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
916 vacrel->cutoffs.relfrozenxid,
917 vacrel->NewRelfrozenXid));
918 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
919 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
920 vacrel->cutoffs.relminmxid,
921 vacrel->NewRelminMxid));
922 if (vacrel->skippedallvis)
923 {
924 /*
925 * Must keep original relfrozenxid in a non-aggressive VACUUM that
926 * chose to skip an all-visible page range. The state that tracks new
927 * values will have missed unfrozen XIDs from the pages we skipped.
928 */
929 Assert(!vacrel->aggressive);
930 vacrel->NewRelfrozenXid = InvalidTransactionId;
931 vacrel->NewRelminMxid = InvalidMultiXactId;
932 }
933
934 /*
935 * For safety, clamp relallvisible to be not more than what we're setting
936 * pg_class.relpages to
937 */
938 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
942
943 /*
944 * An all-frozen block _must_ be all-visible. As such, clamp the count of
945 * all-frozen blocks to the count of all-visible blocks. This matches the
946 * clamping of relallvisible above.
947 */
950
951 /*
952 * Now actually update rel's pg_class entry.
953 *
954 * In principle new_live_tuples could be -1 indicating that we (still)
955 * don't know the tuple count. In practice that can't happen, since we
956 * scan every page that isn't skipped using the visibility map.
957 */
958 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
960 vacrel->nindexes > 0,
961 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
963
964 /*
965 * Report results to the cumulative stats system, too.
966 *
967 * Deliberately avoid telling the stats system about LP_DEAD items that
968 * remain in the table due to VACUUM bypassing index and heap vacuuming.
969 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
970 * It seems like a good idea to err on the side of not vacuuming again too
971 * soon in cases where the failsafe prevented significant amounts of heap
972 * vacuuming.
973 */
975 Max(vacrel->new_live_tuples, 0),
976 vacrel->recently_dead_tuples +
977 vacrel->missed_dead_tuples,
978 starttime);
980
981 if (instrument)
982 {
984
985 if (verbose || params.log_vacuum_min_duration == 0 ||
988 {
989 long secs_dur;
990 int usecs_dur;
991 WalUsage walusage;
992 BufferUsage bufferusage;
994 char *msgfmt;
995 int32 diff;
996 double read_rate = 0,
997 write_rate = 0;
1001
1003 memset(&walusage, 0, sizeof(WalUsage));
1005 memset(&bufferusage, 0, sizeof(BufferUsage));
1007
1008 total_blks_hit = bufferusage.shared_blks_hit +
1009 bufferusage.local_blks_hit;
1010 total_blks_read = bufferusage.shared_blks_read +
1011 bufferusage.local_blks_read;
1013 bufferusage.local_blks_dirtied;
1014
1016 if (verbose)
1017 {
1018 /*
1019 * Aggressiveness already reported earlier, in dedicated
1020 * VACUUM VERBOSE ereport
1021 */
1022 Assert(!params.is_wraparound);
1023 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1024 }
1025 else if (params.is_wraparound)
1026 {
1027 /*
1028 * While it's possible for a VACUUM to be both is_wraparound
1029 * and !aggressive, that's just a corner-case -- is_wraparound
1030 * implies aggressive. Produce distinct output for the corner
1031 * case all the same, just in case.
1032 */
1033 if (vacrel->aggressive)
1034 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 else
1036 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1037 }
1038 else
1039 {
1040 if (vacrel->aggressive)
1041 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 else
1043 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1044 }
1046 vacrel->dbname,
1047 vacrel->relnamespace,
1048 vacrel->relname,
1049 vacrel->num_index_scans);
1050 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1051 vacrel->removed_pages,
1053 vacrel->scanned_pages,
1054 orig_rel_pages == 0 ? 100.0 :
1055 100.0 * vacrel->scanned_pages /
1057 vacrel->eager_scanned_pages);
1059 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1060 vacrel->tuples_deleted,
1061 (int64) vacrel->new_rel_tuples,
1062 vacrel->recently_dead_tuples);
1063 if (vacrel->missed_dead_tuples > 0)
1065 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1066 vacrel->missed_dead_tuples,
1067 vacrel->missed_dead_pages);
1069 vacrel->cutoffs.OldestXmin);
1071 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1072 vacrel->cutoffs.OldestXmin, diff);
1074 {
1075 diff = (int32) (vacrel->NewRelfrozenXid -
1076 vacrel->cutoffs.relfrozenxid);
1078 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1079 vacrel->NewRelfrozenXid, diff);
1080 }
1081 if (minmulti_updated)
1082 {
1083 diff = (int32) (vacrel->NewRelminMxid -
1084 vacrel->cutoffs.relminmxid);
1086 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1087 vacrel->NewRelminMxid, diff);
1088 }
1089 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1090 vacrel->new_frozen_tuple_pages,
1091 orig_rel_pages == 0 ? 100.0 :
1092 100.0 * vacrel->new_frozen_tuple_pages /
1094 vacrel->tuples_frozen);
1095
1097 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1098 vacrel->new_all_visible_pages,
1099 vacrel->new_all_visible_all_frozen_pages +
1100 vacrel->new_all_frozen_pages,
1101 vacrel->new_all_frozen_pages);
1102 if (vacrel->do_index_vacuuming)
1103 {
1104 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1105 appendStringInfoString(&buf, _("index scan not needed: "));
1106 else
1107 appendStringInfoString(&buf, _("index scan needed: "));
1108
1109 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1110 }
1111 else
1112 {
1114 appendStringInfoString(&buf, _("index scan bypassed: "));
1115 else
1116 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1117
1118 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1119 }
1121 vacrel->lpdead_item_pages,
1122 orig_rel_pages == 0 ? 100.0 :
1123 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1124 vacrel->lpdead_items);
1125 for (int i = 0; i < vacrel->nindexes; i++)
1126 {
1127 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1128
1129 if (!istat)
1130 continue;
1131
1133 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1134 indnames[i],
1135 istat->num_pages,
1136 istat->pages_newly_deleted,
1137 istat->pages_deleted,
1138 istat->pages_free);
1139 }
1141 {
1142 /*
1143 * We bypass the changecount mechanism because this value is
1144 * only updated by the calling process. We also rely on the
1145 * above call to pgstat_progress_end_command() to not clear
1146 * the st_progress_param array.
1147 */
1148 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1150 }
1151 if (track_io_timing)
1152 {
1153 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1154 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1155
1156 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1157 read_ms, write_ms);
1158 }
1159 if (secs_dur > 0 || usecs_dur > 0)
1160 {
1162 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1164 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1165 }
1166 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1169 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1174 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1175 walusage.wal_records,
1176 walusage.wal_fpi,
1177 walusage.wal_bytes,
1178 walusage.wal_fpi_bytes,
1179 walusage.wal_buffers_full);
1180
1181 /*
1182 * Report the dead items memory usage.
1183 *
1184 * The num_dead_items_resets counter increases when we reset the
1185 * collected dead items, so the counter is non-zero if at least
 1186 * one dead item is collected, even if index vacuuming is
1187 * disabled.
1188 */
1190 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1191 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1192 vacrel->num_dead_items_resets),
1193 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1194 vacrel->num_dead_items_resets,
1195 (double) dead_items_max_bytes / (1024 * 1024));
1196 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1197
1198 ereport(verbose ? INFO : LOG,
1199 (errmsg_internal("%s", buf.data)));
1200 pfree(buf.data);
1201 }
1202 }
1203
1204 /* Cleanup index statistics and index names */
1205 for (int i = 0; i < vacrel->nindexes; i++)
1206 {
1207 if (vacrel->indstats[i])
1208 pfree(vacrel->indstats[i]);
1209
1210 if (instrument)
1211 pfree(indnames[i]);
1212 }
1213}

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg(), errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), 
PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().

◆ identify_and_fix_vm_corruption()

static void identify_and_fix_vm_corruption ( Relation  rel,
Buffer  heap_buffer,
BlockNumber  heap_blk,
Page  heap_page,
int  nlpdead_items,
Buffer  vmbuffer,
uint8 vmbits 
)
static

Definition at line 1983 of file vacuumlazy.c.

1988{
1989 Assert(visibilitymap_get_status(rel, heap_blk, &vmbuffer) == *vmbits);
1990
1992
1993 /*
1994 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1995 * page-level bit is clear. However, it's possible that the bit got
1996 * cleared after heap_vac_scan_next_block() was called, so we must recheck
1997 * with buffer lock before concluding that the VM is corrupt.
1998 */
2000 ((*vmbits & VISIBILITYMAP_VALID_BITS) != 0))
2001 {
2004 errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2006
2007 visibilitymap_clear(rel, heap_blk, vmbuffer,
2009 *vmbits = 0;
2010 }
2011
2012 /*
2013 * It's possible for the value returned by
2014 * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2015 * wrong for us to see tuples that appear to not be visible to everyone
2016 * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2017 * never moves backwards, but GetOldestNonRemovableTransactionId() is
2018 * conservative and sometimes returns a value that's unnecessarily small,
2019 * so if we see that contradiction it just means that the tuples that we
2020 * think are not visible to everyone yet actually are, and the
2021 * PD_ALL_VISIBLE flag is correct.
2022 *
2023 * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2024 * however.
2025 */
2026 else if (PageIsAllVisible(heap_page) && nlpdead_items > 0)
2027 {
2030 errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2032
2035 visibilitymap_clear(rel, heap_blk, vmbuffer,
2037 *vmbits = 0;
2038 }
2039}

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferIsLockedByMeInMode(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), fb(), MarkBufferDirty(), PageClearAllVisible(), PageIsAllVisible(), RelationGetRelationName, visibilitymap_clear(), visibilitymap_get_status(), VISIBILITYMAP_VALID_BITS, and WARNING.

Referenced by lazy_scan_prune().

◆ lazy_check_wraparound_failsafe()

static bool lazy_check_wraparound_failsafe ( LVRelState vacrel)
static

Definition at line 3004 of file vacuumlazy.c.

3005{
3006 /* Don't warn more than once per VACUUM */
3008 return true;
3009
3011 {
3012 const int progress_index[] = {
3016 };
3018
3019 VacuumFailsafeActive = true;
3020
3021 /*
3022 * Abandon use of a buffer access strategy to allow use of all of
3023 * shared buffers. We assume the caller who allocated the memory for
3024 * the BufferAccessStrategy will free it.
3025 */
3026 vacrel->bstrategy = NULL;
3027
3028 /* Disable index vacuuming, index cleanup, and heap rel truncation */
3029 vacrel->do_index_vacuuming = false;
3030 vacrel->do_index_cleanup = false;
3031 vacrel->do_rel_truncate = false;
3032
3033 /* Reset the progress counters and set the failsafe mode */
3035
3037 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3038 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3039 vacrel->num_index_scans),
3040 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3041 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3042 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3043
3044 /* Stop applying cost limits from this point on */
3045 VacuumCostActive = false;
3047
3048 return true;
3049 }
3050
3051 return false;
3052}

References ereport, errdetail(), errhint(), errmsg(), fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_FAILSAFE, unlikely, vacuum_xid_failsafe_check(), VacuumCostActive, VacuumCostBalance, VacuumFailsafeActive, and WARNING.

Referenced by heap_vacuum_rel(), lazy_scan_heap(), and lazy_vacuum_all_indexes().

◆ lazy_cleanup_all_indexes()

static void lazy_cleanup_all_indexes ( LVRelState vacrel)
static

Definition at line 3058 of file vacuumlazy.c.

3059{
3060 double reltuples = vacrel->new_rel_tuples;
3061 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3062 const int progress_start_index[] = {
3065 };
3066 const int progress_end_index[] = {
3069 };
3071 int64 progress_end_val[2] = {0, 0};
3072
3073 Assert(vacrel->do_index_cleanup);
3074 Assert(vacrel->nindexes > 0);
3075
3076 /*
3077 * Report that we are now cleaning up indexes and the number of indexes to
3078 * cleanup.
3079 */
3081 progress_start_val[1] = vacrel->nindexes;
3083
3085 {
3086 for (int idx = 0; idx < vacrel->nindexes; idx++)
3087 {
3088 Relation indrel = vacrel->indrels[idx];
3089 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3090
3091 vacrel->indstats[idx] =
3092 lazy_cleanup_one_index(indrel, istat, reltuples,
3093 estimated_count, vacrel);
3094
3095 /* Report the number of indexes cleaned up */
3097 idx + 1);
3098 }
3099 }
3100 else
3101 {
3102 /* Outsource everything to parallel variant */
3104 vacrel->num_index_scans,
3105 estimated_count);
3106 }
3107
3108 /* Reset the progress counters */
3110}

References Assert, fb(), idx(), lazy_cleanup_one_index(), parallel_vacuum_cleanup_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_PHASE, and PROGRESS_VACUUM_PHASE_INDEX_CLEANUP.

Referenced by lazy_scan_heap().

◆ lazy_cleanup_one_index()

static IndexBulkDeleteResult * lazy_cleanup_one_index ( Relation  indrel,
IndexBulkDeleteResult istat,
double  reltuples,
bool  estimated_count,
LVRelState vacrel 
)
static

Definition at line 3175 of file vacuumlazy.c.

3178{
3181
3182 ivinfo.index = indrel;
3183 ivinfo.heaprel = vacrel->rel;
3184 ivinfo.analyze_only = false;
3185 ivinfo.report_progress = false;
3186 ivinfo.estimated_count = estimated_count;
3187 ivinfo.message_level = DEBUG2;
3188
3189 ivinfo.num_heap_tuples = reltuples;
3190 ivinfo.strategy = vacrel->bstrategy;
3191
3192 /*
3193 * Update error traceback information.
3194 *
3195 * The index name is saved during this phase and restored immediately
3196 * after this phase. See vacuum_error_callback.
3197 */
3198 Assert(vacrel->indname == NULL);
3203
3204 istat = vac_cleanup_one_index(&ivinfo, istat);
3205
3206 /* Revert to the previous phase information for error traceback */
3208 pfree(vacrel->indname);
3209 vacrel->indname = NULL;
3210
3211 return istat;
3212}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_cleanup_one_index(), and VACUUM_ERRCB_PHASE_INDEX_CLEANUP.

Referenced by lazy_cleanup_all_indexes().

◆ lazy_scan_heap()

static void lazy_scan_heap ( LVRelState vacrel)
static

Definition at line 1252 of file vacuumlazy.c.

1253{
1254 ReadStream *stream;
1255 BlockNumber rel_pages = vacrel->rel_pages,
1256 blkno = 0,
1259 vacrel->eager_scan_remaining_successes; /* for logging */
1260 Buffer vmbuffer = InvalidBuffer;
1261 const int initprog_index[] = {
1265 };
1267
1268 /* Report that we're scanning the heap, advertising total # of blocks */
1270 initprog_val[1] = rel_pages;
1271 initprog_val[2] = vacrel->dead_items_info->max_bytes;
1273
1274 /* Initialize for the first heap_vac_scan_next_block() call */
1275 vacrel->current_block = InvalidBlockNumber;
1276 vacrel->next_unskippable_block = InvalidBlockNumber;
1277 vacrel->next_unskippable_eager_scanned = false;
1278 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1279
1280 /*
1281 * Set up the read stream for vacuum's first pass through the heap.
1282 *
1283 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1284 * explicit work in heap_vac_scan_next_block.
1285 */
1287 vacrel->bstrategy,
1288 vacrel->rel,
1291 vacrel,
1292 sizeof(bool));
1293
1294 while (true)
1295 {
1296 Buffer buf;
1297 Page page;
1298 bool was_eager_scanned = false;
1299 int ndeleted = 0;
1300 bool has_lpdead_items;
1301 void *per_buffer_data = NULL;
1302 bool vm_page_frozen = false;
1303 bool got_cleanup_lock = false;
1304
1305 vacuum_delay_point(false);
1306
1307 /*
1308 * Regularly check if wraparound failsafe should trigger.
1309 *
1310 * There is a similar check inside lazy_vacuum_all_indexes(), but
1311 * relfrozenxid might start to look dangerously old before we reach
1312 * that point. This check also provides failsafe coverage for the
1313 * one-pass strategy, and the two-pass strategy with the index_cleanup
1314 * param set to 'off'.
1315 */
1316 if (vacrel->scanned_pages > 0 &&
1317 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1319
1320 /*
1321 * Consider if we definitely have enough space to process TIDs on page
1322 * already. If we are close to overrunning the available space for
1323 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1324 * this page. However, let's force at least one page-worth of tuples
1325 * to be stored as to ensure we do at least some work when the memory
1326 * configured is so low that we run out before storing anything.
1327 */
1328 if (vacrel->dead_items_info->num_items > 0 &&
1329 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1330 {
1331 /*
1332 * Before beginning index vacuuming, we release any pin we may
1333 * hold on the visibility map page. This isn't necessary for
1334 * correctness, but we do it anyway to avoid holding the pin
1335 * across a lengthy, unrelated operation.
1336 */
1337 if (BufferIsValid(vmbuffer))
1338 {
1339 ReleaseBuffer(vmbuffer);
1340 vmbuffer = InvalidBuffer;
1341 }
1342
1343 /* Perform a round of index and heap vacuuming */
1344 vacrel->consider_bypass_optimization = false;
1346
1347 /*
1348 * Vacuum the Free Space Map to make newly-freed space visible on
1349 * upper-level FSM pages. Note that blkno is the previously
1350 * processed block.
1351 */
1353 blkno + 1);
1355
1356 /* Report that we are once again scanning the heap */
1359 }
1360
1361 buf = read_stream_next_buffer(stream, &per_buffer_data);
1362
1363 /* The relation is exhausted. */
1364 if (!BufferIsValid(buf))
1365 break;
1366
1367 was_eager_scanned = *((bool *) per_buffer_data);
1369 page = BufferGetPage(buf);
1370 blkno = BufferGetBlockNumber(buf);
1371
1372 vacrel->scanned_pages++;
1374 vacrel->eager_scanned_pages++;
1375
1376 /* Report as block scanned, update error traceback information */
1379 blkno, InvalidOffsetNumber);
1380
1381 /*
1382 * Pin the visibility map page in case we need to mark the page
1383 * all-visible. In most cases this will be very cheap, because we'll
1384 * already have the correct page pinned anyway.
1385 */
1386 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1387
1388 /*
1389 * We need a buffer cleanup lock to prune HOT chains and defragment
1390 * the page in lazy_scan_prune. But when it's not possible to acquire
1391 * a cleanup lock right away, we may be able to settle for reduced
1392 * processing using lazy_scan_noprune.
1393 */
1395
1396 if (!got_cleanup_lock)
1398
1399 /* Check for new or empty pages before lazy_scan_[no]prune call */
1401 vmbuffer))
1402 {
1403 /* Processed as new/empty page (lock and pin released) */
1404 continue;
1405 }
1406
1407 /*
1408 * If we didn't get the cleanup lock, we can still collect LP_DEAD
1409 * items in the dead_items area for later vacuuming, count live and
1410 * recently dead tuples for vacuum logging, and determine if this
1411 * block could later be truncated. If we encounter any xid/mxids that
1412 * require advancing the relfrozenxid/relminxid, we'll have to wait
1413 * for a cleanup lock and call lazy_scan_prune().
1414 */
1415 if (!got_cleanup_lock &&
1416 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1417 {
1418 /*
1419 * lazy_scan_noprune could not do all required processing. Wait
1420 * for a cleanup lock, and call lazy_scan_prune in the usual way.
1421 */
1422 Assert(vacrel->aggressive);
1425 got_cleanup_lock = true;
1426 }
1427
1428 /*
1429 * If we have a cleanup lock, we must now prune, freeze, and count
1430 * tuples. We may have acquired the cleanup lock originally, or we may
1431 * have gone back and acquired it after lazy_scan_noprune() returned
1432 * false. Either way, the page hasn't been processed yet.
1433 *
1434 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1435 * recently_dead_tuples and live tuples for vacuum logging, determine
1436 * if the block can later be truncated, and accumulate the details of
1437 * remaining LP_DEAD line pointers on the page into dead_items. These
1438 * dead items include those pruned by lazy_scan_prune() as well as
1439 * line pointers previously marked LP_DEAD.
1440 */
1441 if (got_cleanup_lock)
1442 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1443 vmbuffer,
1445
1446 /*
1447 * Count an eagerly scanned page as a failure or a success.
1448 *
1449 * Only lazy_scan_prune() freezes pages, so if we didn't get the
1450 * cleanup lock, we won't have frozen the page. However, we only count
1451 * pages that were too new to require freezing as eager freeze
1452 * failures.
1453 *
1454 * We could gather more information from lazy_scan_noprune() about
1455 * whether or not there were tuples with XIDs or MXIDs older than the
1456 * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1457 * exclude pages skipped due to cleanup lock contention from eager
1458 * freeze algorithm caps.
1459 */
1461 {
1462 /* Aggressive vacuums do not eager scan. */
1463 Assert(!vacrel->aggressive);
1464
1465 if (vm_page_frozen)
1466 {
1467 if (vacrel->eager_scan_remaining_successes > 0)
1468 vacrel->eager_scan_remaining_successes--;
1469
1470 if (vacrel->eager_scan_remaining_successes == 0)
1471 {
1472 /*
1473 * Report only once that we disabled eager scanning. We
1474 * may eagerly read ahead blocks in excess of the success
1475 * or failure caps before attempting to freeze them, so we
1476 * could reach here even after disabling additional eager
1477 * scanning.
1478 */
1479 if (vacrel->eager_scan_max_fails_per_region > 0)
1480 ereport(vacrel->verbose ? INFO : DEBUG2,
1481 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1483 vacrel->dbname, vacrel->relnamespace,
1484 vacrel->relname)));
1485
1486 /*
1487 * If we hit our success cap, permanently disable eager
1488 * scanning by setting the other eager scan management
1489 * fields to their disabled values.
1490 */
1491 vacrel->eager_scan_remaining_fails = 0;
1492 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1493 vacrel->eager_scan_max_fails_per_region = 0;
1494 }
1495 }
1496 else if (vacrel->eager_scan_remaining_fails > 0)
1497 vacrel->eager_scan_remaining_fails--;
1498 }
1499
1500 /*
1501 * Now drop the buffer lock and, potentially, update the FSM.
1502 *
1503 * Our goal is to update the freespace map the last time we touch the
1504 * page. If we'll process a block in the second pass, we may free up
1505 * additional space on the page, so it is better to update the FSM
1506 * after the second pass. If the relation has no indexes, or if index
1507 * vacuuming is disabled, there will be no second heap pass; if this
1508 * particular page has no dead items, the second heap pass will not
1509 * touch this page. So, in those cases, update the FSM now.
1510 *
1511 * Note: In corner cases, it's possible to miss updating the FSM
1512 * entirely. If index vacuuming is currently enabled, we'll skip the
1513 * FSM update now. But if failsafe mode is later activated, or there
1514 * are so few dead tuples that index vacuuming is bypassed, there will
1515 * also be no opportunity to update the FSM later, because we'll never
1516 * revisit this page. Since updating the FSM is desirable but not
1517 * absolutely required, that's OK.
1518 */
1519 if (vacrel->nindexes == 0
1520 || !vacrel->do_index_vacuuming
1521 || !has_lpdead_items)
1522 {
1523 Size freespace = PageGetHeapFreeSpace(page);
1524
1526 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1527
1528 /*
1529 * Periodically perform FSM vacuuming to make newly-freed space
1530 * visible on upper FSM pages. This is done after vacuuming if the
1531 * table has indexes. There will only be newly-freed space if we
1532 * held the cleanup lock and lazy_scan_prune() was called.
1533 */
1534 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1536 {
1538 blkno);
1540 }
1541 }
1542 else
1544 }
1545
1546 vacrel->blkno = InvalidBlockNumber;
1547 if (BufferIsValid(vmbuffer))
1548 ReleaseBuffer(vmbuffer);
1549
1550 /*
1551 * Report that everything is now scanned. We never skip scanning the last
1552 * block in the relation, so we can pass rel_pages here.
1553 */
1555 rel_pages);
1556
1557 /* now we can compute the new value for pg_class.reltuples */
1558 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1559 vacrel->scanned_pages,
1560 vacrel->live_tuples);
1561
1562 /*
1563 * Also compute the total number of surviving heap entries. In the
1564 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1565 */
1566 vacrel->new_rel_tuples =
1567 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1568 vacrel->missed_dead_tuples;
1569
1570 read_stream_end(stream);
1571
1572 /*
1573 * Do index vacuuming (call each index's ambulkdelete routine), then do
1574 * related heap vacuuming
1575 */
1576 if (vacrel->dead_items_info->num_items > 0)
1578
1579 /*
1580 * Vacuum the remainder of the Free Space Map. We must do this whether or
1581 * not there were indexes, and whether or not we bypassed index vacuuming.
1582 * We can pass rel_pages here because we never skip scanning the last
1583 * block of the relation.
1584 */
1585 if (rel_pages > next_fsm_block_to_vacuum)
1587
1588 /* report all blocks vacuumed */
1590
1591 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1592 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1594}

References Assert, buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DEBUG2, ereport, errmsg(), FAILSAFE_EVERY_PAGES, fb(), FreeSpaceMapVacuumRange(), heap_vac_scan_next_block(), INFO, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_check_wraparound_failsafe(), lazy_cleanup_all_indexes(), lazy_scan_new_or_empty(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum(), LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, Max, PageGetHeapFreeSpace(), pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_SCANNED, PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_SCAN_HEAP, PROGRESS_VACUUM_TOTAL_HEAP_BLKS, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), RecordPageWithFreeSpace(), ReleaseBuffer(), TidStoreMemoryUsage(), UnlockReleaseBuffer(), update_vacuum_error_info(), vac_estimate_reltuples(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_FSM_EVERY_PAGES, and visibilitymap_pin().

Referenced by heap_vacuum_rel().

◆ lazy_scan_new_or_empty()

static bool lazy_scan_new_or_empty ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool  sharelock,
Buffer  vmbuffer 
)
static

Definition at line 1850 of file vacuumlazy.c.

1852{
1853 Size freespace;
1854
1855 if (PageIsNew(page))
1856 {
1857 /*
1858 * All-zeroes pages can be left over if either a backend extends the
1859 * relation by a single page, but crashes before the newly initialized
1860 * page has been written out, or when bulk-extending the relation
1861 * (which creates a number of empty pages at the tail end of the
1862 * relation), and then enters them into the FSM.
1863 *
1864 * Note we do not enter the page into the visibilitymap. That has the
1865 * downside that we repeatedly visit this page in subsequent vacuums,
1866 * but otherwise we'll never discover the space on a promoted standby.
1867 * The harm of repeated checking ought to normally not be too bad. The
1868 * space usually should be used at some point, otherwise there
1869 * wouldn't be any regular vacuums.
1870 *
1871 * Make sure these pages are in the FSM, to ensure they can be reused.
1872 * Do that by testing if there's any space recorded for the page. If
1873 * not, enter it. We do so after releasing the lock on the heap page,
1874 * the FSM is approximate, after all.
1875 */
1877
1878 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1879 {
1880 freespace = BLCKSZ - SizeOfPageHeaderData;
1881
1882 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1883 }
1884
1885 return true;
1886 }
1887
1888 if (PageIsEmpty(page))
1889 {
1890 /*
1891 * It seems likely that caller will always be able to get a cleanup
1892 * lock on an empty page. But don't take any chances -- escalate to
1893 * an exclusive lock (still don't need a cleanup lock, though).
1894 */
1895 if (sharelock)
1896 {
1899
1900 if (!PageIsEmpty(page))
1901 {
1902 /* page isn't new or empty -- keep lock and pin for now */
1903 return false;
1904 }
1905 }
1906 else
1907 {
1908 /* Already have a full cleanup lock (which is more than enough) */
1909 }
1910
1911 /*
1912 * Unlike new pages, empty pages are always set all-visible and
1913 * all-frozen.
1914 */
1915 if (!PageIsAllVisible(page))
1916 {
1918
1919 /* mark buffer dirty before writing a WAL record */
1921
1922 /*
1923 * It's possible that another backend has extended the heap,
1924 * initialized the page, and then failed to WAL-log the page due
1925 * to an ERROR. Since heap extension is not WAL-logged, recovery
1926 * might try to replay our record setting the page all-visible and
1927 * find that the page isn't initialized, which will cause a PANIC.
1928 * To prevent that, check whether the page has been previously
1929 * WAL-logged, and if not, do that now.
1930 */
1931 if (RelationNeedsWAL(vacrel->rel) &&
1933 log_newpage_buffer(buf, true);
1934
1935 PageSetAllVisible(page);
1936 PageClearPrunable(page);
1937 visibilitymap_set(vacrel->rel, blkno, buf,
1939 vmbuffer, InvalidTransactionId,
1943
1944 /* Count the newly all-frozen pages for logging */
1945 vacrel->new_all_visible_pages++;
1946 vacrel->new_all_visible_all_frozen_pages++;
1947 }
1948
1949 freespace = PageGetHeapFreeSpace(page);
1951 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1952 return true;
1953 }
1954
1955 /* page isn't new or empty -- keep lock and pin */
1956 return false;
1957}

References buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, END_CRIT_SECTION, fb(), GetRecordedFreeSpace(), InvalidTransactionId, InvalidXLogRecPtr, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), PageClearPrunable, PageGetHeapFreeSpace(), PageGetLSN(), PageIsAllVisible(), PageIsEmpty(), PageIsNew(), PageSetAllVisible(), RecordPageWithFreeSpace(), RelationNeedsWAL, SizeOfPageHeaderData, START_CRIT_SECTION, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_set(), and XLogRecPtrIsValid.

Referenced by lazy_scan_heap().

◆ lazy_scan_noprune()

static bool lazy_scan_noprune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool has_lpdead_items 
)
static

Definition at line 2273 of file vacuumlazy.c.

2278{
2279 OffsetNumber offnum,
2280 maxoff;
2281 int lpdead_items,
2282 live_tuples,
2283 recently_dead_tuples,
2284 missed_dead_tuples;
2285 bool hastup;
2287 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2288 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2290
2291 Assert(BufferGetBlockNumber(buf) == blkno);
2292
2293 hastup = false; /* for now */
2294
2295 lpdead_items = 0;
2296 live_tuples = 0;
2297 recently_dead_tuples = 0;
2298 missed_dead_tuples = 0;
2299
2300 maxoff = PageGetMaxOffsetNumber(page);
2301 for (offnum = FirstOffsetNumber;
2302 offnum <= maxoff;
2303 offnum = OffsetNumberNext(offnum))
2304 {
2305 ItemId itemid;
2306 HeapTupleData tuple;
2307
2308 vacrel->offnum = offnum;
2309 itemid = PageGetItemId(page, offnum);
2310
2311 if (!ItemIdIsUsed(itemid))
2312 continue;
2313
2314 if (ItemIdIsRedirected(itemid))
2315 {
2316 hastup = true;
2317 continue;
2318 }
2319
2320 if (ItemIdIsDead(itemid))
2321 {
2322 /*
2323 * Deliberately don't set hastup=true here. See same point in
2324 * lazy_scan_prune for an explanation.
2325 */
2326 deadoffsets[lpdead_items++] = offnum;
2327 continue;
2328 }
2329
2330 hastup = true; /* page prevents rel truncation */
2331 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2333 &NoFreezePageRelfrozenXid,
2334 &NoFreezePageRelminMxid))
2335 {
2336 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2337 if (vacrel->aggressive)
2338 {
2339 /*
2340 * Aggressive VACUUMs must always be able to advance rel's
2341 * relfrozenxid to a value >= FreezeLimit (and be able to
2342 * advance rel's relminmxid to a value >= MultiXactCutoff).
2343 * The ongoing aggressive VACUUM won't be able to do that
2344 * unless it can freeze an XID (or MXID) from this tuple now.
2345 *
2346 * The only safe option is to have caller perform processing
2347 * of this page using lazy_scan_prune. Caller might have to
2348 * wait a while for a cleanup lock, but it can't be helped.
2349 */
2350 vacrel->offnum = InvalidOffsetNumber;
2351 return false;
2352 }
2353
2354 /*
2355 * Non-aggressive VACUUMs are under no obligation to advance
2356 * relfrozenxid (even by one XID). We can be much laxer here.
2357 *
2358 * Currently we always just accept an older final relfrozenxid
2359 * and/or relminmxid value. We never make caller wait or work a
2360 * little harder, even when it likely makes sense to do so.
2361 */
2362 }
2363
2364 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2365 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2366 tuple.t_len = ItemIdGetLength(itemid);
2367 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2368
2369 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2370 buf))
2371 {
2373 case HEAPTUPLE_LIVE:
2374
2375 /*
2376 * Count both cases as live, just like lazy_scan_prune
2377 */
2378 live_tuples++;
2379
2380 break;
2381 case HEAPTUPLE_DEAD:
2382
2383 /*
2384 * There is some useful work for pruning to do, that won't be
2385 * done due to failure to get a cleanup lock.
2386 */
2387 missed_dead_tuples++;
2388 break;
2390
2391 /*
2392 * Count in recently_dead_tuples, just like lazy_scan_prune
2393 */
2394 recently_dead_tuples++;
2395 break;
2397
2398 /*
2399 * Do not count these rows as live, just like lazy_scan_prune
2400 */
2401 break;
2402 default:
2403 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2404 break;
2405 }
2406 }
2407
2408 vacrel->offnum = InvalidOffsetNumber;
2409
2410 /*
2411 * By here we know for sure that caller can put off freezing and pruning
2412 * this particular page until the next VACUUM. Remember its details now.
2413 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2414 */
2415 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2416 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2417
2418 /* Save any LP_DEAD items found on the page in dead_items */
2419 if (vacrel->nindexes == 0)
2420 {
2421 /* Using one-pass strategy (since table has no indexes) */
2422 if (lpdead_items > 0)
2423 {
2424 /*
2425 * Perfunctory handling for the corner case where a single pass
2426 * strategy VACUUM cannot get a cleanup lock, and it turns out
2427 * that there is one or more LP_DEAD items: just count the LP_DEAD
2428 * items as missed_dead_tuples instead. (This is a bit dishonest,
2429 * but it beats having to maintain specialized heap vacuuming code
2430 * forever, for vanishingly little benefit.)
2431 */
2432 hastup = true;
2433 missed_dead_tuples += lpdead_items;
2434 }
2435 }
2436 else if (lpdead_items > 0)
2437 {
2438 /*
2439 * Page has LP_DEAD items, and so any references/TIDs that remain in
2440 * indexes will be deleted during index vacuuming (and then marked
2441 * LP_UNUSED in the heap)
2442 */
2443 vacrel->lpdead_item_pages++;
2444
2445 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2446
2447 vacrel->lpdead_items += lpdead_items;
2448 }
2449
2450 /*
2451 * Finally, add relevant page-local counts to whole-VACUUM counts
2452 */
2453 vacrel->live_tuples += live_tuples;
2454 vacrel->recently_dead_tuples += recently_dead_tuples;
2455 vacrel->missed_dead_tuples += missed_dead_tuples;
2456 if (missed_dead_tuples > 0)
2457 vacrel->missed_dead_pages++;
2458
2459 /* Can't truncate this page */
2460 if (hastup)
2461 vacrel->nonempty_pages = blkno + 1;
2462
2463 /* Did we find LP_DEAD items? */
2464 *has_lpdead_items = (lpdead_items > 0);
2465
2466 /* Caller won't need to call lazy_scan_prune with same page */
2467 return true;
2468}

References Assert, buf, BufferGetBlockNumber(), dead_items_add(), elog, ERROR, fb(), FirstOffsetNumber, heap_tuple_should_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuum(), InvalidOffsetNumber, ItemIdGetLength, ItemIdIsDead, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), MaxHeapTuplesPerPage, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by lazy_scan_heap().

◆ lazy_scan_prune()

static int lazy_scan_prune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
Buffer  vmbuffer,
bool has_lpdead_items,
bool vm_page_frozen 
)
static

Definition at line 2059 of file vacuumlazy.c.

2066{
2067 Relation rel = vacrel->rel;
2069 PruneFreezeParams params = {
2070 .relation = rel,
2071 .buffer = buf,
2072 .reason = PRUNE_VACUUM_SCAN,
2073 .options = HEAP_PAGE_PRUNE_FREEZE,
2074 .vistest = vacrel->vistest,
2075 .cutoffs = &vacrel->cutoffs,
2076 };
2077 uint8 old_vmbits = 0;
2078 uint8 new_vmbits = 0;
2079
2080 Assert(BufferGetBlockNumber(buf) == blkno);
2081
2082 /*
2083 * Prune all HOT-update chains and potentially freeze tuples on this page.
2084 *
2085 * If the relation has no indexes, we can immediately mark would-be dead
2086 * items LP_UNUSED.
2087 *
2088 * The number of tuples removed from the page is returned in
2089 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2090 * presult.lpdead_items's final value can be thought of as the number of
2091 * tuples that were deleted from indexes.
2092 *
2093 * We will update the VM after collecting LP_DEAD items and freezing
2094 * tuples. Pruning will have determined whether or not the page is
2095 * all-visible.
2096 */
2097 if (vacrel->nindexes == 0)
2099
2101 &presult,
2102 &vacrel->offnum,
2103 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2104
2105 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2106 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2107
2108 if (presult.nfrozen > 0)
2109 {
2110 /*
2111 * We don't increment the new_frozen_tuple_pages instrumentation
2112 * counter when nfrozen == 0, since it only counts pages with newly
2113 * frozen tuples (don't confuse that with pages newly set all-frozen
2114 * in VM).
2115 */
2116 vacrel->new_frozen_tuple_pages++;
2117 }
2118
2119 /*
2120 * VACUUM will call heap_page_is_all_visible() during the second pass over
2121 * the heap to determine all_visible and all_frozen for the page -- this
2122 * is a specialized version of the logic from this function. Now that
2123 * we've finished pruning and freezing, make sure that we're in total
2124 * agreement with heap_page_is_all_visible() using an assertion.
2125 */
2126#ifdef USE_ASSERT_CHECKING
2127 if (presult.all_visible)
2128 {
2130 bool debug_all_frozen;
2131
2132 Assert(presult.lpdead_items == 0);
2133
2135 vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2136 &debug_cutoff, &vacrel->offnum));
2137
2138 Assert(presult.all_frozen == debug_all_frozen);
2139
2141 debug_cutoff == presult.vm_conflict_horizon);
2142 }
2143#endif
2144
2145 /*
2146 * Now save details of the LP_DEAD items from the page in vacrel
2147 */
2148 if (presult.lpdead_items > 0)
2149 {
2150 vacrel->lpdead_item_pages++;
2151
2152 /*
2153 * deadoffsets are collected incrementally in
2154 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2155 * with an indeterminate order, but dead_items_add requires them to be
2156 * sorted.
2157 */
2158 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2160
2161 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2162 }
2163
2164 /* Finally, add page-local counts to whole-VACUUM counts */
2165 vacrel->tuples_deleted += presult.ndeleted;
2166 vacrel->tuples_frozen += presult.nfrozen;
2167 vacrel->lpdead_items += presult.lpdead_items;
2168 vacrel->live_tuples += presult.live_tuples;
2169 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2170
2171 /* Can't truncate this page */
2172 if (presult.hastup)
2173 vacrel->nonempty_pages = blkno + 1;
2174
2175 /* Did we find LP_DEAD items? */
2176 *has_lpdead_items = (presult.lpdead_items > 0);
2177
2178 Assert(!presult.all_visible || !(*has_lpdead_items));
2179 Assert(!presult.all_frozen || presult.all_visible);
2180
2181 old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
2182
2183 identify_and_fix_vm_corruption(vacrel->rel, buf, blkno, page,
2184 presult.lpdead_items, vmbuffer,
2185 &old_vmbits);
2186
2187 if (!presult.all_visible)
2188 return presult.ndeleted;
2189
2190 /* Set the visibility map and page visibility hint */
2192
2193 if (presult.all_frozen)
2195
2196 /* Nothing to do */
2197 if (old_vmbits == new_vmbits)
2198 return presult.ndeleted;
2199
2200 /*
2201 * It should never be the case that the visibility map page is set while
2202 * the page-level bit is clear (and if so, we cleared it above), but the
2203 * reverse is allowed (if checksums are not enabled). Regardless, set both
2204 * bits so that we get back in sync.
2205 *
2206 * The heap buffer must be marked dirty before adding it to the WAL chain
2207 * when setting the VM. We don't worry about unnecessarily dirtying the
2208 * heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
2209 * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
2210 * the VM bits clear, so there is no point in optimizing it.
2211 */
2212 PageSetAllVisible(page);
2213 PageClearPrunable(page);
2215
2216 /*
2217 * If the page is being set all-frozen, we pass InvalidTransactionId as
2218 * the cutoff_xid, since a snapshot conflict horizon sufficient to make
2219 * everything safe for REDO was logged when the page's tuples were frozen.
2220 */
2221 Assert(!presult.all_frozen ||
2222 !TransactionIdIsValid(presult.vm_conflict_horizon));
2223
2224 visibilitymap_set(vacrel->rel, blkno, buf,
2226 vmbuffer, presult.vm_conflict_horizon,
2227 new_vmbits);
2228
2229 /*
2230 * If the page wasn't already set all-visible and/or all-frozen in the VM,
2231 * count it as newly set for logging.
2232 */
2234 {
2235 vacrel->new_all_visible_pages++;
2236 if (presult.all_frozen)
2237 {
2238 vacrel->new_all_visible_all_frozen_pages++;
2239 *vm_page_frozen = true;
2240 }
2241 }
2242 else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2243 presult.all_frozen)
2244 {
2245 vacrel->new_all_frozen_pages++;
2246 *vm_page_frozen = true;
2247 }
2248
2249 return presult.ndeleted;
2250}
(Doxygen tooltip fragment: the member "Relation relation" of PruneFreezeParams, defined at heapam.h line 238.)

References Assert, buf, BufferGetBlockNumber(), cmpOffsetNumbers(), dead_items_add(), fb(), heap_page_prune_and_freeze(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, identify_and_fix_vm_corruption(), InvalidXLogRecPtr, MarkBufferDirty(), MultiXactIdIsValid, PruneFreezeParams::options, PageClearPrunable, PageSetAllVisible(), PRUNE_VACUUM_SCAN, qsort, PruneFreezeParams::relation, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_get_status(), and visibilitymap_set().

Referenced by lazy_scan_heap().

◆ lazy_truncate_heap()

static void lazy_truncate_heap ( LVRelState vacrel)
static

Definition at line 3255 of file vacuumlazy.c.

3256{
3257 BlockNumber orig_rel_pages = vacrel->rel_pages;
3260 int lock_retry;
3261
3262 /* Report that we are now truncating */
3265
3266 /* Update error traceback information one last time */
3268 vacrel->nonempty_pages, InvalidOffsetNumber);
3269
3270 /*
3271 * Loop until no more truncating can be done.
3272 */
3273 do
3274 {
3275 /*
3276 * We need full exclusive lock on the relation in order to do
3277 * truncation. If we can't get it, give up rather than waiting --- we
3278 * don't want to block other backends, and we don't want to deadlock
3279 * (which is quite possible considering we already hold a lower-grade
3280 * lock).
3281 */
3282 lock_waiter_detected = false;
3283 lock_retry = 0;
3284 while (true)
3285 {
3287 break;
3288
3289 /*
3290 * Check for interrupts while trying to (re-)acquire the exclusive
3291 * lock.
3292 */
3294
3297 {
3298 /*
3299 * We failed to establish the lock in the specified number of
3300 * retries. This means we give up truncating.
3301 */
3302 ereport(vacrel->verbose ? INFO : DEBUG2,
3303 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3304 vacrel->relname)));
3305 return;
3306 }
3307
3313 }
3314
3315 /*
3316 * Now that we have exclusive lock, look to see if the rel has grown
3317 * whilst we were vacuuming with non-exclusive lock. If so, give up;
3318 * the newly added pages presumably contain non-deletable tuples.
3319 */
3322 {
3323 /*
3324 * Note: we intentionally don't update vacrel->rel_pages with the
3325 * new rel size here. If we did, it would amount to assuming that
3326 * the new pages are empty, which is unlikely. Leaving the numbers
3327 * alone amounts to assuming that the new pages have the same
3328 * tuple density as existing ones, which is less unlikely.
3329 */
3331 return;
3332 }
3333
3334 /*
3335 * Scan backwards from the end to verify that the end pages actually
3336 * contain no tuples. This is *necessary*, not optional, because
3337 * other backends could have added tuples to these pages whilst we
3338 * were vacuuming.
3339 */
3341 vacrel->blkno = new_rel_pages;
3342
3344 {
3345 /* can't do anything after all */
3347 return;
3348 }
3349
3350 /*
3351 * Okay to truncate.
3352 */
3354
3355 /*
3356 * We can release the exclusive lock as soon as we have truncated.
3357 * Other backends can't safely access the relation until they have
3358 * processed the smgr invalidation that smgrtruncate sent out ... but
3359 * that should happen as part of standard invalidation processing once
3360 * they acquire lock on the relation.
3361 */
3363
3364 /*
3365 * Update statistics. Here, it *is* correct to adjust rel_pages
3366 * without also touching reltuples, since the tuple count wasn't
3367 * changed by the truncation.
3368 */
3369 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3370 vacrel->rel_pages = new_rel_pages;
3371
3372 ereport(vacrel->verbose ? INFO : DEBUG2,
3373 (errmsg("table \"%s\": truncated %u to %u pages",
3374 vacrel->relname,
3377 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3378}

References AccessExclusiveLock, CHECK_FOR_INTERRUPTS, ConditionalLockRelation(), count_nondeletable_pages(), DEBUG2, ereport, errmsg(), fb(), INFO, InvalidOffsetNumber, MyLatch, pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE, RelationGetNumberOfBlocks, RelationTruncate(), ResetLatch(), UnlockRelation(), update_vacuum_error_info(), VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_TRUNCATE_LOCK_TIMEOUT, VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL, WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by heap_vacuum_rel().

◆ lazy_vacuum()

static void lazy_vacuum ( LVRelState vacrel)
static

Definition at line 2484 of file vacuumlazy.c.

2485{
2486 bool bypass;
2487
2488 /* Should not end up here with no indexes */
2489 Assert(vacrel->nindexes > 0);
2490 Assert(vacrel->lpdead_item_pages > 0);
2491
2492 if (!vacrel->do_index_vacuuming)
2493 {
2494 Assert(!vacrel->do_index_cleanup);
2496 return;
2497 }
2498
2499 /*
2500 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2501 *
2502 * We currently only do this in cases where the number of LP_DEAD items
2503 * for the entire VACUUM operation is close to zero. This avoids sharp
2504 * discontinuities in the duration and overhead of successive VACUUM
2505 * operations that run against the same table with a fixed workload.
2506 * Ideally, successive VACUUM operations will behave as if there are
2507 * exactly zero LP_DEAD items in cases where there are close to zero.
2508 *
2509 * This is likely to be helpful with a table that is continually affected
2510 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2511 * have small aberrations that lead to just a few heap pages retaining
2512 * only one or two LP_DEAD items. This is pretty common; even when the
2513 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2514 * impossible to predict whether HOT will be applied in 100% of cases.
2515 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2516 * HOT through careful tuning.
2517 */
2518 bypass = false;
2519 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2520 {
2522
2523 Assert(vacrel->num_index_scans == 0);
2524 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2525 Assert(vacrel->do_index_vacuuming);
2526 Assert(vacrel->do_index_cleanup);
2527
2528 /*
2529 * This crossover point at which we'll start to do index vacuuming is
2530 * expressed as a percentage of the total number of heap pages in the
2531 * table that are known to have at least one LP_DEAD item. This is
2532 * much more important than the total number of LP_DEAD items, since
2533 * it's a proxy for the number of heap pages whose visibility map bits
2534 * cannot be set on account of bypassing index and heap vacuuming.
2535 *
2536 * We apply one further precautionary test: the space currently used
2537 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2538 * not exceed 32MB. This limits the risk that we will bypass index
2539 * vacuuming again and again until eventually there is a VACUUM whose
2540 * dead_items space is not CPU cache resident.
2541 *
2542 * We don't take any special steps to remember the LP_DEAD items (such
2543 * as counting them in our final update to the stats system) when the
2544 * optimization is applied. Though the accounting used in analyze.c's
2545 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2546 * rows in its own stats report, that's okay. The discrepancy should
2547 * be negligible. If this optimization is ever expanded to cover more
2548 * cases then this may need to be reconsidered.
2549 */
2551 bypass = (vacrel->lpdead_item_pages < threshold &&
2552 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2553 }
2554
2555 if (bypass)
2556 {
2557 /*
2558 * There are almost zero TIDs. Behave as if there were precisely
2559 * zero: bypass index vacuuming, but do index cleanup.
2560 *
2561 * We expect that the ongoing VACUUM operation will finish very
2562 * quickly, so there is no point in considering speeding up as a
2563 * failsafe against wraparound failure. (Index cleanup is expected to
2564 * finish very quickly in cases where there were no ambulkdelete()
2565 * calls.)
2566 */
2567 vacrel->do_index_vacuuming = false;
2568 }
2570 {
2571 /*
2572 * We successfully completed a round of index vacuuming. Do related
2573 * heap vacuuming now.
2574 */
2576 }
2577 else
2578 {
2579 /*
2580 * Failsafe case.
2581 *
2582 * We attempted index vacuuming, but didn't finish a full round/full
2583 * index scan. This happens when relfrozenxid or relminmxid is too
2584 * far in the past.
2585 *
2586 * From this point on the VACUUM operation will do no further index
2587 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2588 * back here again.
2589 */
2591 }
2592
2593 /*
2594 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2595 * vacuum)
2596 */
2598}

References Assert, BYPASS_THRESHOLD_PAGES, dead_items_reset(), fb(), lazy_vacuum_all_indexes(), lazy_vacuum_heap_rel(), TidStoreMemoryUsage(), and VacuumFailsafeActive.

Referenced by lazy_scan_heap().

◆ lazy_vacuum_all_indexes()

static bool lazy_vacuum_all_indexes ( LVRelState vacrel)
static

Definition at line 2609 of file vacuumlazy.c.

2610{
2611 bool allindexes = true;
2612 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2613 const int progress_start_index[] = {
2616 };
2617 const int progress_end_index[] = {
2621 };
2624
2625 Assert(vacrel->nindexes > 0);
2626 Assert(vacrel->do_index_vacuuming);
2627 Assert(vacrel->do_index_cleanup);
2628
2629 /* Precheck for XID wraparound emergencies */
2631 {
2632 /* Wraparound emergency -- don't even start an index scan */
2633 return false;
2634 }
2635
2636 /*
2637 * Report that we are now vacuuming indexes and the number of indexes to
2638 * vacuum.
2639 */
2641 progress_start_val[1] = vacrel->nindexes;
2643
2645 {
2646 for (int idx = 0; idx < vacrel->nindexes; idx++)
2647 {
2648 Relation indrel = vacrel->indrels[idx];
2649 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2650
2651 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2653 vacrel);
2654
2655 /* Report the number of indexes vacuumed */
2657 idx + 1);
2658
2660 {
2661 /* Wraparound emergency -- end current index scan */
2662 allindexes = false;
2663 break;
2664 }
2665 }
2666 }
2667 else
2668 {
2669 /* Outsource everything to parallel variant */
2671 vacrel->num_index_scans);
2672
2673 /*
2674 * Do a postcheck to consider applying wraparound failsafe now. Note
2675 * that parallel VACUUM only gets the precheck and this postcheck.
2676 */
2678 allindexes = false;
2679 }
2680
2681 /*
2682 * We delete all LP_DEAD items from the first heap pass in all indexes on
2683 * each call here (except calls where we choose to do the failsafe). This
2684 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2685 * of the failsafe triggering, which prevents the next call from taking
2686 * place).
2687 */
2688 Assert(vacrel->num_index_scans > 0 ||
2689 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2691
2692 /*
2693 * Increase and report the number of index scans. Also, we reset
2694 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2695 *
2696 * We deliberately include the case where we started a round of bulk
2697 * deletes that we weren't able to finish due to the failsafe triggering.
2698 */
2699 vacrel->num_index_scans++;
2700 progress_end_val[0] = 0;
2701 progress_end_val[1] = 0;
2702 progress_end_val[2] = vacrel->num_index_scans;
2704
2705 return allindexes;
2706}

References Assert, fb(), idx(), lazy_check_wraparound_failsafe(), lazy_vacuum_one_index(), parallel_vacuum_bulkdel_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_NUM_INDEX_VACUUMS, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX, and VacuumFailsafeActive.

Referenced by lazy_vacuum().

◆ lazy_vacuum_heap_page()

static void lazy_vacuum_heap_page ( LVRelState vacrel,
BlockNumber  blkno,
Buffer  buffer,
OffsetNumber deadoffsets,
int  num_offsets,
Buffer  vmbuffer 
)
static

Definition at line 2872 of file vacuumlazy.c.

2875{
2876 Page page = BufferGetPage(buffer);
2878 int nunused = 0;
2879 TransactionId visibility_cutoff_xid;
2881 bool all_frozen;
2883 uint8 vmflags = 0;
2884
2885 Assert(vacrel->do_index_vacuuming);
2886
2888
2889 /* Update error traceback information */
2893
2894 /*
2895 * Before marking dead items unused, check whether the page will become
2896 * all-visible once that change is applied. This lets us reap the tuples
2897 * and mark the page all-visible within the same critical section,
2898 * enabling both changes to be emitted in a single WAL record. Since the
2899 * visibility checks may perform I/O and allocate memory, they must be
2900 * done outside the critical section.
2901 */
2902 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2903 vacrel->cutoffs.OldestXmin,
2904 deadoffsets, num_offsets,
2905 &all_frozen, &visibility_cutoff_xid,
2906 &vacrel->offnum))
2907 {
2909 if (all_frozen)
2910 {
2912 Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2913 }
2914
2915 /*
2916 * Take the lock on the vmbuffer before entering a critical section.
2917 * The heap page lock must also be held while updating the VM to
2918 * ensure consistency.
2919 */
2921 }
2922
2924
2925 for (int i = 0; i < num_offsets; i++)
2926 {
2927 ItemId itemid;
2928 OffsetNumber toff = deadoffsets[i];
2929
2930 itemid = PageGetItemId(page, toff);
2931
2932 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2933 ItemIdSetUnused(itemid);
2934 unused[nunused++] = toff;
2935 }
2936
2937 Assert(nunused > 0);
2938
2939 /* Attempt to truncate line pointer array now */
2941
2942 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2943 {
2944 /*
2945 * The page is guaranteed to have had dead line pointers, so we always
2946 * set PD_ALL_VISIBLE.
2947 */
2948 PageSetAllVisible(page);
2949 PageClearPrunable(page);
2951 vmbuffer, vmflags,
2952 vacrel->rel->rd_locator);
2953 conflict_xid = visibility_cutoff_xid;
2954 }
2955
2956 /*
2957 * Mark buffer dirty before we write WAL.
2958 */
2959 MarkBufferDirty(buffer);
2960
2961 /* XLOG stuff */
2962 if (RelationNeedsWAL(vacrel->rel))
2963 {
2964 log_heap_prune_and_freeze(vacrel->rel, buffer,
2965 vmflags != 0 ? vmbuffer : InvalidBuffer,
2966 vmflags,
2968 false, /* no cleanup lock required */
2970 NULL, 0, /* frozen */
2971 NULL, 0, /* redirected */
2972 NULL, 0, /* dead */
2973 unused, nunused);
2974 }
2975
2977
2979 {
2980 /* Count the newly set VM page for logging */
2981 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2982 vacrel->new_all_visible_pages++;
2983 if (all_frozen)
2984 vacrel->new_all_visible_all_frozen_pages++;
2985 }
2986
2987 /* Revert to the previous phase information for error traceback */
2989}

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), END_CRIT_SECTION, fb(), heap_page_would_be_all_visible(), i, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, ItemIdHasStorage, ItemIdIsDead, ItemIdSetUnused, LockBuffer(), log_heap_prune_and_freeze(), MarkBufferDirty(), MaxHeapTuplesPerPage, PageClearPrunable, PageGetItemId(), PageSetAllVisible(), PageTruncateLinePointerArray(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PRUNE_VACUUM_CLEANUP, RelationNeedsWAL, restore_vacuum_error_info(), START_CRIT_SECTION, TransactionIdIsValid, update_vacuum_error_info(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_set_vmbits(), and VISIBILITYMAP_VALID_BITS.

Referenced by lazy_vacuum_heap_rel().

◆ lazy_vacuum_heap_rel()

static void lazy_vacuum_heap_rel ( LVRelState vacrel)
static

Definition at line 2754 of file vacuumlazy.c.

2755{
2756 ReadStream *stream;
2758 Buffer vmbuffer = InvalidBuffer;
2760 TidStoreIter *iter;
2761
2762 Assert(vacrel->do_index_vacuuming);
2763 Assert(vacrel->do_index_cleanup);
2764 Assert(vacrel->num_index_scans > 0);
2765
2766 /* Report that we are now vacuuming the heap */
2769
2770 /* Update error traceback information */
2774
2775 iter = TidStoreBeginIterate(vacrel->dead_items);
2776
2777 /*
2778 * Set up the read stream for vacuum's second pass through the heap.
2779 *
2780 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2781 * not need to wait for IO and does not perform locking. Once we support
2782 * parallelism it should still be fine, as presumably the holder of locks
2783 * would never be blocked by IO while holding the lock.
2784 */
2787 vacrel->bstrategy,
2788 vacrel->rel,
2791 iter,
2792 sizeof(TidStoreIterResult));
2793
2794 while (true)
2795 {
2796 BlockNumber blkno;
2797 Buffer buf;
2798 Page page;
2800 Size freespace;
2802 int num_offsets;
2803
2804 vacuum_delay_point(false);
2805
2806 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2807
2808 /* The relation is exhausted */
2809 if (!BufferIsValid(buf))
2810 break;
2811
2812 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2813
2816 Assert(num_offsets <= lengthof(offsets));
2817
2818 /*
2819 * Pin the visibility map page in case we need to mark the page
2820 * all-visible. In most cases this will be very cheap, because we'll
2821 * already have the correct page pinned anyway.
2822 */
2823 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2824
2825 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2827 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2828 num_offsets, vmbuffer);
2829
2830 /* Now that we've vacuumed the page, record its available space */
2831 page = BufferGetPage(buf);
2832 freespace = PageGetHeapFreeSpace(page);
2833
2835 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2837 }
2838
2839 read_stream_end(stream);
2840 TidStoreEndIterate(iter);
2841
2842 vacrel->blkno = InvalidBlockNumber;
2843 if (BufferIsValid(vmbuffer))
2844 ReleaseBuffer(vmbuffer);
2845
2846 /*
2847 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2848 * the second heap pass. No more, no less.
2849 */
2850 Assert(vacrel->num_index_scans > 1 ||
2851 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2852 vacuumed_pages == vacrel->lpdead_item_pages));
2853
2855 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2856 vacrel->relname, vacrel->dead_items_info->num_items,
2857 vacuumed_pages)));
2858
2859 /* Revert to the previous phase information for error traceback */
2861}

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), DEBUG2, ereport, errmsg(), fb(), InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_vacuum_heap_page(), lengthof, LockBuffer(), MAIN_FORKNUM, MaxOffsetNumber, PageGetHeapFreeSpace(), pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, RecordPageWithFreeSpace(), ReleaseBuffer(), restore_vacuum_error_info(), TidStoreBeginIterate(), TidStoreEndIterate(), TidStoreGetBlockOffsets(), UnlockReleaseBuffer(), update_vacuum_error_info(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, vacuum_reap_lp_read_stream_next(), and visibilitymap_pin().

Referenced by lazy_vacuum().

◆ lazy_vacuum_one_index()

static IndexBulkDeleteResult * lazy_vacuum_one_index ( Relation  indrel,
IndexBulkDeleteResult istat,
double  reltuples,
LVRelState vacrel 
)
static

Definition at line 3126 of file vacuumlazy.c.

3128{
3131
3132 ivinfo.index = indrel;
3133 ivinfo.heaprel = vacrel->rel;
3134 ivinfo.analyze_only = false;
3135 ivinfo.report_progress = false;
3136 ivinfo.estimated_count = true;
3137 ivinfo.message_level = DEBUG2;
3138 ivinfo.num_heap_tuples = reltuples;
3139 ivinfo.strategy = vacrel->bstrategy;
3140
3141 /*
3142 * Update error traceback information.
3143 *
3144 * The index name is saved during this phase and restored immediately
3145 * after this phase. See vacuum_error_callback.
3146 */
3147 Assert(vacrel->indname == NULL);
3152
3153 /* Do bulk deletion */
3154 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3155 vacrel->dead_items_info);
3156
3157 /* Revert to the previous phase information for error traceback */
3159 pfree(vacrel->indname);
3160 vacrel->indname = NULL;
3161
3162 return istat;
3163}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_bulkdel_one_index(), and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by lazy_vacuum_all_indexes().

◆ restore_vacuum_error_info()

static void restore_vacuum_error_info ( LVRelState vacrel,
const LVSavedErrInfo saved_vacrel 
)
static

Definition at line 3967 of file vacuumlazy.c.

3969{
3970 vacrel->blkno = saved_vacrel->blkno;
3971 vacrel->offnum = saved_vacrel->offnum;
3972 vacrel->phase = saved_vacrel->phase;
3973}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().

◆ should_attempt_truncation()

static bool should_attempt_truncation ( LVRelState vacrel)
static

Definition at line 3235 of file vacuumlazy.c.

3236{
3238
3239 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3240 return false;
3241
3242 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3243 if (possibly_freeable > 0 &&
3246 return true;
3247
3248 return false;
3249}

References fb(), REL_TRUNCATE_FRACTION, REL_TRUNCATE_MINIMUM, and VacuumFailsafeActive.

Referenced by heap_vacuum_rel().

◆ update_relstats_all_indexes()

static void update_relstats_all_indexes ( LVRelState vacrel)
static

Definition at line 3849 of file vacuumlazy.c.

3850{
3851 Relation *indrels = vacrel->indrels;
3852 int nindexes = vacrel->nindexes;
3853 IndexBulkDeleteResult **indstats = vacrel->indstats;
3854
3855 Assert(vacrel->do_index_cleanup);
3856
3857 for (int idx = 0; idx < nindexes; idx++)
3858 {
3859 Relation indrel = indrels[idx];
3860 IndexBulkDeleteResult *istat = indstats[idx];
3861
3862 if (istat == NULL || istat->estimated_count)
3863 continue;
3864
3865 /* Update index statistics */
3867 istat->num_pages,
3868 istat->num_index_tuples,
3869 0, 0,
3870 false,
3873 NULL, NULL, false);
3874 }
3875}

References Assert, IndexBulkDeleteResult::estimated_count, fb(), idx(), InvalidMultiXactId, InvalidTransactionId, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, and vac_update_relstats().

Referenced by heap_vacuum_rel().

◆ update_vacuum_error_info()

static void update_vacuum_error_info ( LVRelState vacrel,
LVSavedErrInfo saved_vacrel,
int  phase,
BlockNumber  blkno,
OffsetNumber  offnum 
)
static

Definition at line 3948 of file vacuumlazy.c.

3950{
3951 if (saved_vacrel)
3952 {
3953 saved_vacrel->offnum = vacrel->offnum;
3954 saved_vacrel->blkno = vacrel->blkno;
3955 saved_vacrel->phase = vacrel->phase;
3956 }
3957
3958 vacrel->blkno = blkno;
3959 vacrel->offnum = offnum;
3960 vacrel->phase = phase;
3961}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_scan_heap(), lazy_truncate_heap(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().

◆ vacuum_error_callback()

static void vacuum_error_callback ( void arg)
static

Definition at line 3884 of file vacuumlazy.c.

3885{
3887
3888 switch (errinfo->phase)
3889 {
3891 if (BlockNumberIsValid(errinfo->blkno))
3892 {
3893 if (OffsetNumberIsValid(errinfo->offnum))
3894 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3895 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3896 else
3897 errcontext("while scanning block %u of relation \"%s.%s\"",
3898 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3899 }
3900 else
3901 errcontext("while scanning relation \"%s.%s\"",
3902 errinfo->relnamespace, errinfo->relname);
3903 break;
3904
3906 if (BlockNumberIsValid(errinfo->blkno))
3907 {
3908 if (OffsetNumberIsValid(errinfo->offnum))
3909 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3910 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3911 else
3912 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3913 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3914 }
3915 else
3916 errcontext("while vacuuming relation \"%s.%s\"",
3917 errinfo->relnamespace, errinfo->relname);
3918 break;
3919
3921 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3922 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3923 break;
3924
3926 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3927 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3928 break;
3929
3931 if (BlockNumberIsValid(errinfo->blkno))
3932 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3933 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3934 break;
3935
3937 default:
3938 return; /* do nothing; the errinfo may not be
3939 * initialized */
3940 }
3941}

References arg, BlockNumberIsValid(), errcontext, fb(), OffsetNumberIsValid, VACUUM_ERRCB_PHASE_INDEX_CLEANUP, VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_ERRCB_PHASE_UNKNOWN, VACUUM_ERRCB_PHASE_VACUUM_HEAP, and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by heap_vacuum_rel().

◆ vacuum_reap_lp_read_stream_next()

static BlockNumber vacuum_reap_lp_read_stream_next ( ReadStream stream,
void callback_private_data,
void per_buffer_data 
)
static

Definition at line 2716 of file vacuumlazy.c.

2719{
2720 TidStoreIter *iter = callback_private_data;
2722
2724 if (iter_result == NULL)
2725 return InvalidBlockNumber;
2726
2727 /*
2728 * Save the TidStoreIterResult for later, so we can extract the offsets.
2729 * It is safe to copy the result, according to TidStoreIterateNext().
2730 */
2731 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2732
2733 return iter_result->blkno;
2734}

References fb(), InvalidBlockNumber, and TidStoreIterateNext().

Referenced by lazy_vacuum_heap_rel().