PostgreSQL Source Code git master
vacuumlazy.c File Reference
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/tidstore.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/storage.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/lsyscache.h"
#include "utils/pg_rusage.h"
#include "utils/timestamp.h"

Data Structures

struct  LVRelState
 
struct  LVSavedErrInfo
 

Macros

#define REL_TRUNCATE_MINIMUM   1000
 
#define REL_TRUNCATE_FRACTION   16
 
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */
 
#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */
 
#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)
 
#define PREFETCH_SIZE   ((BlockNumber) 32)
 
#define ParallelVacuumIsActive(vacrel)   ((vacrel)->pvs != NULL)
 
#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2
 
#define EAGER_SCAN_REGION_SIZE   4096
 

Typedefs

typedef struct LVRelState LVRelState
 
typedef struct LVSavedErrInfo LVSavedErrInfo
 

Enumerations

enum  VacErrPhase {
  VACUUM_ERRCB_PHASE_UNKNOWN , VACUUM_ERRCB_PHASE_SCAN_HEAP , VACUUM_ERRCB_PHASE_VACUUM_INDEX , VACUUM_ERRCB_PHASE_VACUUM_HEAP ,
  VACUUM_ERRCB_PHASE_INDEX_CLEANUP , VACUUM_ERRCB_PHASE_TRUNCATE
}
 

Functions

static void lazy_scan_heap (LVRelState *vacrel)
 
static void heap_vacuum_eager_scan_setup (LVRelState *vacrel, const VacuumParams params)
 
static BlockNumber heap_vac_scan_next_block (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 
static void find_next_unskippable_block (LVRelState *vacrel, bool *skipsallvis)
 
static bool lazy_scan_new_or_empty (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
 
static void identify_and_fix_vm_corruption (Relation rel, Buffer heap_buffer, BlockNumber heap_blk, Page heap_page, int nlpdead_items, Buffer vmbuffer, uint8 *vmbits)
 
static int lazy_scan_prune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool *has_lpdead_items, bool *vm_page_frozen)
 
static bool lazy_scan_noprune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
 
static void lazy_vacuum (LVRelState *vacrel)
 
static bool lazy_vacuum_all_indexes (LVRelState *vacrel)
 
static void lazy_vacuum_heap_rel (LVRelState *vacrel)
 
static void lazy_vacuum_heap_page (LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
 
static bool lazy_check_wraparound_failsafe (LVRelState *vacrel)
 
static void lazy_cleanup_all_indexes (LVRelState *vacrel)
 
static IndexBulkDeleteResult * lazy_vacuum_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
 
static IndexBulkDeleteResult * lazy_cleanup_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
 
static bool should_attempt_truncation (LVRelState *vacrel)
 
static void lazy_truncate_heap (LVRelState *vacrel)
 
static BlockNumber count_nondeletable_pages (LVRelState *vacrel, bool *lock_waiter_detected)
 
static void dead_items_alloc (LVRelState *vacrel, int nworkers)
 
static void dead_items_add (LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
 
static void dead_items_reset (LVRelState *vacrel)
 
static void dead_items_cleanup (LVRelState *vacrel)
 
static bool heap_page_would_be_all_visible (Relation rel, Buffer buf, TransactionId OldestXmin, OffsetNumber *deadoffsets, int ndeadoffsets, bool *all_frozen, TransactionId *visibility_cutoff_xid, OffsetNumber *logging_offnum)
 
static void update_relstats_all_indexes (LVRelState *vacrel)
 
static void vacuum_error_callback (void *arg)
 
static void update_vacuum_error_info (LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
 
static void restore_vacuum_error_info (LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
static int cmpOffsetNumbers (const void *a, const void *b)
 
static BlockNumber vacuum_reap_lp_read_stream_next (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 

Macro Definition Documentation

◆ BYPASS_THRESHOLD_PAGES

#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */

Definition at line 184 of file vacuumlazy.c.
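
As a worked example (not taken from the file): for a table of 500,000 heap pages, lazy_vacuum() can only consider bypassing index vacuuming while fewer than 0.02 * 500,000 = 10,000 pages contain LP_DEAD items; the bypass is also subject to the other conditions checked in lazy_vacuum().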

◆ EAGER_SCAN_REGION_SIZE

#define EAGER_SCAN_REGION_SIZE   4096

Definition at line 247 of file vacuumlazy.c.
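
For illustration, assuming the default vacuum_max_eager_freeze_failure_rate of 0.03, a normal vacuum tolerates roughly 0.03 * 4096 ≈ 122 eager freeze failures within each 4096-block region before eager scanning is suspended until the next region begins.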

◆ FAILSAFE_EVERY_PAGES

#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 190 of file vacuumlazy.c.
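
With the common 8kB block size (BLCKSZ = 8192), this works out to (4 * 1024 * 1024 * 1024) / 8192 = 524,288 blocks, so lazy_scan_heap() rechecks the wraparound failsafe roughly once per 4GB of scanned heap.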

221{
222 VACUUM_ERRCB_PHASE_UNKNOWN,
223 VACUUM_ERRCB_PHASE_SCAN_HEAP,
224 VACUUM_ERRCB_PHASE_VACUUM_INDEX,
225 VACUUM_ERRCB_PHASE_VACUUM_HEAP,
226 VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
227 VACUUM_ERRCB_PHASE_TRUNCATE,
228} VacErrPhase;
229
230/*
231 * An eager scan of a page that is set all-frozen in the VM is considered
232 * "successful". To spread out freezing overhead across multiple normal
233 * vacuums, we limit the number of successful eager page freezes. The maximum
234 * number of eager page freezes is calculated as a ratio of the all-visible
235 * but not all-frozen pages at the beginning of the vacuum.
236 */
237#define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
238
239/*
240 * On the assumption that different regions of the table tend to have
241 * similarly aged data, once vacuum fails to freeze
242 * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
243 * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
244 * to another region of the table with potentially older data.
245 */
246#define EAGER_SCAN_REGION_SIZE 4096
247
248typedef struct LVRelState
249{
250 /* Target heap relation and its indexes */
251 Relation rel;
252 Relation *indrels;
253 int nindexes;
254
255 /* Buffer access strategy and parallel vacuum state */
256 BufferAccessStrategy bstrategy;
257 ParallelVacuumState *pvs;
258
259 /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
260 bool aggressive;
261 /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
262 bool skipwithvm;
263 /* Consider index vacuuming bypass optimization? */
264 bool consider_bypass_optimization;
265
266 /* Doing index vacuuming, index cleanup, rel truncation? */
267 bool do_index_vacuuming;
268 bool do_index_cleanup;
269 bool do_rel_truncate;
270
271 /* VACUUM operation's cutoffs for freezing and pruning */
272 struct VacuumCutoffs cutoffs;
273 GlobalVisState *vistest;
274 /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
275 TransactionId NewRelfrozenXid;
276 MultiXactId NewRelminMxid;
277 bool skippedallvis;
278
279 /* Error reporting state */
280 char *dbname;
281 char *relnamespace;
282 char *relname;
283 char *indname; /* Current index name */
284 BlockNumber blkno; /* used only for heap operations */
285 OffsetNumber offnum; /* used only for heap operations */
286 VacErrPhase phase;
287 bool verbose; /* VACUUM VERBOSE? */
288
289 /*
290 * dead_items stores TIDs whose index tuples are deleted by index
291 * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
292 * that has been processed by lazy_scan_prune. Also needed by
293 * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
294 * LP_UNUSED during second heap pass.
295 *
296 * Both dead_items and dead_items_info are allocated in shared memory in
297 * parallel vacuum cases.
298 */
299 TidStore *dead_items; /* TIDs whose index tuples we'll delete */
300 VacDeadItemsInfo *dead_items_info;
301
302 BlockNumber rel_pages; /* total number of pages */
303 BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
304
305 /*
306 * Count of all-visible blocks eagerly scanned (for logging only). This
307 * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
308 */
309 BlockNumber eager_scanned_pages;
310
311 BlockNumber removed_pages; /* # pages removed by relation truncation */
312 BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
313
314 /* # pages newly set all-visible in the VM */
315 BlockNumber vm_new_visible_pages;
316
317 /*
318 * # pages newly set all-visible and all-frozen in the VM. This is a
319 * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
320 * all pages set all-visible, but vm_new_visible_frozen_pages includes
321 * only those which were also set all-frozen.
322 */
323 BlockNumber vm_new_visible_frozen_pages;
324
325 /* # all-visible pages newly set all-frozen in the VM */
326 BlockNumber vm_new_frozen_pages;
327
328 BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
329 BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
330 BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
331
332 /* Statistics output by us, for table */
333 double new_rel_tuples; /* new estimated total # of tuples */
334 double new_live_tuples; /* new estimated total # of live tuples */
335 /* Statistics output by index AMs */
336 IndexBulkDeleteResult **indstats;
337
338 /* Instrumentation counters */
339 int num_index_scans;
340 int num_dead_items_resets;
341 int64 total_dead_items_bytes;
342 /* Counters that follow are only for scanned_pages */
343 int64 tuples_deleted; /* # deleted from table */
344 int64 tuples_frozen; /* # newly frozen */
345 int64 lpdead_items; /* # deleted from indexes */
346 int64 live_tuples; /* # live tuples remaining */
347 int64 recently_dead_tuples; /* # dead, but not yet removable */
348 int64 missed_dead_tuples; /* # removable, but not removed */
349
350 /* State maintained by heap_vac_scan_next_block() */
351 BlockNumber current_block; /* last block returned */
352 BlockNumber next_unskippable_block; /* next unskippable block */
353 bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
354 Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
355
356 /* State related to managing eager scanning of all-visible pages */
357
358 /*
359 * A normal vacuum that has failed to freeze too many eagerly scanned
360 * blocks in a region suspends eager scanning.
361 * next_eager_scan_region_start is the block number of the first block
362 * eligible for resumed eager scanning.
363 *
364 * When eager scanning is permanently disabled, either initially
365 * (including for aggressive vacuum) or due to hitting the success cap,
366 * this is set to InvalidBlockNumber.
367 */
368 BlockNumber next_eager_scan_region_start;
369
370 /*
371 * The remaining number of blocks a normal vacuum will consider eager
372 * scanning when it is successful. When eager scanning is enabled, this is
373 * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
374 * all-visible but not all-frozen pages. For each eager freeze success,
375 * this is decremented. Once it hits 0, eager scanning is permanently
376 * disabled. It is initialized to 0 if eager scanning starts out disabled
377 * (including for aggressive vacuum).
378 */
379 BlockNumber eager_scan_remaining_successes;
380
381 /*
382 * The maximum number of blocks which may be eagerly scanned and not
383 * frozen before eager scanning is temporarily suspended. This is
384 * configurable both globally, via the
385 * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
386 * storage parameter of the same name. It is calculated as
387 * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
388 * It is 0 when eager scanning is disabled.
389 */
390 BlockNumber eager_scan_max_fails_per_region;
391
392 /*
393 * The number of eagerly scanned blocks vacuum failed to freeze (due to
394 * age) in the current eager scan region. Vacuum resets it to
395 * eager_scan_max_fails_per_region each time it enters a new region of the
396 * relation. If eager_scan_remaining_fails hits 0, eager scanning is
397 * suspended until the next region. It is also 0 if eager scanning has
398 * been permanently disabled.
399 */
400 BlockNumber eager_scan_remaining_fails;
401} LVRelState;
402
403
404/* Struct for saving and restoring vacuum error information. */
405typedef struct LVSavedErrInfo
406{
407 BlockNumber blkno;
408 OffsetNumber offnum;
409 VacErrPhase phase;
410} LVSavedErrInfo;
411
412
413/* non-export function prototypes */
414static void lazy_scan_heap(LVRelState *vacrel);
416 const VacuumParams params);
418 void *callback_private_data,
419 void *per_buffer_data);
422 BlockNumber blkno, Page page,
423 bool sharelock, Buffer vmbuffer);
426 int nlpdead_items,
427 Buffer vmbuffer,
428 uint8 *vmbits);
430 BlockNumber blkno, Page page,
431 Buffer vmbuffer,
432 bool *has_lpdead_items, bool *vm_page_frozen);
434 BlockNumber blkno, Page page,
435 bool *has_lpdead_items);
436static void lazy_vacuum(LVRelState *vacrel);
440 Buffer buffer, OffsetNumber *deadoffsets,
441 int num_offsets, Buffer vmbuffer);
446 double reltuples,
450 double reltuples,
451 bool estimated_count,
457static void dead_items_alloc(LVRelState *vacrel, int nworkers);
458static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
459 int num_offsets);
462
463#ifdef USE_ASSERT_CHECKING
465 TransactionId OldestXmin,
466 bool *all_frozen,
467 TransactionId *visibility_cutoff_xid,
469#endif
471 TransactionId OldestXmin,
472 OffsetNumber *deadoffsets,
473 int ndeadoffsets,
474 bool *all_frozen,
475 TransactionId *visibility_cutoff_xid,
478static void vacuum_error_callback(void *arg);
481 int phase, BlockNumber blkno,
482 OffsetNumber offnum);
485
486
487
488/*
489 * Helper to set up the eager scanning state for vacuuming a single relation.
490 * Initializes the eager scan management related members of the LVRelState.
491 *
492 * Caller provides whether or not an aggressive vacuum is required due to
493 * vacuum options or for relfrozenxid/relminmxid advancement.
494 */
495static void
497{
501 float first_region_ratio;
503
504 /*
505 * Initialize eager scan management fields to their disabled values.
506 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
507 * of tables without sufficiently old tuples disable eager scanning.
508 */
509 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
510 vacrel->eager_scan_max_fails_per_region = 0;
511 vacrel->eager_scan_remaining_fails = 0;
512 vacrel->eager_scan_remaining_successes = 0;
513
514 /* If eager scanning is explicitly disabled, just return. */
515 if (params.max_eager_freeze_failure_rate == 0)
516 return;
517
518 /*
519 * The caller will have determined whether or not an aggressive vacuum is
520 * required by either the vacuum parameters or the relative age of the
521 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
522 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
523 * so scans of all-visible pages are not considered eager.
524 */
525 if (vacrel->aggressive)
526 return;
527
528 /*
529 * Aggressively vacuuming a small relation shouldn't take long, so it
530 * isn't worth amortizing. We use two times the region size as the size
531 * cutoff because the eager scan start block is a random spot somewhere in
532 * the first region, making the second region the first to be eager
533 * scanned normally.
534 */
535 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
536 return;
537
538 /*
539 * We only want to enable eager scanning if we are likely to be able to
540 * freeze some of the pages in the relation.
541 *
542 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
543 * are technically freezable, but we won't freeze them unless the criteria
544 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
545 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
546 *
547 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
548 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
549 * enable eager scanning.
550 */
551 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
552 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
553 vacrel->cutoffs.FreezeLimit))
555
557 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
558 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
559 vacrel->cutoffs.MultiXactCutoff))
561
563 return;
564
565 /* We have met the criteria to eagerly scan some pages. */
566
567 /*
568 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
569 * all-visible but not all-frozen blocks in the relation.
570 */
572
573 vacrel->eager_scan_remaining_successes =
576
577 /* If every all-visible page is frozen, eager scanning is disabled. */
578 if (vacrel->eager_scan_remaining_successes == 0)
579 return;
580
581 /*
582 * Now calculate the bounds of the first eager scan region. Its end block
583 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
584 * blocks. This affects the bounds of all subsequent regions and avoids
585 * eager scanning and failing to freeze the same blocks each vacuum of the
586 * relation.
587 */
589
590 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
591
594
595 vacrel->eager_scan_max_fails_per_region =
598
599 /*
600 * The first region will be smaller than subsequent regions. As such,
601 * adjust the eager freeze failures tolerated for this region.
602 */
603 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
605
606 vacrel->eager_scan_remaining_fails =
607 vacrel->eager_scan_max_fails_per_region *
609}
610
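A rough numeric sketch of the state this function leaves behind, assuming the default vacuum_max_eager_freeze_failure_rate of 0.03 and a relation with 100,000 all-visible but not all-frozen pages (illustrative values, not from this file):

    eager_scan_remaining_successes  = 0.2 * 100000            /* 20,000 blocks */
    eager_scan_max_fails_per_region = 0.03 * 4096             /* ~122 blocks */
    next_eager_scan_region_start    = randseed % 4096         /* e.g. 1024 */
    eager_scan_remaining_fails      = 122 * (1 - 1024/4096.0) /* ~92 in the first, smaller region */
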
611/*
612 * heap_vacuum_rel() -- perform VACUUM for one heap relation
613 *
614 * This routine sets things up for and then calls lazy_scan_heap, where
615 * almost all work actually takes place. Finalizes everything after call
616 * returns by managing relation truncation and updating rel's pg_class
617 * entry. (Also updates pg_class entries for any indexes that need it.)
618 *
619 * At entry, we have already established a transaction and opened
620 * and locked the relation.
621 */
622void
623heap_vacuum_rel(Relation rel, const VacuumParams params,
624 BufferAccessStrategy bstrategy)
625{
627 bool verbose,
628 instrument,
629 skipwithvm,
637 TimestampTz starttime = 0;
639 startwritetime = 0;
642 ErrorContextCallback errcallback;
643 char **indnames = NULL;
645
646 verbose = (params.options & VACOPT_VERBOSE) != 0;
647 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
648 params.log_vacuum_min_duration >= 0));
649 if (instrument)
650 {
652 if (track_io_timing)
653 {
656 }
657 }
658
659 /* Used for instrumentation and stats report */
660 starttime = GetCurrentTimestamp();
661
663 RelationGetRelid(rel));
666 params.is_wraparound
669 else
672
673 /*
674 * Setup error traceback support for ereport() first. The idea is to set
675 * up an error context callback to display additional information on any
676 * error during a vacuum. During different phases of vacuum, we update
 677 * the state so that the error context callback always displays the current
678 * information.
679 *
680 * Copy the names of heap rel into local memory for error reporting
681 * purposes, too. It isn't always safe to assume that we can get the name
682 * of each rel. It's convenient for code in lazy_scan_heap to always use
683 * these temp copies.
684 */
687 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
688 vacrel->relname = pstrdup(RelationGetRelationName(rel));
689 vacrel->indname = NULL;
691 vacrel->verbose = verbose;
692 errcallback.callback = vacuum_error_callback;
693 errcallback.arg = vacrel;
694 errcallback.previous = error_context_stack;
695 error_context_stack = &errcallback;
696
697 /* Set up high level stuff about rel and its indexes */
698 vacrel->rel = rel;
700 &vacrel->indrels);
701 vacrel->bstrategy = bstrategy;
702 if (instrument && vacrel->nindexes > 0)
703 {
704 /* Copy index names used by instrumentation (not error reporting) */
705 indnames = palloc_array(char *, vacrel->nindexes);
706 for (int i = 0; i < vacrel->nindexes; i++)
708 }
709
710 /*
711 * The index_cleanup param either disables index vacuuming and cleanup or
712 * forces it to go ahead when we would otherwise apply the index bypass
713 * optimization. The default is 'auto', which leaves the final decision
714 * up to lazy_vacuum().
715 *
716 * The truncate param allows user to avoid attempting relation truncation,
717 * though it can't force truncation to happen.
718 */
721 params.truncate != VACOPTVALUE_AUTO);
722
723 /*
724 * While VacuumFailSafeActive is reset to false before calling this, we
725 * still need to reset it here due to recursive calls.
726 */
727 VacuumFailsafeActive = false;
728 vacrel->consider_bypass_optimization = true;
729 vacrel->do_index_vacuuming = true;
730 vacrel->do_index_cleanup = true;
731 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
733 {
734 /* Force disable index vacuuming up-front */
735 vacrel->do_index_vacuuming = false;
736 vacrel->do_index_cleanup = false;
737 }
738 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
739 {
740 /* Force index vacuuming. Note that failsafe can still bypass. */
741 vacrel->consider_bypass_optimization = false;
742 }
743 else
744 {
745 /* Default/auto, make all decisions dynamically */
747 }
748
749 /* Initialize page counters explicitly (be tidy) */
750 vacrel->scanned_pages = 0;
751 vacrel->eager_scanned_pages = 0;
752 vacrel->removed_pages = 0;
753 vacrel->new_frozen_tuple_pages = 0;
754 vacrel->lpdead_item_pages = 0;
755 vacrel->missed_dead_pages = 0;
756 vacrel->nonempty_pages = 0;
757 /* dead_items_alloc allocates vacrel->dead_items later on */
758
759 /* Allocate/initialize output statistics state */
760 vacrel->new_rel_tuples = 0;
761 vacrel->new_live_tuples = 0;
762 vacrel->indstats = (IndexBulkDeleteResult **)
763 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
764
765 /* Initialize remaining counters (be tidy) */
766 vacrel->num_index_scans = 0;
767 vacrel->num_dead_items_resets = 0;
768 vacrel->total_dead_items_bytes = 0;
769 vacrel->tuples_deleted = 0;
770 vacrel->tuples_frozen = 0;
771 vacrel->lpdead_items = 0;
772 vacrel->live_tuples = 0;
773 vacrel->recently_dead_tuples = 0;
774 vacrel->missed_dead_tuples = 0;
775
776 vacrel->vm_new_visible_pages = 0;
777 vacrel->vm_new_visible_frozen_pages = 0;
778 vacrel->vm_new_frozen_pages = 0;
779
780 /*
781 * Get cutoffs that determine which deleted tuples are considered DEAD,
782 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
783 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
784 * happen in this order to ensure that the OldestXmin cutoff field works
785 * as an upper bound on the XIDs stored in the pages we'll actually scan
786 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
787 *
788 * Next acquire vistest, a related cutoff that's used in pruning. We use
789 * vistest in combination with OldestXmin to ensure that
790 * heap_page_prune_and_freeze() always removes any deleted tuple whose
791 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
792 * whether a tuple should be frozen or removed. (In the future we might
793 * want to teach lazy_scan_prune to recompute vistest from time to time,
794 * to increase the number of dead tuples it can prune away.)
795 */
796 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
798 vacrel->vistest = GlobalVisTestFor(rel);
799
800 /* Initialize state used to track oldest extant XID/MXID */
801 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
802 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
803
804 /*
805 * Initialize state related to tracking all-visible page skipping. This is
806 * very important to determine whether or not it is safe to advance the
807 * relfrozenxid/relminmxid.
808 */
809 vacrel->skippedallvis = false;
810 skipwithvm = true;
812 {
813 /*
814 * Force aggressive mode, and disable skipping blocks using the
815 * visibility map (even those set all-frozen)
816 */
817 vacrel->aggressive = true;
818 skipwithvm = false;
819 }
820
821 vacrel->skipwithvm = skipwithvm;
822
823 /*
824 * Set up eager scan tracking state. This must happen after determining
825 * whether or not the vacuum must be aggressive, because only normal
826 * vacuums use the eager scan algorithm.
827 */
829
830 /* Report the vacuum mode: 'normal' or 'aggressive' */
832 vacrel->aggressive
835
836 if (verbose)
837 {
838 if (vacrel->aggressive)
840 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
841 vacrel->dbname, vacrel->relnamespace,
842 vacrel->relname)));
843 else
845 (errmsg("vacuuming \"%s.%s.%s\"",
846 vacrel->dbname, vacrel->relnamespace,
847 vacrel->relname)));
848 }
849
850 /*
851 * Allocate dead_items memory using dead_items_alloc. This handles
852 * parallel VACUUM initialization as part of allocating shared memory
853 * space used for dead_items. (But do a failsafe precheck first, to
854 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
855 * is already dangerously old.)
856 */
859
860 /*
861 * Call lazy_scan_heap to perform all required heap pruning, index
862 * vacuuming, and heap vacuuming (plus related processing)
863 */
865
866 /*
867 * Save dead items max_bytes and update the memory usage statistics before
 868 * cleanup; they are freed in parallel vacuum cases during
869 * dead_items_cleanup().
870 */
871 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
872 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
873
874 /*
875 * Free resources managed by dead_items_alloc. This ends parallel mode in
876 * passing when necessary.
877 */
880
881 /*
882 * Update pg_class entries for each of rel's indexes where appropriate.
883 *
884 * Unlike the later update to rel's pg_class entry, this is not critical.
885 * Maintains relpages/reltuples statistics used by the planner only.
886 */
887 if (vacrel->do_index_cleanup)
889
890 /* Done with rel's indexes */
891 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
892
893 /* Optionally truncate rel */
896
897 /* Pop the error context stack */
898 error_context_stack = errcallback.previous;
899
900 /* Report that we are now doing final cleanup */
903
904 /*
905 * Prepare to update rel's pg_class entry.
906 *
907 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
908 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
909 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
910 */
911 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
912 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
913 vacrel->cutoffs.relfrozenxid,
914 vacrel->NewRelfrozenXid));
915 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
916 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
917 vacrel->cutoffs.relminmxid,
918 vacrel->NewRelminMxid));
919 if (vacrel->skippedallvis)
920 {
921 /*
922 * Must keep original relfrozenxid in a non-aggressive VACUUM that
923 * chose to skip an all-visible page range. The state that tracks new
924 * values will have missed unfrozen XIDs from the pages we skipped.
925 */
926 Assert(!vacrel->aggressive);
927 vacrel->NewRelfrozenXid = InvalidTransactionId;
928 vacrel->NewRelminMxid = InvalidMultiXactId;
929 }
930
931 /*
932 * For safety, clamp relallvisible to be not more than what we're setting
933 * pg_class.relpages to
934 */
935 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
939
940 /*
941 * An all-frozen block _must_ be all-visible. As such, clamp the count of
942 * all-frozen blocks to the count of all-visible blocks. This matches the
943 * clamping of relallvisible above.
944 */
947
948 /*
949 * Now actually update rel's pg_class entry.
950 *
951 * In principle new_live_tuples could be -1 indicating that we (still)
952 * don't know the tuple count. In practice that can't happen, since we
953 * scan every page that isn't skipped using the visibility map.
954 */
955 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
957 vacrel->nindexes > 0,
958 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
960
961 /*
962 * Report results to the cumulative stats system, too.
963 *
964 * Deliberately avoid telling the stats system about LP_DEAD items that
965 * remain in the table due to VACUUM bypassing index and heap vacuuming.
966 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
967 * It seems like a good idea to err on the side of not vacuuming again too
968 * soon in cases where the failsafe prevented significant amounts of heap
969 * vacuuming.
970 */
972 Max(vacrel->new_live_tuples, 0),
973 vacrel->recently_dead_tuples +
974 vacrel->missed_dead_tuples,
975 starttime);
977
978 if (instrument)
979 {
981
982 if (verbose || params.log_vacuum_min_duration == 0 ||
985 {
986 long secs_dur;
987 int usecs_dur;
988 WalUsage walusage;
989 BufferUsage bufferusage;
991 char *msgfmt;
992 int32 diff;
993 double read_rate = 0,
994 write_rate = 0;
998
1000 memset(&walusage, 0, sizeof(WalUsage));
1002 memset(&bufferusage, 0, sizeof(BufferUsage));
1004
1005 total_blks_hit = bufferusage.shared_blks_hit +
1006 bufferusage.local_blks_hit;
1007 total_blks_read = bufferusage.shared_blks_read +
1008 bufferusage.local_blks_read;
1010 bufferusage.local_blks_dirtied;
1011
1013 if (verbose)
1014 {
1015 /*
1016 * Aggressiveness already reported earlier, in dedicated
1017 * VACUUM VERBOSE ereport
1018 */
1019 Assert(!params.is_wraparound);
1020 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1021 }
1022 else if (params.is_wraparound)
1023 {
1024 /*
1025 * While it's possible for a VACUUM to be both is_wraparound
1026 * and !aggressive, that's just a corner-case -- is_wraparound
1027 * implies aggressive. Produce distinct output for the corner
1028 * case all the same, just in case.
1029 */
1030 if (vacrel->aggressive)
1031 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1032 else
1033 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1034 }
1035 else
1036 {
1037 if (vacrel->aggressive)
1038 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1039 else
1040 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1041 }
1043 vacrel->dbname,
1044 vacrel->relnamespace,
1045 vacrel->relname,
1046 vacrel->num_index_scans);
1047 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1048 vacrel->removed_pages,
1050 vacrel->scanned_pages,
1051 orig_rel_pages == 0 ? 100.0 :
1052 100.0 * vacrel->scanned_pages /
1054 vacrel->eager_scanned_pages);
1056 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1057 vacrel->tuples_deleted,
1058 (int64) vacrel->new_rel_tuples,
1059 vacrel->recently_dead_tuples);
1060 if (vacrel->missed_dead_tuples > 0)
1062 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1063 vacrel->missed_dead_tuples,
1064 vacrel->missed_dead_pages);
1066 vacrel->cutoffs.OldestXmin);
1068 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1069 vacrel->cutoffs.OldestXmin, diff);
1071 {
1072 diff = (int32) (vacrel->NewRelfrozenXid -
1073 vacrel->cutoffs.relfrozenxid);
1075 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1076 vacrel->NewRelfrozenXid, diff);
1077 }
1078 if (minmulti_updated)
1079 {
1080 diff = (int32) (vacrel->NewRelminMxid -
1081 vacrel->cutoffs.relminmxid);
1083 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1084 vacrel->NewRelminMxid, diff);
1085 }
1086 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1087 vacrel->new_frozen_tuple_pages,
1088 orig_rel_pages == 0 ? 100.0 :
1089 100.0 * vacrel->new_frozen_tuple_pages /
1091 vacrel->tuples_frozen);
1092
1094 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1095 vacrel->vm_new_visible_pages,
1096 vacrel->vm_new_visible_frozen_pages +
1097 vacrel->vm_new_frozen_pages,
1098 vacrel->vm_new_frozen_pages);
1099 if (vacrel->do_index_vacuuming)
1100 {
1101 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1102 appendStringInfoString(&buf, _("index scan not needed: "));
1103 else
1104 appendStringInfoString(&buf, _("index scan needed: "));
1105
1106 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1107 }
1108 else
1109 {
1111 appendStringInfoString(&buf, _("index scan bypassed: "));
1112 else
1113 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1114
1115 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1116 }
1118 vacrel->lpdead_item_pages,
1119 orig_rel_pages == 0 ? 100.0 :
1120 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1121 vacrel->lpdead_items);
1122 for (int i = 0; i < vacrel->nindexes; i++)
1123 {
1124 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1125
1126 if (!istat)
1127 continue;
1128
1130 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1131 indnames[i],
1132 istat->num_pages,
1133 istat->pages_newly_deleted,
1134 istat->pages_deleted,
1135 istat->pages_free);
1136 }
1138 {
1139 /*
1140 * We bypass the changecount mechanism because this value is
1141 * only updated by the calling process. We also rely on the
1142 * above call to pgstat_progress_end_command() to not clear
1143 * the st_progress_param array.
1144 */
1145 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1147 }
1148 if (track_io_timing)
1149 {
1150 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1151 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1152
1153 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1154 read_ms, write_ms);
1155 }
1156 if (secs_dur > 0 || usecs_dur > 0)
1157 {
1159 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1161 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1162 }
1163 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1166 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1171 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1172 walusage.wal_records,
1173 walusage.wal_fpi,
1174 walusage.wal_bytes,
1175 walusage.wal_fpi_bytes,
1176 walusage.wal_buffers_full);
1177
1178 /*
1179 * Report the dead items memory usage.
1180 *
1181 * The num_dead_items_resets counter increases when we reset the
1182 * collected dead items, so the counter is non-zero if at least
1183 * one dead item has been collected, even if index vacuuming is
1184 * disabled.
1185 */
1187 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1188 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1189 vacrel->num_dead_items_resets),
1190 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1191 vacrel->num_dead_items_resets,
1192 (double) dead_items_max_bytes / (1024 * 1024));
1193 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1194
1195 ereport(verbose ? INFO : LOG,
1196 (errmsg_internal("%s", buf.data)));
1197 pfree(buf.data);
1198 }
1199 }
1200
1201 /* Cleanup index statistics and index names */
1202 for (int i = 0; i < vacrel->nindexes; i++)
1203 {
1204 if (vacrel->indstats[i])
1205 pfree(vacrel->indstats[i]);
1206
1207 if (instrument)
1208 pfree(indnames[i]);
1209 }
1210}
1211
1212/*
1213 * lazy_scan_heap() -- workhorse function for VACUUM
1214 *
1215 * This routine prunes each page in the heap, and considers the need to
1216 * freeze remaining tuples with storage (not including pages that can be
1217 * skipped using the visibility map). Also performs related maintenance
1218 * of the FSM and visibility map. These steps all take place during an
1219 * initial pass over the target heap relation.
1220 *
1221 * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1222 * consists of deleting index tuples that point to LP_DEAD items left in
1223 * heap pages following pruning. Earlier initial pass over the heap will
1224 * have collected the TIDs whose index tuples need to be removed.
1225 *
1226 * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1227 * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1228 * as LP_UNUSED. This has to happen in a second, final pass over the
1229 * heap, to preserve a basic invariant that all index AMs rely on: no
1230 * extant index tuple can ever be allowed to contain a TID that points to
1231 * an LP_UNUSED line pointer in the heap. We must disallow premature
1232 * recycling of line pointers to avoid index scans that get confused
1233 * about which TID points to which tuple immediately after recycling.
1234 * (Actually, this isn't a concern when target heap relation happens to
1235 * have no indexes, which allows us to safely apply the one-pass strategy
1236 * as an optimization).
1237 *
1238 * In practice we often have enough space to fit all TIDs, and so won't
1239 * need to call lazy_vacuum more than once, after our initial pass over
1240 * the heap has totally finished. Otherwise things are slightly more
1241 * complicated: our "initial pass" over the heap applies only to those
1242 * pages that were pruned before we needed to call lazy_vacuum, and our
1243 * "final pass" over the heap only vacuums these same heap pages.
1244 * However, we process indexes in full every time lazy_vacuum is called,
1245 * which makes index processing very inefficient when memory is in short
1246 * supply.
1247 */
1248static void
1250{
1251 ReadStream *stream;
1252 BlockNumber rel_pages = vacrel->rel_pages,
1253 blkno = 0,
1256 vacrel->eager_scan_remaining_successes; /* for logging */
1257 Buffer vmbuffer = InvalidBuffer;
1258 const int initprog_index[] = {
1262 };
1264
1265 /* Report that we're scanning the heap, advertising total # of blocks */
1267 initprog_val[1] = rel_pages;
1268 initprog_val[2] = vacrel->dead_items_info->max_bytes;
1270
1271 /* Initialize for the first heap_vac_scan_next_block() call */
1272 vacrel->current_block = InvalidBlockNumber;
1273 vacrel->next_unskippable_block = InvalidBlockNumber;
1274 vacrel->next_unskippable_eager_scanned = false;
1275 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1276
1277 /*
1278 * Set up the read stream for vacuum's first pass through the heap.
1279 *
1280 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1281 * explicit work in heap_vac_scan_next_block.
1282 */
1284 vacrel->bstrategy,
1285 vacrel->rel,
1288 vacrel,
1289 sizeof(bool));
1290
1291 while (true)
1292 {
1293 Buffer buf;
1294 Page page;
1295 bool was_eager_scanned = false;
1296 int ndeleted = 0;
1297 bool has_lpdead_items;
1298 void *per_buffer_data = NULL;
1299 bool vm_page_frozen = false;
1300 bool got_cleanup_lock = false;
1301
1302 vacuum_delay_point(false);
1303
1304 /*
1305 * Regularly check if wraparound failsafe should trigger.
1306 *
1307 * There is a similar check inside lazy_vacuum_all_indexes(), but
1308 * relfrozenxid might start to look dangerously old before we reach
1309 * that point. This check also provides failsafe coverage for the
1310 * one-pass strategy, and the two-pass strategy with the index_cleanup
1311 * param set to 'off'.
1312 */
1313 if (vacrel->scanned_pages > 0 &&
1314 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1316
1317 /*
1318 * Consider if we definitely have enough space to process TIDs on page
1319 * already. If we are close to overrunning the available space for
1320 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1321 * this page. However, let's force at least one page-worth of tuples
1322 * to be stored as to ensure we do at least some work when the memory
1323 * configured is so low that we run out before storing anything.
1324 */
1325 if (vacrel->dead_items_info->num_items > 0 &&
1326 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1327 {
1328 /*
1329 * Before beginning index vacuuming, we release any pin we may
1330 * hold on the visibility map page. This isn't necessary for
1331 * correctness, but we do it anyway to avoid holding the pin
1332 * across a lengthy, unrelated operation.
1333 */
1334 if (BufferIsValid(vmbuffer))
1335 {
1336 ReleaseBuffer(vmbuffer);
1337 vmbuffer = InvalidBuffer;
1338 }
1339
1340 /* Perform a round of index and heap vacuuming */
1341 vacrel->consider_bypass_optimization = false;
1343
1344 /*
1345 * Vacuum the Free Space Map to make newly-freed space visible on
1346 * upper-level FSM pages. Note that blkno is the previously
1347 * processed block.
1348 */
1350 blkno + 1);
1352
1353 /* Report that we are once again scanning the heap */
1356 }
1357
1358 buf = read_stream_next_buffer(stream, &per_buffer_data);
1359
1360 /* The relation is exhausted. */
1361 if (!BufferIsValid(buf))
1362 break;
1363
1364 was_eager_scanned = *((bool *) per_buffer_data);
1366 page = BufferGetPage(buf);
1367 blkno = BufferGetBlockNumber(buf);
1368
1369 vacrel->scanned_pages++;
1371 vacrel->eager_scanned_pages++;
1372
1373 /* Report as block scanned, update error traceback information */
1376 blkno, InvalidOffsetNumber);
1377
1378 /*
1379 * Pin the visibility map page in case we need to mark the page
1380 * all-visible. In most cases this will be very cheap, because we'll
1381 * already have the correct page pinned anyway.
1382 */
1383 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1384
1385 /*
1386 * We need a buffer cleanup lock to prune HOT chains and defragment
1387 * the page in lazy_scan_prune. But when it's not possible to acquire
1388 * a cleanup lock right away, we may be able to settle for reduced
1389 * processing using lazy_scan_noprune.
1390 */
1392
1393 if (!got_cleanup_lock)
1395
1396 /* Check for new or empty pages before lazy_scan_[no]prune call */
1398 vmbuffer))
1399 {
1400 /* Processed as new/empty page (lock and pin released) */
1401 continue;
1402 }
1403
1404 /*
1405 * If we didn't get the cleanup lock, we can still collect LP_DEAD
1406 * items in the dead_items area for later vacuuming, count live and
1407 * recently dead tuples for vacuum logging, and determine if this
1408 * block could later be truncated. If we encounter any xid/mxids that
1409 * require advancing the relfrozenxid/relminxid, we'll have to wait
1410 * for a cleanup lock and call lazy_scan_prune().
1411 */
1412 if (!got_cleanup_lock &&
1413 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1414 {
1415 /*
1416 * lazy_scan_noprune could not do all required processing. Wait
1417 * for a cleanup lock, and call lazy_scan_prune in the usual way.
1418 */
1419 Assert(vacrel->aggressive);
1422 got_cleanup_lock = true;
1423 }
1424
1425 /*
1426 * If we have a cleanup lock, we must now prune, freeze, and count
1427 * tuples. We may have acquired the cleanup lock originally, or we may
1428 * have gone back and acquired it after lazy_scan_noprune() returned
1429 * false. Either way, the page hasn't been processed yet.
1430 *
1431 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1432 * recently_dead_tuples and live tuples for vacuum logging, determine
1433 * if the block can later be truncated, and accumulate the details of
1434 * remaining LP_DEAD line pointers on the page into dead_items. These
1435 * dead items include those pruned by lazy_scan_prune() as well as
1436 * line pointers previously marked LP_DEAD.
1437 */
1438 if (got_cleanup_lock)
1439 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1440 vmbuffer,
1442
1443 /*
1444 * Count an eagerly scanned page as a failure or a success.
1445 *
1446 * Only lazy_scan_prune() freezes pages, so if we didn't get the
1447 * cleanup lock, we won't have frozen the page. However, we only count
1448 * pages that were too new to require freezing as eager freeze
1449 * failures.
1450 *
1451 * We could gather more information from lazy_scan_noprune() about
1452 * whether or not there were tuples with XIDs or MXIDs older than the
1453 * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1454 * exclude pages skipped due to cleanup lock contention from eager
1455 * freeze algorithm caps.
1456 */
1458 {
1459 /* Aggressive vacuums do not eager scan. */
1460 Assert(!vacrel->aggressive);
1461
1462 if (vm_page_frozen)
1463 {
1464 if (vacrel->eager_scan_remaining_successes > 0)
1465 vacrel->eager_scan_remaining_successes--;
1466
1467 if (vacrel->eager_scan_remaining_successes == 0)
1468 {
1469 /*
1470 * Report only once that we disabled eager scanning. We
1471 * may eagerly read ahead blocks in excess of the success
1472 * or failure caps before attempting to freeze them, so we
1473 * could reach here even after disabling additional eager
1474 * scanning.
1475 */
1476 if (vacrel->eager_scan_max_fails_per_region > 0)
1477 ereport(vacrel->verbose ? INFO : DEBUG2,
1478 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1480 vacrel->dbname, vacrel->relnamespace,
1481 vacrel->relname)));
1482
1483 /*
1484 * If we hit our success cap, permanently disable eager
1485 * scanning by setting the other eager scan management
1486 * fields to their disabled values.
1487 */
1488 vacrel->eager_scan_remaining_fails = 0;
1489 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1490 vacrel->eager_scan_max_fails_per_region = 0;
1491 }
1492 }
1493 else if (vacrel->eager_scan_remaining_fails > 0)
1494 vacrel->eager_scan_remaining_fails--;
1495 }
1496
1497 /*
1498 * Now drop the buffer lock and, potentially, update the FSM.
1499 *
1500 * Our goal is to update the freespace map the last time we touch the
1501 * page. If we'll process a block in the second pass, we may free up
1502 * additional space on the page, so it is better to update the FSM
1503 * after the second pass. If the relation has no indexes, or if index
1504 * vacuuming is disabled, there will be no second heap pass; if this
1505 * particular page has no dead items, the second heap pass will not
1506 * touch this page. So, in those cases, update the FSM now.
1507 *
1508 * Note: In corner cases, it's possible to miss updating the FSM
1509 * entirely. If index vacuuming is currently enabled, we'll skip the
1510 * FSM update now. But if failsafe mode is later activated, or there
1511 * are so few dead tuples that index vacuuming is bypassed, there will
1512 * also be no opportunity to update the FSM later, because we'll never
1513 * revisit this page. Since updating the FSM is desirable but not
1514 * absolutely required, that's OK.
1515 */
1516 if (vacrel->nindexes == 0
1517 || !vacrel->do_index_vacuuming
1518 || !has_lpdead_items)
1519 {
1520 Size freespace = PageGetHeapFreeSpace(page);
1521
1523 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1524
1525 /*
1526 * Periodically perform FSM vacuuming to make newly-freed space
1527 * visible on upper FSM pages. This is done after vacuuming if the
1528 * table has indexes. There will only be newly-freed space if we
1529 * held the cleanup lock and lazy_scan_prune() was called.
1530 */
1531 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1533 {
1535 blkno);
1537 }
1538 }
1539 else
1541 }
1542
1543 vacrel->blkno = InvalidBlockNumber;
1544 if (BufferIsValid(vmbuffer))
1545 ReleaseBuffer(vmbuffer);
1546
1547 /*
1548 * Report that everything is now scanned. We never skip scanning the last
1549 * block in the relation, so we can pass rel_pages here.
1550 */
1552 rel_pages);
1553
1554 /* now we can compute the new value for pg_class.reltuples */
1555 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1556 vacrel->scanned_pages,
1557 vacrel->live_tuples);
1558
1559 /*
1560 * Also compute the total number of surviving heap entries. In the
1561 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1562 */
1563 vacrel->new_rel_tuples =
1564 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1565 vacrel->missed_dead_tuples;
1566
1567 read_stream_end(stream);
1568
1569 /*
1570 * Do index vacuuming (call each index's ambulkdelete routine), then do
1571 * related heap vacuuming
1572 */
1573 if (vacrel->dead_items_info->num_items > 0)
1575
1576 /*
1577 * Vacuum the remainder of the Free Space Map. We must do this whether or
1578 * not there were indexes, and whether or not we bypassed index vacuuming.
1579 * We can pass rel_pages here because we never skip scanning the last
1580 * block of the relation.
1581 */
1582 if (rel_pages > next_fsm_block_to_vacuum)
1584
1585 /* report all blocks vacuumed */
1587
1588 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1589 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1591}
1592
1593/*
1594 * heap_vac_scan_next_block() -- read stream callback to get the next block
1595 * for vacuum to process
1596 *
1597 * Every time lazy_scan_heap() needs a new block to process during its first
1598 * phase, it invokes read_stream_next_buffer() with a stream set up to call
1599 * heap_vac_scan_next_block() to get the next block.
1600 *
1601 * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1602 * various thresholds to skip blocks which do not need to be processed and
1603 * returns the next block to process or InvalidBlockNumber if there are no
1604 * remaining blocks.
1605 *
1606 * The visibility status of the next block to process and whether or not it
1607 * was eager scanned is set in the per_buffer_data.
1608 *
1609 * callback_private_data contains a reference to the LVRelState, passed to the
1610 * read stream API during stream setup. The LVRelState is an in/out parameter
1611 * here (locally named `vacrel`). Vacuum options and information about the
1612 * relation are read from it. vacrel->skippedallvis is set if we skip a block
1613 * that's all-visible but not all-frozen (to ensure that we don't update
1614 * relfrozenxid in that case). vacrel also holds information about the next
1615 * unskippable block -- as bookkeeping for this function.
1616 */
1617static BlockNumber
1619 void *callback_private_data,
1620 void *per_buffer_data)
1621{
1623 LVRelState *vacrel = callback_private_data;
1624
1625 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1627
1628 /* Have we reached the end of the relation? */
1629 if (next_block >= vacrel->rel_pages)
1630 {
1631 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1632 {
1633 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1634 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1635 }
1636 return InvalidBlockNumber;
1637 }
1638
1639 /*
1640 * We must be in one of the three following states:
1641 */
1642 if (next_block > vacrel->next_unskippable_block ||
1643 vacrel->next_unskippable_block == InvalidBlockNumber)
1644 {
1645 /*
1646 * 1. We have just processed an unskippable block (or we're at the
1647 * beginning of the scan). Find the next unskippable block using the
1648 * visibility map.
1649 */
1650 bool skipsallvis;
1651
1653
1654 /*
1655 * We now know the next block that we must process. It can be the
1656 * next block after the one we just processed, or something further
1657 * ahead. If it's further ahead, we can jump to it, but we choose to
1658 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1659 * pages. Since we're reading sequentially, the OS should be doing
1660 * readahead for us, so there's no gain in skipping a page now and
1661 * then. Skipping such a range might even discourage sequential
1662 * detection.
1663 *
1664 * This test also enables more frequent relfrozenxid advancement
1665 * during non-aggressive VACUUMs. If the range has any all-visible
1666 * pages then skipping makes updating relfrozenxid unsafe, which is a
1667 * real downside.
1668 */
1669 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1670 {
1671 next_block = vacrel->next_unskippable_block;
1672 if (skipsallvis)
1673 vacrel->skippedallvis = true;
1674 }
1675 }
1676
1677 /* Now we must be in one of the two remaining states: */
1678 if (next_block < vacrel->next_unskippable_block)
1679 {
1680 /*
1681 * 2. We are processing a range of blocks that we could have skipped
1682 * but chose not to. We know that they are all-visible in the VM,
1683 * otherwise they would've been unskippable.
1684 */
1685 vacrel->current_block = next_block;
1686 /* Block was not eager scanned */
1687 *((bool *) per_buffer_data) = false;
1688 return vacrel->current_block;
1689 }
1690 else
1691 {
1692 /*
1693 * 3. We reached the next unskippable block. Process it. On next
1694 * iteration, we will be back in state 1.
1695 */
1696 Assert(next_block == vacrel->next_unskippable_block);
1697
1698 vacrel->current_block = next_block;
1699 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1700 return vacrel->current_block;
1701 }
1702}
1703
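To illustrate the skipping rule above: with SKIP_PAGES_THRESHOLD at 32, a run of, say, 20 consecutive skippable all-visible blocks is still read (relying on OS readahead), whereas a run of 100 such blocks is jumped over; and if any block in a skipped run was all-visible but not all-frozen, skippedallvis is set, so this VACUUM will not advance relfrozenxid/relminmxid.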
1704/*
1705 * Find the next unskippable block in a vacuum scan using the visibility map.
1706 * The next unskippable block and its visibility information is updated in
1707 * vacrel.
1708 *
1709 * Note: our opinion of which blocks can be skipped can go stale immediately.
1710 * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1711 * was concurrently cleared, though. All that matters is that caller scan all
1712 * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1713 * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1714 * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1715 * to skip such a range is actually made, making everything safe.)
1716 */
1717static void
1719{
1720 BlockNumber rel_pages = vacrel->rel_pages;
1721 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1722 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1723 bool next_unskippable_eager_scanned = false;
1724
1725 *skipsallvis = false;
1726
1727 for (;; next_unskippable_block++)
1728 {
1730 next_unskippable_block,
1731 &next_unskippable_vmbuffer);
1732
1733
1734 /*
1735 * At the start of each eager scan region, normal vacuums with eager
1736 * scanning enabled reset the failure counter, allowing vacuum to
1737 * resume eager scanning if it had been suspended in the previous
1738 * region.
1739 */
1740 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1741 {
1742 vacrel->eager_scan_remaining_fails =
1743 vacrel->eager_scan_max_fails_per_region;
1744 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1745 }
1746
1747 /*
1748 * A block is unskippable if it is not all visible according to the
1749 * visibility map.
1750 */
1752 {
1754 break;
1755 }
1756
1757 /*
1758 * Caller must scan the last page to determine whether it has tuples
1759 * (caller must have the opportunity to set vacrel->nonempty_pages).
1760 * This rule avoids having lazy_truncate_heap() take access-exclusive
1761 * lock on rel to attempt a truncation that fails anyway, just because
1762 * there are tuples on the last page (it is likely that there will be
1763 * tuples on other nearby pages as well, but those can be skipped).
1764 *
1765 * Implement this by always treating the last block as unsafe to skip.
1766 */
1767 if (next_unskippable_block == rel_pages - 1)
1768 break;
1769
1770 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1771 if (!vacrel->skipwithvm)
1772 break;
1773
1774 /*
1775 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1776 * already frozen by now), so this page can be skipped.
1777 */
1778 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1779 continue;
1780
1781 /*
1782 * Aggressive vacuums cannot skip any all-visible pages that are not
1783 * also all-frozen.
1784 */
1785 if (vacrel->aggressive)
1786 break;
1787
1788 /*
1789 * Normal vacuums with eager scanning enabled only skip all-visible
1790 * but not all-frozen pages if they have hit the failure limit for the
1791 * current eager scan region.
1792 */
1793 if (vacrel->eager_scan_remaining_fails > 0)
1794 {
1795 next_unskippable_eager_scanned = true;
1796 break;
1797 }
1798
1799 /*
1800 * All-visible blocks are safe to skip in a normal vacuum. But
1801 * remember that the final range contains such a block for later.
1802 */
1803 *skipsallvis = true;
1804 }
1805
1806 /* write the local variables back to vacrel */
1807 vacrel->next_unskippable_block = next_unskippable_block;
1808 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1809 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1810}
1811
1812/*
1813 * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1814 *
1815 * Must call here to handle both new and empty pages before calling
1816 * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1817 * with new or empty pages.
1818 *
1819 * It's necessary to consider new pages as a special case, since the rules for
1820 * maintaining the visibility map and FSM with empty pages are a little
1821 * different (though new pages can be truncated away during rel truncation).
1822 *
1823 * Empty pages are not really a special case -- they're just heap pages that
1824 * have no allocated tuples (including even LP_UNUSED items). You might
1825 * wonder why we need to handle them here all the same. It's only necessary
1826 * because of a corner-case involving a hard crash during heap relation
1827 * extension. If we ever make relation-extension crash safe, then it should
1828 * no longer be necessary to deal with empty pages here (or new pages, for
1829 * that matter).
1830 *
1831 * Caller must hold at least a shared lock. We might need to escalate the
1832 * lock in that case, so the type of lock caller holds needs to be specified
1833 * using 'sharelock' argument.
1834 *
1835 * Returns false in common case where caller should go on to call
1836 * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1837 * that lazy_scan_heap is done processing the page, releasing lock on caller's
1838 * behalf.
1839 *
1840 * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1841 * is passed here because neither empty nor new pages can be eagerly frozen.
1842 * New pages are never frozen. Empty pages are always set frozen in the VM at
1843 * the same time that they are set all-visible, and we don't eagerly scan
1844 * frozen pages.
1845 */
1846static bool
1847lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1848 Page page, bool sharelock, Buffer vmbuffer)
1849{
1850 Size freespace;
1851
1852 if (PageIsNew(page))
1853 {
1854 /*
1855 * All-zeroes pages can be left over if either a backend extends the
1856 * relation by a single page, but crashes before the newly initialized
1857 * page has been written out, or when bulk-extending the relation
1858 * (which creates a number of empty pages at the tail end of the
1859 * relation), and then enters them into the FSM.
1860 *
1861 * Note we do not enter the page into the visibilitymap. That has the
1862 * downside that we repeatedly visit this page in subsequent vacuums,
1863 * but otherwise we'll never discover the space on a promoted standby.
1864 * The harm of repeated checking ought to normally not be too bad. The
1865 * space usually should be used at some point, otherwise there
1866 * wouldn't be any regular vacuums.
1867 *
1868 * Make sure these pages are in the FSM, to ensure they can be reused.
1869 * Do that by testing if there's any space recorded for the page. If
1870 * not, enter it. We do so after releasing the lock on the heap page,
1871 * the FSM is approximate, after all.
1872 */
1873 UnlockReleaseBuffer(buf);
1874
1875 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1876 {
1877 freespace = BLCKSZ - SizeOfPageHeaderData;
1878
1879 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1880 }
1881
1882 return true;
1883 }
1884
1885 if (PageIsEmpty(page))
1886 {
1887 /*
1888 * It seems likely that caller will always be able to get a cleanup
1889 * lock on an empty page. But don't take any chances -- escalate to
1890 * an exclusive lock (still don't need a cleanup lock, though).
1891 */
1892 if (sharelock)
1893 {
1894 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1895 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1896
1897 if (!PageIsEmpty(page))
1898 {
1899 /* page isn't new or empty -- keep lock and pin for now */
1900 return false;
1901 }
1902 }
1903 else
1904 {
1905 /* Already have a full cleanup lock (which is more than enough) */
1906 }
1907
1908 /*
1909 * Unlike new pages, empty pages are always set all-visible and
1910 * all-frozen.
1911 */
1912 if (!PageIsAllVisible(page))
1913 {
1914 START_CRIT_SECTION();
1915
1916 /* mark buffer dirty before writing a WAL record */
1917 MarkBufferDirty(buf);
1918
1919 /*
1920 * It's possible that another backend has extended the heap,
1921 * initialized the page, and then failed to WAL-log the page due
1922 * to an ERROR. Since heap extension is not WAL-logged, recovery
1923 * might try to replay our record setting the page all-visible and
1924 * find that the page isn't initialized, which will cause a PANIC.
1925 * To prevent that, check whether the page has been previously
1926 * WAL-logged, and if not, do that now.
1927 */
1928 if (RelationNeedsWAL(vacrel->rel) &&
1929 PageGetLSN(page) == InvalidXLogRecPtr)
1930 log_newpage_buffer(buf, true);
1931
1932 PageSetAllVisible(page);
1933 visibilitymap_set(vacrel->rel, blkno, buf,
1934 InvalidXLogRecPtr,
1935 vmbuffer, InvalidTransactionId,
1936 VISIBILITYMAP_ALL_VISIBLE |
1937 VISIBILITYMAP_ALL_FROZEN);
1938 END_CRIT_SECTION();
1939
1940 /* Count the newly all-frozen pages for logging */
1941 vacrel->vm_new_visible_pages++;
1942 vacrel->vm_new_visible_frozen_pages++;
1943 }
1944
1945 freespace = PageGetHeapFreeSpace(page);
1947 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1948 return true;
1949 }
1950
1951 /* page isn't new or empty -- keep lock and pin */
1952 return false;
1953}
1954
1955/* qsort comparator for sorting OffsetNumbers */
1956static int
1957cmpOffsetNumbers(const void *a, const void *b)
1958{
1959 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1960}
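/*
 * Editor's illustrative sketch -- not part of vacuumlazy.c.  It shows how the
 * comparator above is meant to be used: dead_items_add() expects offsets in
 * ascending order, so callers such as lazy_scan_prune() qsort() their
 * OffsetNumber arrays with cmpOffsetNumbers first.  The function name below
 * is hypothetical.
 */
static void
sort_offsets_example(void)
{
	OffsetNumber offsets[] = {7, 2, 5};

	qsort(offsets, lengthof(offsets), sizeof(OffsetNumber), cmpOffsetNumbers);
	/* offsets is now {2, 5, 7}, the order dead_items_add() requires */
}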
1961
1962/*
1963 * Helper to correct any corruption detected on a heap page and its
1964 * corresponding visibility map page after pruning but before setting the
1965 * visibility map. It examines the heap page, the associated VM page, and the
1966 * number of dead items previously identified.
1967 *
1968 * This function must be called while holding an exclusive lock on the heap
1969 * buffer, and the dead items must have been discovered under that same lock.
1970 *
1971 * The provided vmbits must reflect the current state of the VM block
1972 * referenced by vmbuffer. Although we do not hold a lock on the VM buffer, it
1973 * is pinned, and the heap buffer is exclusively locked, ensuring that no
1974 * other backend can update the VM bits corresponding to this heap page.
1975 *
1976 * If it clears corruption, it will zero out vmbits.
1977 */
1978static void
1979identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
1980 BlockNumber heap_blk, Page heap_page,
1981 int nlpdead_items,
1982 Buffer vmbuffer,
1983 uint8 *vmbits)
1984{
1985 Assert(visibilitymap_get_status(rel, heap_blk, &vmbuffer) == *vmbits);
1986
1988
1989 /*
1990 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1991 * page-level bit is clear. However, it's possible that the bit got
1992 * cleared after heap_vac_scan_next_block() was called, so we must recheck
1993 * with buffer lock before concluding that the VM is corrupt.
1994 */
1995 if (!PageIsAllVisible(heap_page) &&
1996 ((*vmbits & VISIBILITYMAP_VALID_BITS) != 0))
1997 {
2000 errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2002
2003 visibilitymap_clear(rel, heap_blk, vmbuffer,
2004 VISIBILITYMAP_VALID_BITS);
2005 *vmbits = 0;
2006 }
2007
2008 /*
2009 * It's possible for the value returned by
2010 * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2011 * wrong for us to see tuples that appear to not be visible to everyone
2012 * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2013 * never moves backwards, but GetOldestNonRemovableTransactionId() is
2014 * conservative and sometimes returns a value that's unnecessarily small,
2015 * so if we see that contradiction it just means that the tuples that we
2016 * think are not visible to everyone yet actually are, and the
2017 * PD_ALL_VISIBLE flag is correct.
2018 *
2019 * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2020 * however.
2021 */
2022 else if (PageIsAllVisible(heap_page) && nlpdead_items > 0)
2023 {
2026 errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2028
2029 PageClearAllVisible(heap_page);
2030 MarkBufferDirty(heap_buffer);
2031 visibilitymap_clear(rel, heap_blk, vmbuffer,
2032 VISIBILITYMAP_VALID_BITS);
2033 *vmbits = 0;
2034 }
2035}
2036
2037/*
2038 * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
2039 *
2040 * Caller must hold pin and buffer cleanup lock on the buffer.
2041 *
2042 * vmbuffer is the buffer containing the VM block with visibility information
2043 * for the heap block, blkno.
2044 *
2045 * *has_lpdead_items is set to true or false depending on whether, upon return
2046 * from this function, any LP_DEAD items are still present on the page.
2047 *
2048 * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2049 * VM. The caller currently only uses this for determining whether an eagerly
2050 * scanned page was successfully set all-frozen.
2051 *
2052 * Returns the number of tuples deleted from the page during HOT pruning.
2053 */
2054static int
2055lazy_scan_prune(LVRelState *vacrel,
2056 Buffer buf,
2057 BlockNumber blkno,
2058 Page page,
2059 Buffer vmbuffer,
2060 bool *has_lpdead_items,
2061 bool *vm_page_frozen)
2062{
2063 Relation rel = vacrel->rel;
2064 PruneFreezeResult presult;
2065 PruneFreezeParams params = {
2066 .relation = rel,
2067 .buffer = buf,
2068 .reason = PRUNE_VACUUM_SCAN,
2069 .options = HEAP_PAGE_PRUNE_FREEZE,
2070 .vistest = vacrel->vistest,
2071 .cutoffs = &vacrel->cutoffs,
2072 };
2073 uint8 old_vmbits = 0;
2074 uint8 new_vmbits = 0;
2075
2076 Assert(BufferGetBlockNumber(buf) == blkno);
2077
2078 /*
2079 * Prune all HOT-update chains and potentially freeze tuples on this page.
2080 *
2081 * If the relation has no indexes, we can immediately mark would-be dead
2082 * items LP_UNUSED.
2083 *
2084 * The number of tuples removed from the page is returned in
2085 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2086 * presult.lpdead_items's final value can be thought of as the number of
2087 * tuples that were deleted from indexes.
2088 *
2089 * We will update the VM after collecting LP_DEAD items and freezing
2090 * tuples. Pruning will have determined whether or not the page is
2091 * all-visible.
2092 */
2093 if (vacrel->nindexes == 0)
2094 params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
2095
2096 heap_page_prune_and_freeze(&params,
2097 &presult,
2098 &vacrel->offnum,
2099 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2100
2101 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2102 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2103
2104 if (presult.nfrozen > 0)
2105 {
2106 /*
2107 * We don't increment the new_frozen_tuple_pages instrumentation
2108 * counter when nfrozen == 0, since it only counts pages with newly
2109 * frozen tuples (don't confuse that with pages newly set all-frozen
2110 * in VM).
2111 */
2112 vacrel->new_frozen_tuple_pages++;
2113 }
2114
2115 /*
2116 * VACUUM will call heap_page_is_all_visible() during the second pass over
2117 * the heap to determine all_visible and all_frozen for the page -- this
2118 * is a specialized version of the logic from this function. Now that
2119 * we've finished pruning and freezing, make sure that we're in total
2120 * agreement with heap_page_is_all_visible() using an assertion.
2121 */
2122#ifdef USE_ASSERT_CHECKING
2123 if (presult.all_visible)
2124 {
2125 TransactionId debug_cutoff;
2126 bool debug_all_frozen;
2127
2128 Assert(presult.lpdead_items == 0);
2129
2130 Assert(heap_page_is_all_visible(vacrel->rel, buf,
2131 vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2132 &debug_cutoff, &vacrel->offnum));
2133
2134 Assert(presult.all_frozen == debug_all_frozen);
2135
2136 Assert(!TransactionIdIsValid(debug_cutoff) ||
2137 debug_cutoff == presult.vm_conflict_horizon);
2138 }
2139#endif
2140
2141 /*
2142 * Now save details of the LP_DEAD items from the page in vacrel
2143 */
2144 if (presult.lpdead_items > 0)
2145 {
2146 vacrel->lpdead_item_pages++;
2147
2148 /*
2149 * deadoffsets are collected incrementally in
2150 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2151 * with an indeterminate order, but dead_items_add requires them to be
2152 * sorted.
2153 */
2154 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2155 cmpOffsetNumbers);
2156
2157 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2158 }
2159
2160 /* Finally, add page-local counts to whole-VACUUM counts */
2161 vacrel->tuples_deleted += presult.ndeleted;
2162 vacrel->tuples_frozen += presult.nfrozen;
2163 vacrel->lpdead_items += presult.lpdead_items;
2164 vacrel->live_tuples += presult.live_tuples;
2165 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2166
2167 /* Can't truncate this page */
2168 if (presult.hastup)
2169 vacrel->nonempty_pages = blkno + 1;
2170
2171 /* Did we find LP_DEAD items? */
2172 *has_lpdead_items = (presult.lpdead_items > 0);
2173
2174 Assert(!presult.all_visible || !(*has_lpdead_items));
2175 Assert(!presult.all_frozen || presult.all_visible);
2176
2177 old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
2178
2179 identify_and_fix_vm_corruption(vacrel->rel, buf, blkno, page,
2180 presult.lpdead_items, vmbuffer,
2181 &old_vmbits);
2182
2183 if (!presult.all_visible)
2184 return presult.ndeleted;
2185
2186 /* Set the visibility map and page visibility hint */
2187 new_vmbits |= VISIBILITYMAP_ALL_VISIBLE;
2188
2189 if (presult.all_frozen)
2190 new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
2191
2192 /* Nothing to do */
2193 if (old_vmbits == new_vmbits)
2194 return presult.ndeleted;
2195
2196 /*
2197 * It should never be the case that the visibility map page is set while
2198 * the page-level bit is clear (and if so, we cleared it above), but the
2199 * reverse is allowed (if checksums are not enabled). Regardless, set both
2200 * bits so that we get back in sync.
2201 *
2202 * The heap buffer must be marked dirty before adding it to the WAL chain
2203 * when setting the VM. We don't worry about unnecessarily dirtying the
2204 * heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
2205 * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
2206 * the VM bits clear, so there is no point in optimizing it.
2207 */
2208 PageSetAllVisible(page);
2209 MarkBufferDirty(buf);
2210
2211 /*
2212 * If the page is being set all-frozen, we pass InvalidTransactionId as
2213 * the cutoff_xid, since a snapshot conflict horizon sufficient to make
2214 * everything safe for REDO was logged when the page's tuples were frozen.
2215 */
2216 Assert(!presult.all_frozen ||
2217 !TransactionIdIsValid(presult.vm_conflict_horizon));
2218
2219 visibilitymap_set(vacrel->rel, blkno, buf,
2220 InvalidXLogRecPtr,
2221 vmbuffer, presult.vm_conflict_horizon,
2222 new_vmbits);
2223
2224 /*
2225 * If the page wasn't already set all-visible and/or all-frozen in the VM,
2226 * count it as newly set for logging.
2227 */
2228 if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2229 {
2230 vacrel->vm_new_visible_pages++;
2231 if (presult.all_frozen)
2232 {
2233 vacrel->vm_new_visible_frozen_pages++;
2234 *vm_page_frozen = true;
2235 }
2236 }
2237 else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2238 presult.all_frozen)
2239 {
2240 vacrel->vm_new_frozen_pages++;
2241 *vm_page_frozen = true;
2242 }
2243
2244 return presult.ndeleted;
2245}
2246
2247/*
2248 * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2249 *
2250 * Caller need only hold a pin and share lock on the buffer, unlike
2251 * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2252 * performed here, it's quite possible that an earlier opportunistic pruning
2253 * operation left LP_DEAD items behind. We'll at least collect any such items
2254 * in dead_items for removal from indexes.
2255 *
2256 * For aggressive VACUUM callers, we may return false to indicate that a full
2257 * cleanup lock is required for processing by lazy_scan_prune. This is only
2258 * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2259 * one or more tuples on the page. We always return true for non-aggressive
2260 * callers.
2261 *
2262 * If this function returns true, *has_lpdead_items gets set to true or false
2263 * depending on whether, upon return from this function, any LP_DEAD items are
2264 * present on the page. If this function returns false, *has_lpdead_items
2265 * is not updated.
2266 */
2267static bool
2268lazy_scan_noprune(LVRelState *vacrel,
2269 Buffer buf,
2270 BlockNumber blkno,
2271 Page page,
2272 bool *has_lpdead_items)
2273{
2274 OffsetNumber offnum,
2275 maxoff;
2276 int lpdead_items,
2277 live_tuples,
2278 recently_dead_tuples,
2279 missed_dead_tuples;
2280 bool hastup;
2281 HeapTupleHeader tupleheader;
2282 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2283 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2284 OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2285
2286 Assert(BufferGetBlockNumber(buf) == blkno);
2287
2288 hastup = false; /* for now */
2289
2290 lpdead_items = 0;
2291 live_tuples = 0;
2292 recently_dead_tuples = 0;
2293 missed_dead_tuples = 0;
2294
2295 maxoff = PageGetMaxOffsetNumber(page);
2296 for (offnum = FirstOffsetNumber;
2297 offnum <= maxoff;
2298 offnum = OffsetNumberNext(offnum))
2299 {
2300 ItemId itemid;
2301 HeapTupleData tuple;
2302
2303 vacrel->offnum = offnum;
2304 itemid = PageGetItemId(page, offnum);
2305
2306 if (!ItemIdIsUsed(itemid))
2307 continue;
2308
2309 if (ItemIdIsRedirected(itemid))
2310 {
2311 hastup = true;
2312 continue;
2313 }
2314
2315 if (ItemIdIsDead(itemid))
2316 {
2317 /*
2318 * Deliberately don't set hastup=true here. See same point in
2319 * lazy_scan_prune for an explanation.
2320 */
2321 deadoffsets[lpdead_items++] = offnum;
2322 continue;
2323 }
2324
2325 hastup = true; /* page prevents rel truncation */
2326 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2327 if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2328 &NoFreezePageRelfrozenXid,
2329 &NoFreezePageRelminMxid))
2330 {
2331 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2332 if (vacrel->aggressive)
2333 {
2334 /*
2335 * Aggressive VACUUMs must always be able to advance rel's
2336 * relfrozenxid to a value >= FreezeLimit (and be able to
2337 * advance rel's relminmxid to a value >= MultiXactCutoff).
2338 * The ongoing aggressive VACUUM won't be able to do that
2339 * unless it can freeze an XID (or MXID) from this tuple now.
2340 *
2341 * The only safe option is to have caller perform processing
2342 * of this page using lazy_scan_prune. Caller might have to
2343 * wait a while for a cleanup lock, but it can't be helped.
2344 */
2345 vacrel->offnum = InvalidOffsetNumber;
2346 return false;
2347 }
2348
2349 /*
2350 * Non-aggressive VACUUMs are under no obligation to advance
2351 * relfrozenxid (even by one XID). We can be much laxer here.
2352 *
2353 * Currently we always just accept an older final relfrozenxid
2354 * and/or relminmxid value. We never make caller wait or work a
2355 * little harder, even when it likely makes sense to do so.
2356 */
2357 }
2358
2359 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2360 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2361 tuple.t_len = ItemIdGetLength(itemid);
2362 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2363
2364 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2365 buf))
2366 {
2367 case HEAPTUPLE_DELETE_IN_PROGRESS:
2368 case HEAPTUPLE_LIVE:
2369
2370 /*
2371 * Count both cases as live, just like lazy_scan_prune
2372 */
2373 live_tuples++;
2374
2375 break;
2376 case HEAPTUPLE_DEAD:
2377
2378 /*
2379 * There is some useful work for pruning to do, that won't be
2380 * done due to failure to get a cleanup lock.
2381 */
2382 missed_dead_tuples++;
2383 break;
2384 case HEAPTUPLE_RECENTLY_DEAD:
2385
2386 /*
2387 * Count in recently_dead_tuples, just like lazy_scan_prune
2388 */
2389 recently_dead_tuples++;
2390 break;
2391 case HEAPTUPLE_INSERT_IN_PROGRESS:
2392
2393 /*
2394 * Do not count these rows as live, just like lazy_scan_prune
2395 */
2396 break;
2397 default:
2398 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2399 break;
2400 }
2401 }
2402
2403 vacrel->offnum = InvalidOffsetNumber;
2404
2405 /*
2406 * By here we know for sure that caller can put off freezing and pruning
2407 * this particular page until the next VACUUM. Remember its details now.
2408 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2409 */
2410 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2411 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2412
2413 /* Save any LP_DEAD items found on the page in dead_items */
2414 if (vacrel->nindexes == 0)
2415 {
2416 /* Using one-pass strategy (since table has no indexes) */
2417 if (lpdead_items > 0)
2418 {
2419 /*
2420 * Perfunctory handling for the corner case where a single pass
2421 * strategy VACUUM cannot get a cleanup lock, and it turns out
2422 * that there is one or more LP_DEAD items: just count the LP_DEAD
2423 * items as missed_dead_tuples instead. (This is a bit dishonest,
2424 * but it beats having to maintain specialized heap vacuuming code
2425 * forever, for vanishingly little benefit.)
2426 */
2427 hastup = true;
2428 missed_dead_tuples += lpdead_items;
2429 }
2430 }
2431 else if (lpdead_items > 0)
2432 {
2433 /*
2434 * Page has LP_DEAD items, and so any references/TIDs that remain in
2435 * indexes will be deleted during index vacuuming (and then marked
2436 * LP_UNUSED in the heap)
2437 */
2438 vacrel->lpdead_item_pages++;
2439
2440 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2441
2442 vacrel->lpdead_items += lpdead_items;
2443 }
2444
2445 /*
2446 * Finally, add relevant page-local counts to whole-VACUUM counts
2447 */
2448 vacrel->live_tuples += live_tuples;
2449 vacrel->recently_dead_tuples += recently_dead_tuples;
2450 vacrel->missed_dead_tuples += missed_dead_tuples;
2451 if (missed_dead_tuples > 0)
2452 vacrel->missed_dead_pages++;
2453
2454 /* Can't truncate this page */
2455 if (hastup)
2456 vacrel->nonempty_pages = blkno + 1;
2457
2458 /* Did we find LP_DEAD items? */
2459 *has_lpdead_items = (lpdead_items > 0);
2460
2461 /* Caller won't need to call lazy_scan_prune with same page */
2462 return true;
2463}
2464
2465/*
2466 * Main entry point for index vacuuming and heap vacuuming.
2467 *
2468 * Removes items collected in dead_items from table's indexes, then marks the
2469 * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2470 * for full details.
2471 *
2472 * Also empties dead_items, freeing up space for later TIDs.
2473 *
2474 * We may choose to bypass index vacuuming at this point, though only when the
2475 * ongoing VACUUM operation will definitely only have one index scan/round of
2476 * index vacuuming.
2477 */
2478static void
2479lazy_vacuum(LVRelState *vacrel)
2480{
2481 bool bypass;
2482
2483 /* Should not end up here with no indexes */
2484 Assert(vacrel->nindexes > 0);
2485 Assert(vacrel->lpdead_item_pages > 0);
2486
2487 if (!vacrel->do_index_vacuuming)
2488 {
2489 Assert(!vacrel->do_index_cleanup);
2490 dead_items_reset(vacrel);
2491 return;
2492 }
2493
2494 /*
2495 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2496 *
2497 * We currently only do this in cases where the number of LP_DEAD items
2498 * for the entire VACUUM operation is close to zero. This avoids sharp
2499 * discontinuities in the duration and overhead of successive VACUUM
2500 * operations that run against the same table with a fixed workload.
2501 * Ideally, successive VACUUM operations will behave as if there are
2502 * exactly zero LP_DEAD items in cases where there are close to zero.
2503 *
2504 * This is likely to be helpful with a table that is continually affected
2505 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2506 * have small aberrations that lead to just a few heap pages retaining
2507 * only one or two LP_DEAD items. This is pretty common; even when the
2508 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2509 * impossible to predict whether HOT will be applied in 100% of cases.
2510 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2511 * HOT through careful tuning.
2512 */
2513 bypass = false;
2514 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2515 {
2516 BlockNumber threshold;
2517
2518 Assert(vacrel->num_index_scans == 0);
2519 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2520 Assert(vacrel->do_index_vacuuming);
2521 Assert(vacrel->do_index_cleanup);
2522
2523 /*
2524 * This crossover point at which we'll start to do index vacuuming is
2525 * expressed as a percentage of the total number of heap pages in the
2526 * table that are known to have at least one LP_DEAD item. This is
2527 * much more important than the total number of LP_DEAD items, since
2528 * it's a proxy for the number of heap pages whose visibility map bits
2529 * cannot be set on account of bypassing index and heap vacuuming.
2530 *
2531 * We apply one further precautionary test: the space currently used
2532 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2533 * not exceed 32MB. This limits the risk that we will bypass index
2534 * vacuuming again and again until eventually there is a VACUUM whose
2535 * dead_items space is not CPU cache resident.
2536 *
2537 * We don't take any special steps to remember the LP_DEAD items (such
2538 * as counting them in our final update to the stats system) when the
2539 * optimization is applied. Though the accounting used in analyze.c's
2540 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2541 * rows in its own stats report, that's okay. The discrepancy should
2542 * be negligible. If this optimization is ever expanded to cover more
2543 * cases then this may need to be reconsidered.
2544 */
2545 threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2546 bypass = (vacrel->lpdead_item_pages < threshold &&
2547 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2548 }
2549
2550 if (bypass)
2551 {
2552 /*
2553 * There are almost zero TIDs. Behave as if there were precisely
2554 * zero: bypass index vacuuming, but do index cleanup.
2555 *
2556 * We expect that the ongoing VACUUM operation will finish very
2557 * quickly, so there is no point in considering speeding up as a
2558 * failsafe against wraparound failure. (Index cleanup is expected to
2559 * finish very quickly in cases where there were no ambulkdelete()
2560 * calls.)
2561 */
2562 vacrel->do_index_vacuuming = false;
2563 }
2564 else if (lazy_vacuum_all_indexes(vacrel))
2565 {
2566 /*
2567 * We successfully completed a round of index vacuuming. Do related
2568 * heap vacuuming now.
2569 */
2570 lazy_vacuum_heap_rel(vacrel);
2571 }
2572 else
2573 {
2574 /*
2575 * Failsafe case.
2576 *
2577 * We attempted index vacuuming, but didn't finish a full round/full
2578 * index scan. This happens when relfrozenxid or relminmxid is too
2579 * far in the past.
2580 *
2581 * From this point on the VACUUM operation will do no further index
2582 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2583 * back here again.
2584 */
2585 Assert(VacuumFailsafeActive);
2586 }
2587
2588 /*
2589 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2590 * vacuum)
2591 */
2592 dead_items_reset(vacrel);
2593}
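/*
 * Editor's illustrative sketch -- not part of vacuumlazy.c.  It restates the
 * bypass test from lazy_vacuum() above as a standalone predicate: skip index
 * (and heap) vacuuming when fewer than BYPASS_THRESHOLD_PAGES (2%) of the
 * table's pages have LP_DEAD items and the TID storage is still small enough
 * to stay CPU-cache resident.  The function name and the dead_items_bytes
 * parameter are assumptions made for the example.
 */
static bool
would_bypass_index_vacuuming_example(BlockNumber rel_pages,
									 BlockNumber lpdead_item_pages,
									 size_t dead_items_bytes)
{
	BlockNumber threshold = (double) rel_pages * BYPASS_THRESHOLD_PAGES;

	return lpdead_item_pages < threshold &&
		dead_items_bytes < 32 * 1024 * 1024;
}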
2594
2595/*
2596 * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2597 *
2598 * Returns true in the common case when all indexes were successfully
2599 * vacuumed. Returns false in rare cases where we determined that the ongoing
2600 * VACUUM operation is at risk of taking too long to finish, leading to
2601 * wraparound failure.
2602 */
2603static bool
2604lazy_vacuum_all_indexes(LVRelState *vacrel)
2605{
2606 bool allindexes = true;
2607 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2608 const int progress_start_index[] = {
2611 };
2612 const int progress_end_index[] = {
2616 };
2619
2620 Assert(vacrel->nindexes > 0);
2621 Assert(vacrel->do_index_vacuuming);
2622 Assert(vacrel->do_index_cleanup);
2623
2624 /* Precheck for XID wraparound emergencies */
2626 {
2627 /* Wraparound emergency -- don't even start an index scan */
2628 return false;
2629 }
2630
2631 /*
2632 * Report that we are now vacuuming indexes and the number of indexes to
2633 * vacuum.
2634 */
2636 progress_start_val[1] = vacrel->nindexes;
2638
2640 {
2641 for (int idx = 0; idx < vacrel->nindexes; idx++)
2642 {
2643 Relation indrel = vacrel->indrels[idx];
2644 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2645
2646 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2648 vacrel);
2649
2650 /* Report the number of indexes vacuumed */
2652 idx + 1);
2653
2655 {
2656 /* Wraparound emergency -- end current index scan */
2657 allindexes = false;
2658 break;
2659 }
2660 }
2661 }
2662 else
2663 {
2664 /* Outsource everything to parallel variant */
2666 vacrel->num_index_scans);
2667
2668 /*
2669 * Do a postcheck to consider applying wraparound failsafe now. Note
2670 * that parallel VACUUM only gets the precheck and this postcheck.
2671 */
2673 allindexes = false;
2674 }
2675
2676 /*
2677 * We delete all LP_DEAD items from the first heap pass in all indexes on
2678 * each call here (except calls where we choose to do the failsafe). This
2679 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2680 * of the failsafe triggering, which prevents the next call from taking
2681 * place).
2682 */
2683 Assert(vacrel->num_index_scans > 0 ||
2684 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2686
2687 /*
2688 * Increase and report the number of index scans. Also, we reset
2689 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2690 *
2691 * We deliberately include the case where we started a round of bulk
2692 * deletes that we weren't able to finish due to the failsafe triggering.
2693 */
2694 vacrel->num_index_scans++;
2695 progress_end_val[0] = 0;
2696 progress_end_val[1] = 0;
2697 progress_end_val[2] = vacrel->num_index_scans;
2699
2700 return allindexes;
2701}
2702
2703/*
2704 * Read stream callback for vacuum's third phase (second pass over the heap).
2705 * Gets the next block from the TID store and returns it or InvalidBlockNumber
2706 * if there are no further blocks to vacuum.
2707 *
2708 * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2709 */
2710static BlockNumber
2711vacuum_reap_lp_read_stream_next(ReadStream *stream,
2712 void *callback_private_data,
2713 void *per_buffer_data)
2714{
2715 TidStoreIter *iter = callback_private_data;
2717
2719 if (iter_result == NULL)
2720 return InvalidBlockNumber;
2721
2722 /*
2723 * Save the TidStoreIterResult for later, so we can extract the offsets.
2724 * It is safe to copy the result, according to TidStoreIterateNext().
2725 */
2726 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2727
2728 return iter_result->blkno;
2729}
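/*
 * Editor's illustrative sketch -- not part of vacuumlazy.c.  A read stream
 * callback like the one above simply hands back block numbers one at a time
 * and returns InvalidBlockNumber when the source is exhausted.  The sketch
 * below drives a stream from a plain array instead of a TidStore iterator;
 * the struct and function names are hypothetical.
 */
typedef struct BlockArrayState
{
	BlockNumber *blocks;
	int			nblocks;
	int			next;
} BlockArrayState;

static BlockNumber
block_array_read_stream_next_example(ReadStream *stream,
									 void *callback_private_data,
									 void *per_buffer_data)
{
	BlockArrayState *state = callback_private_data;

	if (state->next >= state->nblocks)
		return InvalidBlockNumber;

	return state->blocks[state->next++];
}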
2730
2731/*
2732 * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2733 *
2734 * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2735 * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2736 *
2737 * We may also be able to truncate the line pointer array of the heap pages we
2738 * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2739 * array, it can be reclaimed as free space. These LP_UNUSED items usually
2740 * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2741 * each page to LP_UNUSED, and then consider if it's possible to truncate the
2742 * page's line pointer array).
2743 *
2744 * Note: the reason for doing this as a second pass is we cannot remove the
2745 * tuples until we've removed their index entries, and we want to process
2746 * index entry removal in batches as large as possible.
2747 */
2748static void
2749lazy_vacuum_heap_rel(LVRelState *vacrel)
2750{
2751 ReadStream *stream;
2753 Buffer vmbuffer = InvalidBuffer;
2755 TidStoreIter *iter;
2756
2757 Assert(vacrel->do_index_vacuuming);
2758 Assert(vacrel->do_index_cleanup);
2759 Assert(vacrel->num_index_scans > 0);
2760
2761 /* Report that we are now vacuuming the heap */
2764
2765 /* Update error traceback information */
2769
2770 iter = TidStoreBeginIterate(vacrel->dead_items);
2771
2772 /*
2773 * Set up the read stream for vacuum's second pass through the heap.
2774 *
2775 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2776 * not need to wait for IO and does not perform locking. Once we support
2777 * parallelism it should still be fine, as presumably the holder of locks
2778 * would never be blocked by IO while holding the lock.
2779 */
2782 vacrel->bstrategy,
2783 vacrel->rel,
2786 iter,
2787 sizeof(TidStoreIterResult));
2788
2789 while (true)
2790 {
2791 BlockNumber blkno;
2792 Buffer buf;
2793 Page page;
2795 Size freespace;
2797 int num_offsets;
2798
2799 vacuum_delay_point(false);
2800
2801 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2802
2803 /* The relation is exhausted */
2804 if (!BufferIsValid(buf))
2805 break;
2806
2807 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2808
2811 Assert(num_offsets <= lengthof(offsets));
2812
2813 /*
2814 * Pin the visibility map page in case we need to mark the page
2815 * all-visible. In most cases this will be very cheap, because we'll
2816 * already have the correct page pinned anyway.
2817 */
2818 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2819
2820 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2822 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2823 num_offsets, vmbuffer);
2824
2825 /* Now that we've vacuumed the page, record its available space */
2826 page = BufferGetPage(buf);
2827 freespace = PageGetHeapFreeSpace(page);
2828
2830 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2832 }
2833
2834 read_stream_end(stream);
2835 TidStoreEndIterate(iter);
2836
2837 vacrel->blkno = InvalidBlockNumber;
2838 if (BufferIsValid(vmbuffer))
2839 ReleaseBuffer(vmbuffer);
2840
2841 /*
2842 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2843 * the second heap pass. No more, no less.
2844 */
2845 Assert(vacrel->num_index_scans > 1 ||
2846 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2847 vacuumed_pages == vacrel->lpdead_item_pages));
2848
2850 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2851 vacrel->relname, vacrel->dead_items_info->num_items,
2852 vacuumed_pages)));
2853
2854 /* Revert to the previous phase information for error traceback */
2856}
2857
2858/*
2859 * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2860 * vacrel->dead_items store.
2861 *
2862 * Caller must have an exclusive buffer lock on the buffer (though a full
2863 * cleanup lock is also acceptable). vmbuffer must be valid and already have
2864 * a pin on blkno's visibility map page.
2865 */
2866static void
2867lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2868 OffsetNumber *deadoffsets, int num_offsets,
2869 Buffer vmbuffer)
2869 Buffer vmbuffer)
2870{
2871 Page page = BufferGetPage(buffer);
2873 int nunused = 0;
2874 TransactionId visibility_cutoff_xid;
2876 bool all_frozen;
2878 uint8 vmflags = 0;
2879
2880 Assert(vacrel->do_index_vacuuming);
2881
2883
2884 /* Update error traceback information */
2888
2889 /*
2890 * Before marking dead items unused, check whether the page will become
2891 * all-visible once that change is applied. This lets us reap the tuples
2892 * and mark the page all-visible within the same critical section,
2893 * enabling both changes to be emitted in a single WAL record. Since the
2894 * visibility checks may perform I/O and allocate memory, they must be
2895 * done outside the critical section.
2896 */
2897 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2898 vacrel->cutoffs.OldestXmin,
2899 deadoffsets, num_offsets,
2900 &all_frozen, &visibility_cutoff_xid,
2901 &vacrel->offnum))
2902 {
2903 vmflags |= VISIBILITYMAP_ALL_VISIBLE;
2904 if (all_frozen)
2905 {
2906 vmflags |= VISIBILITYMAP_ALL_FROZEN;
2907 Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2908 }
2909
2910 /*
2911 * Take the lock on the vmbuffer before entering a critical section.
2912 * The heap page lock must also be held while updating the VM to
2913 * ensure consistency.
2914 */
2915 LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
2916 }
2917
2918 START_CRIT_SECTION();
2919
2920 for (int i = 0; i < num_offsets; i++)
2921 {
2922 ItemId itemid;
2923 OffsetNumber toff = deadoffsets[i];
2924
2925 itemid = PageGetItemId(page, toff);
2926
2927 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2928 ItemIdSetUnused(itemid);
2929 unused[nunused++] = toff;
2930 }
2931
2932 Assert(nunused > 0);
2933
2934 /* Attempt to truncate line pointer array now */
2935 PageTruncateLinePointerArray(page);
2936
2937 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2938 {
2939 /*
2940 * The page is guaranteed to have had dead line pointers, so we always
2941 * set PD_ALL_VISIBLE.
2942 */
2943 PageSetAllVisible(page);
2945 vmbuffer, vmflags,
2946 vacrel->rel->rd_locator);
2947 conflict_xid = visibility_cutoff_xid;
2948 }
2949
2950 /*
2951 * Mark buffer dirty before we write WAL.
2952 */
2953 MarkBufferDirty(buffer);
2954
2955 /* XLOG stuff */
2956 if (RelationNeedsWAL(vacrel->rel))
2957 {
2958 log_heap_prune_and_freeze(vacrel->rel, buffer,
2959 vmflags != 0 ? vmbuffer : InvalidBuffer,
2960 vmflags,
2961 conflict_xid,
2962 false, /* no cleanup lock required */
2963 PRUNE_VACUUM_CLEANUP,
2964 NULL, 0, /* frozen */
2965 NULL, 0, /* redirected */
2966 NULL, 0, /* dead */
2967 unused, nunused);
2968 }
2969
2970 END_CRIT_SECTION();
2971
2972 if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
2973 {
2974 /* Count the newly set VM page for logging */
2975 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2976 vacrel->vm_new_visible_pages++;
2977 if (all_frozen)
2978 vacrel->vm_new_visible_frozen_pages++;
2979 }
2980
2981 /* Revert to the previous phase information for error traceback */
2983}
2984
2985/*
2986 * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2987 * relfrozenxid and/or relminmxid that is dangerously far in the past.
2988 * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2989 * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2990 *
2991 * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2992 * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2993 * that it started out with.
2994 *
2995 * Returns true when failsafe has been triggered.
2996 */
2997static bool
2998lazy_check_wraparound_failsafe(LVRelState *vacrel)
2999{
3000 /* Don't warn more than once per VACUUM */
3002 return true;
3003
3005 {
3006 const int progress_index[] = {
3010 };
3012
3013 VacuumFailsafeActive = true;
3014
3015 /*
3016 * Abandon use of a buffer access strategy to allow use of all of
3017 * shared buffers. We assume the caller who allocated the memory for
3018 * the BufferAccessStrategy will free it.
3019 */
3020 vacrel->bstrategy = NULL;
3021
3022 /* Disable index vacuuming, index cleanup, and heap rel truncation */
3023 vacrel->do_index_vacuuming = false;
3024 vacrel->do_index_cleanup = false;
3025 vacrel->do_rel_truncate = false;
3026
3027 /* Reset the progress counters and set the failsafe mode */
3029
3031 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3032 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3033 vacrel->num_index_scans),
3034 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3035 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3036 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3037
3038 /* Stop applying cost limits from this point on */
3039 VacuumCostActive = false;
3041
3042 return true;
3043 }
3044
3045 return false;
3046}
3047
3048/*
3049 * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3050 */
3051static void
3052lazy_cleanup_all_indexes(LVRelState *vacrel)
3053{
3054 double reltuples = vacrel->new_rel_tuples;
3055 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3056 const int progress_start_index[] = {
3059 };
3060 const int progress_end_index[] = {
3063 };
3065 int64 progress_end_val[2] = {0, 0};
3066
3067 Assert(vacrel->do_index_cleanup);
3068 Assert(vacrel->nindexes > 0);
3069
3070 /*
3071 * Report that we are now cleaning up indexes and the number of indexes to
3072 * cleanup.
3073 */
3075 progress_start_val[1] = vacrel->nindexes;
3077
3079 {
3080 for (int idx = 0; idx < vacrel->nindexes; idx++)
3081 {
3082 Relation indrel = vacrel->indrels[idx];
3083 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3084
3085 vacrel->indstats[idx] =
3086 lazy_cleanup_one_index(indrel, istat, reltuples,
3087 estimated_count, vacrel);
3088
3089 /* Report the number of indexes cleaned up */
3091 idx + 1);
3092 }
3093 }
3094 else
3095 {
3096 /* Outsource everything to parallel variant */
3098 vacrel->num_index_scans,
3099 estimated_count);
3100 }
3101
3102 /* Reset the progress counters */
3104}
3105
3106/*
3107 * lazy_vacuum_one_index() -- vacuum index relation.
3108 *
3109 * Delete all the index tuples containing a TID collected in
3110 * vacrel->dead_items. Also update running statistics. Exact
3111 * details depend on index AM's ambulkdelete routine.
3112 *
3113 * reltuples is the number of heap tuples to be passed to the
3114 * bulkdelete callback. It's always assumed to be estimated.
3115 * See indexam.sgml for more info.
3116 *
3117 * Returns bulk delete stats derived from input stats
3118 */
3119static IndexBulkDeleteResult *
3120lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3121 double reltuples, LVRelState *vacrel)
3122{
3125
3126 ivinfo.index = indrel;
3127 ivinfo.heaprel = vacrel->rel;
3128 ivinfo.analyze_only = false;
3129 ivinfo.report_progress = false;
3130 ivinfo.estimated_count = true;
3131 ivinfo.message_level = DEBUG2;
3132 ivinfo.num_heap_tuples = reltuples;
3133 ivinfo.strategy = vacrel->bstrategy;
3134
3135 /*
3136 * Update error traceback information.
3137 *
3138 * The index name is saved during this phase and restored immediately
3139 * after this phase. See vacuum_error_callback.
3140 */
3141 Assert(vacrel->indname == NULL);
3146
3147 /* Do bulk deletion */
3148 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3149 vacrel->dead_items_info);
3150
3151 /* Revert to the previous phase information for error traceback */
3153 pfree(vacrel->indname);
3154 vacrel->indname = NULL;
3155
3156 return istat;
3157}
3158
3159/*
3160 * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3161 *
3162 * Calls index AM's amvacuumcleanup routine. reltuples is the number
3163 * of heap tuples and estimated_count is true if reltuples is an
3164 * estimated value. See indexam.sgml for more info.
3165 *
3166 * Returns bulk delete stats derived from input stats
3167 */
3168static IndexBulkDeleteResult *
3169lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3170 double reltuples, bool estimated_count,
3171 LVRelState *vacrel)
3172{
3175
3176 ivinfo.index = indrel;
3177 ivinfo.heaprel = vacrel->rel;
3178 ivinfo.analyze_only = false;
3179 ivinfo.report_progress = false;
3180 ivinfo.estimated_count = estimated_count;
3181 ivinfo.message_level = DEBUG2;
3182
3183 ivinfo.num_heap_tuples = reltuples;
3184 ivinfo.strategy = vacrel->bstrategy;
3185
3186 /*
3187 * Update error traceback information.
3188 *
3189 * The index name is saved during this phase and restored immediately
3190 * after this phase. See vacuum_error_callback.
3191 */
3192 Assert(vacrel->indname == NULL);
3197
3198 istat = vac_cleanup_one_index(&ivinfo, istat);
3199
3200 /* Revert to the previous phase information for error traceback */
3202 pfree(vacrel->indname);
3203 vacrel->indname = NULL;
3204
3205 return istat;
3206}
3207
3208/*
3209 * should_attempt_truncation - should we attempt to truncate the heap?
3210 *
3211 * Don't even think about it unless we have a shot at releasing a goodly
3212 * number of pages. Otherwise, the time taken isn't worth it, mainly because
3213 * an AccessExclusive lock must be replayed on any hot standby, where it can
3214 * be particularly disruptive.
3215 *
3216 * Also don't attempt it if wraparound failsafe is in effect. The entire
3217 * system might be refusing to allocate new XIDs at this point. The system
3218 * definitely won't return to normal unless and until VACUUM actually advances
3219 * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3220 * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3221 * truncate the table under these circumstances, an XID exhaustion error might
3222 * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3223 * There is very little chance of truncation working out when the failsafe is
3224 * in effect in any case. lazy_scan_prune makes the optimistic assumption
3225 * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3226 * we're called.
3227 */
3228static bool
3229should_attempt_truncation(LVRelState *vacrel)
3230{
3231 BlockNumber possibly_freeable;
3232
3233 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3234 return false;
3235
3236 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3237 if (possibly_freeable > 0 &&
3238 (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3239 possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3240 return true;
3241
3242 return false;
3243}
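/*
 * Editor's illustrative sketch -- not part of vacuumlazy.c.  It spells out
 * the size test used by should_attempt_truncation() above: with
 * REL_TRUNCATE_MINIMUM = 1000 and REL_TRUNCATE_FRACTION = 16, a 100,000 page
 * table with 500 potentially freeable tail pages is left alone, while one
 * with 7,000 freeable tail pages (>= 100,000 / 16 = 6,250) is a truncation
 * candidate.  The function name is hypothetical.
 */
static bool
truncation_is_worthwhile_example(BlockNumber rel_pages,
								 BlockNumber nonempty_pages)
{
	BlockNumber possibly_freeable = rel_pages - nonempty_pages;

	return possibly_freeable > 0 &&
		(possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		 possibly_freeable >= rel_pages / REL_TRUNCATE_FRACTION);
}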
3244
3245/*
3246 * lazy_truncate_heap - try to truncate off any empty pages at the end
3247 */
3248static void
3249lazy_truncate_heap(LVRelState *vacrel)
3250{
3251 BlockNumber orig_rel_pages = vacrel->rel_pages;
3252 BlockNumber new_rel_pages;
3253 bool lock_waiter_detected;
3254 int lock_retry;
3255
3256 /* Report that we are now truncating */
3259
3260 /* Update error traceback information one last time */
3262 vacrel->nonempty_pages, InvalidOffsetNumber);
3263
3264 /*
3265 * Loop until no more truncating can be done.
3266 */
3267 do
3268 {
3269 /*
3270 * We need full exclusive lock on the relation in order to do
3271 * truncation. If we can't get it, give up rather than waiting --- we
3272 * don't want to block other backends, and we don't want to deadlock
3273 * (which is quite possible considering we already hold a lower-grade
3274 * lock).
3275 */
3276 lock_waiter_detected = false;
3277 lock_retry = 0;
3278 while (true)
3279 {
3281 break;
3282
3283 /*
3284 * Check for interrupts while trying to (re-)acquire the exclusive
3285 * lock.
3286 */
3288
3291 {
3292 /*
3293 * We failed to establish the lock in the specified number of
3294 * retries. This means we give up truncating.
3295 */
3296 ereport(vacrel->verbose ? INFO : DEBUG2,
3297 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3298 vacrel->relname)));
3299 return;
3300 }
3301
3307 }
3308
3309 /*
3310 * Now that we have exclusive lock, look to see if the rel has grown
3311 * whilst we were vacuuming with non-exclusive lock. If so, give up;
3312 * the newly added pages presumably contain non-deletable tuples.
3313 */
3316 {
3317 /*
3318 * Note: we intentionally don't update vacrel->rel_pages with the
3319 * new rel size here. If we did, it would amount to assuming that
3320 * the new pages are empty, which is unlikely. Leaving the numbers
3321 * alone amounts to assuming that the new pages have the same
3322 * tuple density as existing ones, which is less unlikely.
3323 */
3325 return;
3326 }
3327
3328 /*
3329 * Scan backwards from the end to verify that the end pages actually
3330 * contain no tuples. This is *necessary*, not optional, because
3331 * other backends could have added tuples to these pages whilst we
3332 * were vacuuming.
3333 */
3335 vacrel->blkno = new_rel_pages;
3336
3338 {
3339 /* can't do anything after all */
3341 return;
3342 }
3343
3344 /*
3345 * Okay to truncate.
3346 */
3348
3349 /*
3350 * We can release the exclusive lock as soon as we have truncated.
3351 * Other backends can't safely access the relation until they have
3352 * processed the smgr invalidation that smgrtruncate sent out ... but
3353 * that should happen as part of standard invalidation processing once
3354 * they acquire lock on the relation.
3355 */
3357
3358 /*
3359 * Update statistics. Here, it *is* correct to adjust rel_pages
3360 * without also touching reltuples, since the tuple count wasn't
3361 * changed by the truncation.
3362 */
3363 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3364 vacrel->rel_pages = new_rel_pages;
3365
3366 ereport(vacrel->verbose ? INFO : DEBUG2,
3367 (errmsg("table \"%s\": truncated %u to %u pages",
3368 vacrel->relname,
3371 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3372}
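/*
 * Editor's illustrative sketch -- not part of vacuumlazy.c.  The retry loop
 * in lazy_truncate_heap() above waits VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
 * (50 ms) between attempts to get AccessExclusiveLock and gives up once the
 * total wait reaches VACUUM_TRUNCATE_LOCK_TIMEOUT (5000 ms), i.e. after
 * 5000 / 50 = 100 failed attempts.  The function name is hypothetical.
 */
static int
truncate_lock_retry_budget_example(void)
{
	return VACUUM_TRUNCATE_LOCK_TIMEOUT / VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL;
}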
3373
3374/*
3375 * Rescan end pages to verify that they are (still) empty of tuples.
3376 *
3377 * Returns number of nondeletable pages (last nonempty page + 1).
3378 */
3379static BlockNumber
3380count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3381{
3382 StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3383 "prefetch size must be power of 2");
3384
3385 BlockNumber blkno;
3386 BlockNumber prefetchedUntil;
3387 instr_time starttime;
3388
3389 /* Initialize the starttime if we check for conflicting lock requests */
3390 INSTR_TIME_SET_CURRENT(starttime);
3391
3392 /*
3393 * Start checking blocks at what we believe relation end to be and move
3394 * backwards. (Strange coding of loop control is needed because blkno is
3395 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3396 * in forward direction, so that OS-level readahead can kick in.
3397 */
3398 blkno = vacrel->rel_pages;
3399 prefetchedUntil = InvalidBlockNumber;
3400 while (blkno > vacrel->nonempty_pages)
3401 {
3402 Buffer buf;
3403 Page page;
3404 OffsetNumber offnum,
3405 maxoff;
3406 bool hastup;
3407
3408 /*
3409 * Check if another process requests a lock on our relation. We are
3410 * holding an AccessExclusiveLock here, so they will be waiting. We
3411 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3412 * only check if that interval has elapsed once every 32 blocks to
3413 * keep the number of system calls and actual shared lock table
3414 * lookups to a minimum.
3415 */
3416 if ((blkno % 32) == 0)
3417 {
3420
3423 INSTR_TIME_SUBTRACT(elapsed, starttime);
3424 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3426 {
3428 {
3429 ereport(vacrel->verbose ? INFO : DEBUG2,
3430 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3431 vacrel->relname)));
3432
3433 *lock_waiter_detected = true;
3434 return blkno;
3435 }
3436 starttime = currenttime;
3437 }
3438 }
3439
3440 /*
3441 * We don't insert a vacuum delay point here, because we have an
3442 * exclusive lock on the table which we want to hold for as short a
3443 * time as possible. We still need to check for interrupts however.
3444 */
3446
3447 blkno--;
3448
3449 /* If we haven't prefetched this lot yet, do so now. */
3450 if (prefetchedUntil > blkno)
3451 {
3452 BlockNumber prefetchStart;
3453 BlockNumber pblkno;
3454
3455 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3456 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3457 {
3458 PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3459 CHECK_FOR_INTERRUPTS();
3460 }
3461 prefetchedUntil = prefetchStart;
3462 }
3463
3465 vacrel->bstrategy);
3466
3467 /* In this phase we only need shared access to the buffer */
3469
3470 page = BufferGetPage(buf);
3471
3472 if (PageIsNew(page) || PageIsEmpty(page))
3473 {
3475 continue;
3476 }
3477
3478 hastup = false;
3479 maxoff = PageGetMaxOffsetNumber(page);
3480 for (offnum = FirstOffsetNumber;
3481 offnum <= maxoff;
3482 offnum = OffsetNumberNext(offnum))
3483 {
3484 ItemId itemid;
3485
3486 itemid = PageGetItemId(page, offnum);
3487
3488 /*
3489 * Note: any non-unused item should be taken as a reason to keep
3490 * this page. Even an LP_DEAD item makes truncation unsafe, since
3491 * we must not have cleaned out its index entries.
3492 */
3493 if (ItemIdIsUsed(itemid))
3494 {
3495 hastup = true;
3496 break; /* can stop scanning */
3497 }
3498 } /* scan along page */
3499
3501
3502 /* Done scanning if we found a tuple here */
3503 if (hastup)
3504 return blkno + 1;
3505 }
3506
3507 /*
3508 * If we fall out of the loop, all the previously-thought-to-be-empty
3509 * pages still are; we need not bother to look at the last known-nonempty
3510 * page.
3511 */
3512 return vacrel->nonempty_pages;
3513}
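/*
 * Editor's illustrative sketch -- not part of vacuumlazy.c.  The backward
 * scan above prefetches in aligned windows of PREFETCH_SIZE (32) blocks by
 * masking off the low bits of the current block number.  For example, with
 * blkno = 1000 the window starts at 1000 & ~31 = 992, so blocks 992..1000
 * are prefetched together.  The function name is hypothetical.
 */
static BlockNumber
prefetch_window_start_example(BlockNumber blkno)
{
	/* PREFETCH_SIZE is a power of 2, so this rounds down to a multiple of it */
	return blkno & ~(PREFETCH_SIZE - 1);
}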
3514
3515/*
3516 * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3517 * shared memory). Sets both in vacrel for caller.
3518 *
3519 * Also handles parallel initialization as part of allocating dead_items in
3520 * DSM when required.
3521 */
3522static void
3523dead_items_alloc(LVRelState *vacrel, int nworkers)
3524{
3525 VacDeadItemsInfo *dead_items_info;
3527 autovacuum_work_mem != -1 ?
3529
3530 /*
3531 * Initialize state for a parallel vacuum. As of now, only one worker can
3532 * be used for an index, so we invoke parallelism only if there are at
3533 * least two indexes on a table.
3534 */
3535 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3536 {
3537 /*
3538 * Since parallel workers cannot access data in temporary tables, we
3539 * can't perform parallel vacuum on them.
3540 */
3542 {
3543 /*
3544 * Give warning only if the user explicitly tries to perform a
3545 * parallel vacuum on the temporary table.
3546 */
3547 if (nworkers > 0)
3549 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3550 vacrel->relname)));
3551 }
3552 else
3553 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3554 vacrel->nindexes, nworkers,
3556 vacrel->verbose ? INFO : DEBUG2,
3557 vacrel->bstrategy);
3558
3559 /*
3560 * If parallel mode started, dead_items and dead_items_info spaces are
3561 * allocated in DSM.
3562 */
3564 {
3566 &vacrel->dead_items_info);
3567 return;
3568 }
3569 }
3570
3571 /*
3572 * Serial VACUUM case. Allocate both dead_items and dead_items_info
3573 * locally.
3574 */
3575
3576 dead_items_info = palloc_object(VacDeadItemsInfo);
3577 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3578 dead_items_info->num_items = 0;
3579 vacrel->dead_items_info = dead_items_info;
3580
3581 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3582}
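/*
 * Editor's illustrative sketch -- not part of vacuumlazy.c.  dead_items_alloc()
 * above sizes the TID store from a work_mem setting expressed in kilobytes
 * (autovacuum_work_mem when set, otherwise maintenance_work_mem).  The helper
 * below shows the same conversion: e.g. a 64MB setting (65536 kB) yields a
 * 67,108,864 byte budget.  The function name is hypothetical.
 */
static size_t
dead_items_max_bytes_example(int vac_work_mem_kb)
{
	return (size_t) vac_work_mem_kb * 1024;
}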
3583
3584/*
3585 * Add the given block number and offset numbers to dead_items.
3586 */
3587static void
3588dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3589 int num_offsets)
3590{
3591 const int prog_index[2] = {
3594 };
3595 int64 prog_val[2];
3596
3597 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3598 vacrel->dead_items_info->num_items += num_offsets;
3599
3600 /* update the progress information */
3601 prog_val[0] = vacrel->dead_items_info->num_items;
3602 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3604}
3605
3606/*
3607 * Forget all collected dead items.
3608 */
3609static void
3610dead_items_reset(LVRelState *vacrel)
3611{
3612 /* Update statistics for dead items */
3613 vacrel->num_dead_items_resets++;
3614 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3615
3617 {
3619 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3620 &vacrel->dead_items_info);
3621 return;
3622 }
3623
3624 /* Recreate the tidstore with the same max_bytes limitation */
3625 TidStoreDestroy(vacrel->dead_items);
3626 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3627
3628 /* Reset the counter */
3629 vacrel->dead_items_info->num_items = 0;
3630}
3631
3632/*
3633 * Perform cleanup for resources allocated in dead_items_alloc
3634 */
3635static void
3636dead_items_cleanup(LVRelState *vacrel)
3637{
3639 {
3640 /* Don't bother with pfree here */
3641 return;
3642 }
3643
3644 /* End parallel mode */
3645 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3646 vacrel->pvs = NULL;
3647}
3648
3649#ifdef USE_ASSERT_CHECKING
3650
3651/*
3652 * Wrapper for heap_page_would_be_all_visible() which can be used for callers
3653 * that expect no LP_DEAD on the page. Currently assert-only, but there is no
3654 * reason not to use it outside of asserts.
3655 */
3656static bool
3657heap_page_is_all_visible(Relation rel, Buffer buf,
3658 TransactionId OldestXmin,
3659 bool *all_frozen,
3660 TransactionId *visibility_cutoff_xid,
3661 OffsetNumber *logging_offnum)
3662{
3663
3664 return heap_page_would_be_all_visible(rel, buf,
3665 OldestXmin,
3666 NULL, 0,
3667 all_frozen,
3668 visibility_cutoff_xid,
3669 logging_offnum);
3670}
3671#endif
3672
3673/*
3674 * Check whether the heap page in buf is all-visible except for the dead
3675 * tuples referenced in the deadoffsets array.
3676 *
3677 * Vacuum uses this to check if a page would become all-visible after reaping
3678 * known dead tuples. This function does not remove the dead items.
3679 *
3680 * This cannot be called in a critical section, as the visibility checks may
3681 * perform IO and allocate memory.
3682 *
3683 * Returns true if the page is all-visible other than the provided
3684 * deadoffsets and false otherwise.
3685 *
3686 * OldestXmin is used to determine visibility.
3687 *
3688 * Output parameters:
3689 *
3690 * - *all_frozen: true if every tuple on the page is frozen
3691 * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
3692 * - *logging_offnum: OffsetNumber of current tuple being processed;
3693 * used by vacuum's error callback system.
3694 *
3695 * Callers looking to verify that the page is already all-visible can call
3696 * heap_page_is_all_visible().
3697 *
3698 * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3699 * If you modify this function, ensure consistency with that code. An
3700 * assertion cross-checks that both remain in agreement. Do not introduce new
3701 * side-effects.
3702 */
3703static bool
3704heap_page_would_be_all_visible(Relation rel, Buffer buf,
3705 TransactionId OldestXmin,
3706 OffsetNumber *deadoffsets,
3707 int ndeadoffsets,
3708 bool *all_frozen,
3709 TransactionId *visibility_cutoff_xid,
3710 OffsetNumber *logging_offnum)
3711{
3712 Page page = BufferGetPage(buf);
3713 BlockNumber blockno = BufferGetBlockNumber(buf);
3714 OffsetNumber offnum,
3715 maxoff;
3716 bool all_visible = true;
3717 int matched_dead_count = 0;
3718
3719 *visibility_cutoff_xid = InvalidTransactionId;
3720 *all_frozen = true;
3721
3722 Assert(ndeadoffsets == 0 || deadoffsets);
3723
3724#ifdef USE_ASSERT_CHECKING
3725 /* Confirm input deadoffsets[] is strictly sorted */
3726 if (ndeadoffsets > 1)
3727 {
3728 for (int i = 1; i < ndeadoffsets; i++)
3729 Assert(deadoffsets[i - 1] < deadoffsets[i]);
3730 }
3731#endif
3732
3733 maxoff = PageGetMaxOffsetNumber(page);
3734 for (offnum = FirstOffsetNumber;
3735 offnum <= maxoff && all_visible;
3736 offnum = OffsetNumberNext(offnum))
3737 {
3738 ItemId itemid;
3739 HeapTupleData tuple;
3740
3741 /*
3742 * Set the offset number so that we can display it along with any
3743 * error that occurred while processing this tuple.
3744 */
3745 *logging_offnum = offnum;
3746 itemid = PageGetItemId(page, offnum);
3747
3748 /* Unused or redirect line pointers are of no interest */
3749 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3750 continue;
3751
3752 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3753
3754 /*
3755 * Dead line pointers can have index pointers pointing to them. So
3756 * they can't be treated as visible
3757 */
3758 if (ItemIdIsDead(itemid))
3759 {
3760 if (!deadoffsets ||
3761 matched_dead_count >= ndeadoffsets ||
3762 deadoffsets[matched_dead_count] != offnum)
3763 {
3764 *all_frozen = all_visible = false;
3765 break;
3766 }
3767 matched_dead_count++;
3768 continue;
3769 }
3770
3771 Assert(ItemIdIsNormal(itemid));
3772
3773 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3774 tuple.t_len = ItemIdGetLength(itemid);
3775 tuple.t_tableOid = RelationGetRelid(rel);
3776
3777 /* Visibility checks may do IO or allocate memory */
3778 Assert(CritSectionCount == 0);
3779 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3780 {
3781 case HEAPTUPLE_LIVE:
3782 {
3783 TransactionId xmin;
3784
3785 /* Check comments in lazy_scan_prune. */
3786 if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3787 {
3788 all_visible = false;
3789 *all_frozen = false;
3790 break;
3791 }
3792
3793 /*
3794 * The inserter definitely committed. But is it old enough
3795 * that everyone sees it as committed?
3796 */
3797 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3798 if (!TransactionIdPrecedes(xmin, OldestXmin))
3799 {
3800 all_visible = false;
3801 *all_frozen = false;
3802 break;
3803 }
3804
3805 /* Track newest xmin on page. */
3806 if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3807 TransactionIdIsNormal(xmin))
3808 *visibility_cutoff_xid = xmin;
3809
3810 /* Check whether this tuple is already frozen or not */
3811 if (all_visible && *all_frozen &&
3812 heap_tuple_needs_eventual_freeze(tuple.t_data))
3813 *all_frozen = false;
3814 }
3815 break;
3816
3817 case HEAPTUPLE_DEAD:
3818 case HEAPTUPLE_RECENTLY_DEAD:
3819 case HEAPTUPLE_INSERT_IN_PROGRESS:
3820 case HEAPTUPLE_DELETE_IN_PROGRESS:
3821 {
3822 all_visible = false;
3823 *all_frozen = false;
3824 break;
3825 }
3826 default:
3827 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3828 break;
3829 }
3830 } /* scan along page */
3831
3832 /* Clear the offset information once we have processed the given page. */
3833 *logging_offnum = InvalidOffsetNumber;
3834
3835 return all_visible;
3836}
3837
3838/*
3839 * Update index statistics in pg_class if the statistics are accurate.
3840 */
3841static void
3842update_relstats_all_indexes(LVRelState *vacrel)
3843{
3844 Relation *indrels = vacrel->indrels;
3845 int nindexes = vacrel->nindexes;
3846 IndexBulkDeleteResult **indstats = vacrel->indstats;
3847
3848 Assert(vacrel->do_index_cleanup);
3849
3850 for (int idx = 0; idx < nindexes; idx++)
3851 {
3852 Relation indrel = indrels[idx];
3853 IndexBulkDeleteResult *istat = indstats[idx];
3854
3855 if (istat == NULL || istat->estimated_count)
3856 continue;
3857
3858 /* Update index statistics */
3859 vac_update_relstats(indrel,
3860 istat->num_pages,
3861 istat->num_index_tuples,
3862 0, 0,
3863 false,
3864 InvalidTransactionId,
3865 InvalidMultiXactId,
3866 NULL, NULL, false);
3867 }
3868}
3869
3870/*
3871 * Error context callback for errors occurring during vacuum. The error
3872 * context messages for index phases should match the messages set in parallel
3873 * vacuum. If you change this function for those phases, change
3874 * parallel_vacuum_error_callback() as well.
3875 */
3876static void
3877vacuum_error_callback(void *arg)
3878{
3879 LVRelState *errinfo = arg;
3880
3881 switch (errinfo->phase)
3882 {
3883 case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3884 if (BlockNumberIsValid(errinfo->blkno))
3885 {
3886 if (OffsetNumberIsValid(errinfo->offnum))
3887 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3888 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3889 else
3890 errcontext("while scanning block %u of relation \"%s.%s\"",
3891 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3892 }
3893 else
3894 errcontext("while scanning relation \"%s.%s\"",
3895 errinfo->relnamespace, errinfo->relname);
3896 break;
3897
3898 case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3899 if (BlockNumberIsValid(errinfo->blkno))
3900 {
3901 if (OffsetNumberIsValid(errinfo->offnum))
3902 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3903 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3904 else
3905 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3906 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3907 }
3908 else
3909 errcontext("while vacuuming relation \"%s.%s\"",
3910 errinfo->relnamespace, errinfo->relname);
3911 break;
3912
3913 case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3914 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3915 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3916 break;
3917
3918 case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3919 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3920 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3921 break;
3922
3923 case VACUUM_ERRCB_PHASE_TRUNCATE:
3924 if (BlockNumberIsValid(errinfo->blkno))
3925 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3926 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3927 break;
3928
3929 case VACUUM_ERRCB_PHASE_UNKNOWN:
3930 default:
3931 return; /* do nothing; the errinfo may not be
3932 * initialized */
3933 }
3934}
3935
3936/*
3937 * Updates the information required for vacuum error callback. This also saves
3938 * the current information which can be later restored via restore_vacuum_error_info.
3939 */
3940static void
3941update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3942 int phase, BlockNumber blkno, OffsetNumber offnum)
3943{
3944 if (saved_vacrel)
3945 {
3946 saved_vacrel->offnum = vacrel->offnum;
3947 saved_vacrel->blkno = vacrel->blkno;
3948 saved_vacrel->phase = vacrel->phase;
3949 }
3950
3951 vacrel->blkno = blkno;
3952 vacrel->offnum = offnum;
3953 vacrel->phase = phase;
3954}
3955
3956/*
3957 * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3958 */
3959static void
3960restore_vacuum_error_info(LVRelState *vacrel,
3961 const LVSavedErrInfo *saved_vacrel)
3962{
3963 vacrel->blkno = saved_vacrel->blkno;
3964 vacrel->offnum = saved_vacrel->offnum;
3965 vacrel->phase = saved_vacrel->phase;
3966}
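
update_vacuum_error_info() and restore_vacuum_error_info() are used in a save/restore pattern around each sub-phase of work. A hedged sketch of the idiom (the phase and block values below are placeholders, not taken from the file):

    LVSavedErrInfo saved_err_info;

    /* Switch the error-callback context to a per-block heap-vacuum phase. */
    update_vacuum_error_info(vacrel, &saved_err_info,
                             VACUUM_ERRCB_PHASE_VACUUM_HEAP,
                             blkno, InvalidOffsetNumber);

    /* ... per-block work that might report an error goes here ... */

    /* Put back whatever phase/block/offset information was active before. */
    restore_vacuum_error_info(vacrel, &saved_err_info);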

◆ MAX_EAGER_FREEZE_SUCCESS_RATE

#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2

Definition at line 238 of file vacuumlazy.c.

◆ ParallelVacuumIsActive

#define ParallelVacuumIsActive (   vacrel)    ((vacrel)->pvs != NULL)

Definition at line 218 of file vacuumlazy.c.

◆ PREFETCH_SIZE

#define PREFETCH_SIZE   ((BlockNumber) 32)

Definition at line 212 of file vacuumlazy.c.

◆ REL_TRUNCATE_FRACTION

#define REL_TRUNCATE_FRACTION   16

Definition at line 167 of file vacuumlazy.c.

◆ REL_TRUNCATE_MINIMUM

#define REL_TRUNCATE_MINIMUM   1000

Definition at line 166 of file vacuumlazy.c.

◆ SKIP_PAGES_THRESHOLD

#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)

Definition at line 206 of file vacuumlazy.c.

◆ VACUUM_FSM_EVERY_PAGES

#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 199 of file vacuumlazy.c.
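
Hedged arithmetic for orientation: with the default BLCKSZ of 8192 bytes, ((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ works out to 1,048,576 blocks, so the interval always corresponds to 8 GB of heap regardless of the block size the server was built with.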

◆ VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL

#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */

Definition at line 176 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_TIMEOUT

#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */

Definition at line 178 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL

#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */

Definition at line 177 of file vacuumlazy.c.

Typedef Documentation

◆ LVRelState

◆ LVSavedErrInfo

Enumeration Type Documentation

◆ VacErrPhase

Enumerator
VACUUM_ERRCB_PHASE_UNKNOWN 
VACUUM_ERRCB_PHASE_SCAN_HEAP 
VACUUM_ERRCB_PHASE_VACUUM_INDEX 
VACUUM_ERRCB_PHASE_VACUUM_HEAP 
VACUUM_ERRCB_PHASE_INDEX_CLEANUP 
VACUUM_ERRCB_PHASE_TRUNCATE 

Definition at line 221 of file vacuumlazy.c.

Function Documentation

◆ cmpOffsetNumbers()

static int cmpOffsetNumbers ( const void * a,
const void * b 
)
static

Definition at line 1958 of file vacuumlazy.c.

1959{
1960 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1961}

References a, b, and pg_cmp_u16().

Referenced by lazy_scan_prune().
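
For orientation, a hedged sketch (not taken from the file) of how a comparator with this shape is used: it matches what qsort() expects, so a scratch array of per-page offsets can be put into the ascending order that the dead-items bookkeeping relies on. The wrapper name below is illustrative only.

    /* Illustrative sketch: sort one block's offset array in place. */
    static void
    sort_offsets(OffsetNumber *offsets, int noffsets)
    {
        qsort(offsets, noffsets, sizeof(OffsetNumber), cmpOffsetNumbers);
    }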

◆ count_nondeletable_pages()

static BlockNumber count_nondeletable_pages ( LVRelState * vacrel,
bool * lock_waiter_detected 
)
static

Definition at line 3381 of file vacuumlazy.c.

3382{
3383 StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3384 "prefetch size must be power of 2");
3385
3386 BlockNumber blkno;
3387 BlockNumber prefetchedUntil;
3388 instr_time starttime;
3389
3390 /* Initialize the starttime if we check for conflicting lock requests */
3391 INSTR_TIME_SET_CURRENT(starttime);
3392
3393 /*
3394 * Start checking blocks at what we believe relation end to be and move
3395 * backwards. (Strange coding of loop control is needed because blkno is
3396 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3397 * in forward direction, so that OS-level readahead can kick in.
3398 */
3399 blkno = vacrel->rel_pages;
3400 prefetchedUntil = InvalidBlockNumber;
3401 while (blkno > vacrel->nonempty_pages)
3402 {
3403 Buffer buf;
3404 Page page;
3405 OffsetNumber offnum,
3406 maxoff;
3407 bool hastup;
3408
3409 /*
3410 * Check if another process requests a lock on our relation. We are
3411 * holding an AccessExclusiveLock here, so they will be waiting. We
3412 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3413 * only check if that interval has elapsed once every 32 blocks to
3414 * keep the number of system calls and actual shared lock table
3415 * lookups to a minimum.
3416 */
3417 if ((blkno % 32) == 0)
3418 {
3419 instr_time currenttime;
3420 instr_time elapsed;
3421
3422 INSTR_TIME_SET_CURRENT(currenttime);
3423 elapsed = currenttime;
3424 INSTR_TIME_SUBTRACT(elapsed, starttime);
3425 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3426 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3427 {
3428 if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3429 {
3430 ereport(vacrel->verbose ? INFO : DEBUG2,
3431 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3432 vacrel->relname)));
3433
3434 *lock_waiter_detected = true;
3435 return blkno;
3436 }
3437 starttime = currenttime;
3438 }
3439 }
3440
3441 /*
3442 * We don't insert a vacuum delay point here, because we have an
3443 * exclusive lock on the table which we want to hold for as short a
3444 * time as possible. We still need to check for interrupts however.
3445 */
3446 CHECK_FOR_INTERRUPTS();
3447
3448 blkno--;
3449
3450 /* If we haven't prefetched this lot yet, do so now. */
3451 if (prefetchedUntil > blkno)
3452 {
3453 BlockNumber prefetchStart;
3454 BlockNumber pblkno;
3455
3456 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3457 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3458 {
3459 PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3460 CHECK_FOR_INTERRUPTS();
3461 }
3462 prefetchedUntil = prefetchStart;
3463 }
3464
3465 buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3466 vacrel->bstrategy);
3467
3468 /* In this phase we only need shared access to the buffer */
3469 LockBuffer(buf, BUFFER_LOCK_SHARE);
3470
3471 page = BufferGetPage(buf);
3472
3473 if (PageIsNew(page) || PageIsEmpty(page))
3474 {
3475 UnlockReleaseBuffer(buf);
3476 continue;
3477 }
3478
3479 hastup = false;
3480 maxoff = PageGetMaxOffsetNumber(page);
3481 for (offnum = FirstOffsetNumber;
3482 offnum <= maxoff;
3483 offnum = OffsetNumberNext(offnum))
3484 {
3485 ItemId itemid;
3486
3487 itemid = PageGetItemId(page, offnum);
3488
3489 /*
3490 * Note: any non-unused item should be taken as a reason to keep
3491 * this page. Even an LP_DEAD item makes truncation unsafe, since
3492 * we must not have cleaned out its index entries.
3493 */
3494 if (ItemIdIsUsed(itemid))
3495 {
3496 hastup = true;
3497 break; /* can stop scanning */
3498 }
3499 } /* scan along page */
3500
3501 UnlockReleaseBuffer(buf);
3502
3503 /* Done scanning if we found a tuple here */
3504 if (hastup)
3505 return blkno + 1;
3506 }
3507
3508 /*
3509 * If we fall out of the loop, all the previously-thought-to-be-empty
3510 * pages still are; we need not bother to look at the last known-nonempty
3511 * page.
3512 */
3513 return vacrel->nonempty_pages;
3514}

References AccessExclusiveLock, buf, BUFFER_LOCK_SHARE, BufferGetPage(), CHECK_FOR_INTERRUPTS, DEBUG2, ereport, errmsg(), fb(), FirstOffsetNumber, INFO, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBlockNumber, ItemIdIsUsed, LockBuffer(), LockHasWaitersRelation(), MAIN_FORKNUM, OffsetNumberNext, PageGetItemId(), PageGetMaxOffsetNumber(), PageIsEmpty(), PageIsNew(), PREFETCH_SIZE, PrefetchBuffer(), RBM_NORMAL, ReadBufferExtended(), StaticAssertDecl, UnlockReleaseBuffer(), and VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL.

Referenced by lazy_truncate_heap().
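
A quick worked example of the prefetch window (numbers illustrative): with PREFETCH_SIZE = 32, prefetchStart = blkno & ~(PREFETCH_SIZE - 1) rounds block 100 down to 96, so blocks 96 through 100 are prefetched in ascending order even though the truncation scan itself walks the relation backwards.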

◆ dead_items_add()

static void dead_items_add ( LVRelState * vacrel,
BlockNumber  blkno,
OffsetNumber * offsets,
int  num_offsets 
)
static

Definition at line 3589 of file vacuumlazy.c.

3591{
3592 const int prog_index[2] = {
3593 PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3594 PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3595 };
3596 int64 prog_val[2];
3597
3598 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3599 vacrel->dead_items_info->num_items += num_offsets;
3600
3601 /* update the progress information */
3602 prog_val[0] = vacrel->dead_items_info->num_items;
3603 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3604 pgstat_progress_update_multi_param(2, prog_index, prog_val);
3605}

References fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS, TidStoreMemoryUsage(), and TidStoreSetBlockOffsets().

Referenced by lazy_scan_noprune(), and lazy_scan_prune().
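
A hedged sketch of the calling pattern implied by the callers above (the helper name is illustrative, not from the file): the per-page code collects the block's LP_DEAD offsets and records them with a single call per block.

    /* Illustrative sketch: record one block's dead item offsets. */
    static void
    record_block_dead_items(LVRelState *vacrel, BlockNumber blkno,
                            OffsetNumber *deadoffsets, int ndead)
    {
        if (ndead > 0)
            dead_items_add(vacrel, blkno, deadoffsets, ndead);
    }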

◆ dead_items_alloc()

static void dead_items_alloc ( LVRelState * vacrel,
int  nworkers 
)
static

Definition at line 3524 of file vacuumlazy.c.

3525{
3526 VacDeadItemsInfo *dead_items_info;
3527 int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3528 autovacuum_work_mem != -1 ?
3529 autovacuum_work_mem : maintenance_work_mem;
3530
3531 /*
3532 * Initialize state for a parallel vacuum. As of now, only one worker can
3533 * be used for an index, so we invoke parallelism only if there are at
3534 * least two indexes on a table.
3535 */
3536 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3537 {
3538 /*
3539 * Since parallel workers cannot access data in temporary tables, we
3540 * can't perform parallel vacuum on them.
3541 */
3542 if (RelationUsesLocalBuffers(vacrel->rel))
3543 {
3544 /*
3545 * Give warning only if the user explicitly tries to perform a
3546 * parallel vacuum on the temporary table.
3547 */
3548 if (nworkers > 0)
3549 ereport(WARNING,
3550 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3551 vacrel->relname)));
3552 }
3553 else
3554 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3555 vacrel->nindexes, nworkers,
3557 vacrel->verbose ? INFO : DEBUG2,
3558 vacrel->bstrategy);
3559
3560 /*
3561 * If parallel mode started, dead_items and dead_items_info spaces are
3562 * allocated in DSM.
3563 */
3564 if (ParallelVacuumIsActive(vacrel))
3565 {
3566 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3567 &vacrel->dead_items_info);
3568 return;
3569 }
3570 }
3571
3572 /*
3573 * Serial VACUUM case. Allocate both dead_items and dead_items_info
3574 * locally.
3575 */
3576
3577 dead_items_info = palloc_object(VacDeadItemsInfo);
3578 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3579 dead_items_info->num_items = 0;
3580 vacrel->dead_items_info = dead_items_info;
3581
3582 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3583}

References AmAutoVacuumWorkerProcess, autovacuum_work_mem, ParallelVacuumState::dead_items, DEBUG2, ereport, errmsg(), fb(), INFO, maintenance_work_mem, VacDeadItemsInfo::max_bytes, VacDeadItemsInfo::num_items, palloc_object, parallel_vacuum_get_dead_items(), parallel_vacuum_init(), ParallelVacuumIsActive, RelationUsesLocalBuffers, TidStoreCreateLocal(), and WARNING.

Referenced by heap_vacuum_rel().

◆ dead_items_cleanup()

static void dead_items_cleanup ( LVRelState * vacrel)
static

Definition at line 3637 of file vacuumlazy.c.

3638{
3639 if (!ParallelVacuumIsActive(vacrel))
3640 {
3641 /* Don't bother with pfree here */
3642 return;
3643 }
3644
3645 /* End parallel mode */
3646 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3647 vacrel->pvs = NULL;
3648}

References fb(), parallel_vacuum_end(), and ParallelVacuumIsActive.

Referenced by heap_vacuum_rel().

◆ dead_items_reset()

static void dead_items_reset ( LVRelState * vacrel)
static

Definition at line 3611 of file vacuumlazy.c.

3612{
3613 /* Update statistics for dead items */
3614 vacrel->num_dead_items_resets++;
3615 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3616
3617 if (ParallelVacuumIsActive(vacrel))
3618 {
3619 parallel_vacuum_reset_dead_items(vacrel->pvs);
3620 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3621 &vacrel->dead_items_info);
3622 return;
3623 }
3624
3625 /* Recreate the tidstore with the same max_bytes limitation */
3626 TidStoreDestroy(vacrel->dead_items);
3627 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3628
3629 /* Reset the counter */
3630 vacrel->dead_items_info->num_items = 0;
3631}

References fb(), parallel_vacuum_get_dead_items(), parallel_vacuum_reset_dead_items(), ParallelVacuumIsActive, TidStoreCreateLocal(), TidStoreDestroy(), and TidStoreMemoryUsage().

Referenced by lazy_vacuum().

◆ find_next_unskippable_block()

static void find_next_unskippable_block ( LVRelState * vacrel,
bool * skipsallvis 
)
static

Definition at line 1719 of file vacuumlazy.c.

1720{
1721 BlockNumber rel_pages = vacrel->rel_pages;
1722 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1723 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1724 bool next_unskippable_eager_scanned = false;
1725
1726 *skipsallvis = false;
1727
1728 for (;; next_unskippable_block++)
1729 {
1730 uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1731 next_unskippable_block,
1732 &next_unskippable_vmbuffer);
1733
1734
1735 /*
1736 * At the start of each eager scan region, normal vacuums with eager
1737 * scanning enabled reset the failure counter, allowing vacuum to
1738 * resume eager scanning if it had been suspended in the previous
1739 * region.
1740 */
1741 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1742 {
1743 vacrel->eager_scan_remaining_fails =
1744 vacrel->eager_scan_max_fails_per_region;
1745 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1746 }
1747
1748 /*
1749 * A block is unskippable if it is not all visible according to the
1750 * visibility map.
1751 */
1752 if (!(mapbits & VISIBILITYMAP_ALL_VISIBLE))
1753 {
1754 Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1755 break;
1756 }
1757
1758 /*
1759 * Caller must scan the last page to determine whether it has tuples
1760 * (caller must have the opportunity to set vacrel->nonempty_pages).
1761 * This rule avoids having lazy_truncate_heap() take access-exclusive
1762 * lock on rel to attempt a truncation that fails anyway, just because
1763 * there are tuples on the last page (it is likely that there will be
1764 * tuples on other nearby pages as well, but those can be skipped).
1765 *
1766 * Implement this by always treating the last block as unsafe to skip.
1767 */
1768 if (next_unskippable_block == rel_pages - 1)
1769 break;
1770
1771 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1772 if (!vacrel->skipwithvm)
1773 break;
1774
1775 /*
1776 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1777 * already frozen by now), so this page can be skipped.
1778 */
1779 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1780 continue;
1781
1782 /*
1783 * Aggressive vacuums cannot skip any all-visible pages that are not
1784 * also all-frozen.
1785 */
1786 if (vacrel->aggressive)
1787 break;
1788
1789 /*
1790 * Normal vacuums with eager scanning enabled only skip all-visible
1791 * but not all-frozen pages if they have hit the failure limit for the
1792 * current eager scan region.
1793 */
1794 if (vacrel->eager_scan_remaining_fails > 0)
1795 {
1796 next_unskippable_eager_scanned = true;
1797 break;
1798 }
1799
1800 /*
1801 * All-visible blocks are safe to skip in a normal vacuum. But
1802 * remember that the final range contains such a block for later.
1803 */
1804 *skipsallvis = true;
1805 }
1806
1807 /* write the local variables back to vacrel */
1808 vacrel->next_unskippable_block = next_unskippable_block;
1809 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1810 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1811}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, and visibilitymap_get_status().

Referenced by heap_vac_scan_next_block().

◆ heap_page_would_be_all_visible()

static bool heap_page_would_be_all_visible ( Relation  rel,
Buffer  buf,
TransactionId  OldestXmin,
OffsetNumber * deadoffsets,
int  ndeadoffsets,
bool * all_frozen,
TransactionId * visibility_cutoff_xid,
OffsetNumber * logging_offnum 
)
static

Definition at line 3705 of file vacuumlazy.c.

3712{
3713 Page page = BufferGetPage(buf);
3714 BlockNumber blockno = BufferGetBlockNumber(buf);
3715 OffsetNumber offnum,
3716 maxoff;
3717 bool all_visible = true;
3718 int matched_dead_count = 0;
3719
3720 *visibility_cutoff_xid = InvalidTransactionId;
3721 *all_frozen = true;
3722
3723 Assert(ndeadoffsets == 0 || deadoffsets);
3724
3725#ifdef USE_ASSERT_CHECKING
3726 /* Confirm input deadoffsets[] is strictly sorted */
3727 if (ndeadoffsets > 1)
3728 {
3729 for (int i = 1; i < ndeadoffsets; i++)
3730 Assert(deadoffsets[i - 1] < deadoffsets[i]);
3731 }
3732#endif
3733
3734 maxoff = PageGetMaxOffsetNumber(page);
3735 for (offnum = FirstOffsetNumber;
3736 offnum <= maxoff && all_visible;
3737 offnum = OffsetNumberNext(offnum))
3738 {
3739 ItemId itemid;
3740 HeapTupleData tuple;
3741
3742 /*
3743 * Set the offset number so that we can display it along with any
3744 * error that occurred while processing this tuple.
3745 */
3746 *logging_offnum = offnum;
3747 itemid = PageGetItemId(page, offnum);
3748
3749 /* Unused or redirect line pointers are of no interest */
3750 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3751 continue;
3752
3753 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3754
3755 /*
3756 * Dead line pointers can have index pointers pointing to them. So
3757 * they can't be treated as visible
3758 */
3759 if (ItemIdIsDead(itemid))
3760 {
3761 if (!deadoffsets ||
3762 matched_dead_count >= ndeadoffsets ||
3763 deadoffsets[matched_dead_count] != offnum)
3764 {
3765 *all_frozen = all_visible = false;
3766 break;
3767 }
3768 matched_dead_count++;
3769 continue;
3770 }
3771
3772 Assert(ItemIdIsNormal(itemid));
3773
3774 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3775 tuple.t_len = ItemIdGetLength(itemid);
3776 tuple.t_tableOid = RelationGetRelid(rel);
3777
3778 /* Visibility checks may do IO or allocate memory */
3779 Assert(CritSectionCount == 0);
3780 switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3781 {
3782 case HEAPTUPLE_LIVE:
3783 {
3784 TransactionId xmin;
3785
3786 /* Check comments in lazy_scan_prune. */
3787 if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3788 {
3789 all_visible = false;
3790 *all_frozen = false;
3791 break;
3792 }
3793
3794 /*
3795 * The inserter definitely committed. But is it old enough
3796 * that everyone sees it as committed?
3797 */
3798 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3799 if (!TransactionIdPrecedes(xmin, OldestXmin))
3800 {
3801 all_visible = false;
3802 *all_frozen = false;
3803 break;
3804 }
3805
3806 /* Track newest xmin on page. */
3807 if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3808 TransactionIdIsNormal(xmin))
3809 *visibility_cutoff_xid = xmin;
3810
3811 /* Check whether this tuple is already frozen or not */
3812 if (all_visible && *all_frozen &&
3813 heap_tuple_needs_eventual_freeze(tuple.t_data))
3814 *all_frozen = false;
3815 }
3816 break;
3817
3818 case HEAPTUPLE_DEAD:
3819 case HEAPTUPLE_RECENTLY_DEAD:
3820 case HEAPTUPLE_INSERT_IN_PROGRESS:
3821 case HEAPTUPLE_DELETE_IN_PROGRESS:
3822 {
3823 all_visible = false;
3824 *all_frozen = false;
3825 break;
3826 }
3827 default:
3828 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3829 break;
3830 }
3831 } /* scan along page */
3832
3833 /* Clear the offset information once we have processed the given page. */
3834 *logging_offnum = InvalidOffsetNumber;
3835
3836 return all_visible;
3837}

References Assert, buf, BufferGetBlockNumber(), BufferGetPage(), CritSectionCount, elog, ERROR, fb(), FirstOffsetNumber, heap_tuple_needs_eventual_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetXmin(), HeapTupleHeaderXminCommitted(), HeapTupleSatisfiesVacuum(), i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdIsNormal, and TransactionIdPrecedes().

Referenced by lazy_vacuum_heap_page().
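
A hedged sketch of the kind of check the caller above performs (the wrapper name and local variable names are illustrative, not from the file): before updating the visibility map, the heap-vacuum step can ask whether the page becomes all-visible once the listed dead offsets have been reaped.

    /* Illustrative sketch: would the page be all-visible after reaping? */
    static bool
    page_will_be_all_visible(LVRelState *vacrel, Buffer buffer,
                             OffsetNumber *deadoffsets, int num_offsets)
    {
        bool          all_frozen;
        TransactionId visibility_cutoff_xid;
        OffsetNumber  logging_offnum = InvalidOffsetNumber;

        return heap_page_would_be_all_visible(vacrel->rel, buffer,
                                              vacrel->cutoffs.OldestXmin,
                                              deadoffsets, num_offsets,
                                              &all_frozen,
                                              &visibility_cutoff_xid,
                                              &logging_offnum);
    }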

◆ heap_vac_scan_next_block()

static BlockNumber heap_vac_scan_next_block ( ReadStream * stream,
void * callback_private_data,
void * per_buffer_data 
)
static

Definition at line 1619 of file vacuumlazy.c.

1622{
1623 BlockNumber next_block;
1624 LVRelState *vacrel = callback_private_data;
1625
1626 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1627 next_block = vacrel->current_block + 1;
1628
1629 /* Have we reached the end of the relation? */
1630 if (next_block >= vacrel->rel_pages)
1631 {
1632 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1633 {
1634 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1635 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1636 }
1637 return InvalidBlockNumber;
1638 }
1639
1640 /*
1641 * We must be in one of the three following states:
1642 */
1643 if (next_block > vacrel->next_unskippable_block ||
1644 vacrel->next_unskippable_block == InvalidBlockNumber)
1645 {
1646 /*
1647 * 1. We have just processed an unskippable block (or we're at the
1648 * beginning of the scan). Find the next unskippable block using the
1649 * visibility map.
1650 */
1651 bool skipsallvis;
1652
1653 find_next_unskippable_block(vacrel, &skipsallvis);
1654
1655 /*
1656 * We now know the next block that we must process. It can be the
1657 * next block after the one we just processed, or something further
1658 * ahead. If it's further ahead, we can jump to it, but we choose to
1659 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1660 * pages. Since we're reading sequentially, the OS should be doing
1661 * readahead for us, so there's no gain in skipping a page now and
1662 * then. Skipping such a range might even discourage sequential
1663 * detection.
1664 *
1665 * This test also enables more frequent relfrozenxid advancement
1666 * during non-aggressive VACUUMs. If the range has any all-visible
1667 * pages then skipping makes updating relfrozenxid unsafe, which is a
1668 * real downside.
1669 */
1670 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1671 {
1672 next_block = vacrel->next_unskippable_block;
1673 if (skipsallvis)
1674 vacrel->skippedallvis = true;
1675 }
1676 }
1677
1678 /* Now we must be in one of the two remaining states: */
1679 if (next_block < vacrel->next_unskippable_block)
1680 {
1681 /*
1682 * 2. We are processing a range of blocks that we could have skipped
1683 * but chose not to. We know that they are all-visible in the VM,
1684 * otherwise they would've been unskippable.
1685 */
1686 vacrel->current_block = next_block;
1687 /* Block was not eager scanned */
1688 *((bool *) per_buffer_data) = false;
1689 return vacrel->current_block;
1690 }
1691 else
1692 {
1693 /*
1694 * 3. We reached the next unskippable block. Process it. On next
1695 * iteration, we will be back in state 1.
1696 */
1697 Assert(next_block == vacrel->next_unskippable_block);
1698
1699 vacrel->current_block = next_block;
1700 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1701 return vacrel->current_block;
1702 }
1703}

References Assert, BufferIsValid(), LVRelState::current_block, fb(), find_next_unskippable_block(), InvalidBlockNumber, InvalidBuffer, ReleaseBuffer(), and SKIP_PAGES_THRESHOLD.

Referenced by lazy_scan_heap().
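
For context, this callback is meant to be handed to a read stream; below is a hedged sketch of how the scan code might wire it up (the flag choice and the one-bool-per-buffer payload are assumptions based on how the callback writes per_buffer_data).

    /* Illustrative sketch: a read stream driven by this callback. */
    static ReadStream *
    begin_heap_vacuum_stream(LVRelState *vacrel)
    {
        return read_stream_begin_relation(READ_STREAM_MAINTENANCE,
                                          vacrel->bstrategy,
                                          vacrel->rel,
                                          MAIN_FORKNUM,
                                          heap_vac_scan_next_block,
                                          vacrel,        /* callback_private_data */
                                          sizeof(bool)); /* per-buffer eager-scan flag */
    }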

◆ heap_vacuum_eager_scan_setup()

static void heap_vacuum_eager_scan_setup ( LVRelState * vacrel,
const VacuumParams  params 
)
static

Definition at line 497 of file vacuumlazy.c.

498{
502 float first_region_ratio;
504
505 /*
506 * Initialize eager scan management fields to their disabled values.
507 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
508 * of tables without sufficiently old tuples disable eager scanning.
509 */
510 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
511 vacrel->eager_scan_max_fails_per_region = 0;
512 vacrel->eager_scan_remaining_fails = 0;
513 vacrel->eager_scan_remaining_successes = 0;
514
515 /* If eager scanning is explicitly disabled, just return. */
516 if (params.max_eager_freeze_failure_rate == 0)
517 return;
518
519 /*
520 * The caller will have determined whether or not an aggressive vacuum is
521 * required by either the vacuum parameters or the relative age of the
522 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
523 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
524 * so scans of all-visible pages are not considered eager.
525 */
526 if (vacrel->aggressive)
527 return;
528
529 /*
530 * Aggressively vacuuming a small relation shouldn't take long, so it
531 * isn't worth amortizing. We use two times the region size as the size
532 * cutoff because the eager scan start block is a random spot somewhere in
533 * the first region, making the second region the first to be eager
534 * scanned normally.
535 */
536 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
537 return;
538
539 /*
540 * We only want to enable eager scanning if we are likely to be able to
541 * freeze some of the pages in the relation.
542 *
543 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
544 * are technically freezable, but we won't freeze them unless the criteria
545 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
546 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
547 *
548 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
549 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
550 * enable eager scanning.
551 */
552 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
553 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
554 vacrel->cutoffs.FreezeLimit))
556
558 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
559 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
560 vacrel->cutoffs.MultiXactCutoff))
562
564 return;
565
566 /* We have met the criteria to eagerly scan some pages. */
567
568 /*
569 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
570 * all-visible but not all-frozen blocks in the relation.
571 */
573
574 vacrel->eager_scan_remaining_successes =
577
578 /* If every all-visible page is frozen, eager scanning is disabled. */
579 if (vacrel->eager_scan_remaining_successes == 0)
580 return;
581
582 /*
583 * Now calculate the bounds of the first eager scan region. Its end block
584 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
585 * blocks. This affects the bounds of all subsequent regions and avoids
586 * eager scanning and failing to freeze the same blocks each vacuum of the
587 * relation.
588 */
589 randseed = pg_prng_uint32(&pg_global_prng_state);
590
591 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
592
595
596 vacrel->eager_scan_max_fails_per_region =
599
600 /*
601 * The first region will be smaller than subsequent regions. As such,
602 * adjust the eager freeze failures tolerated for this region.
603 */
604 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
606
607 vacrel->eager_scan_remaining_fails =
608 vacrel->eager_scan_max_fails_per_region *
610}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), InvalidBlockNumber, VacuumParams::max_eager_freeze_failure_rate, MAX_EAGER_FREEZE_SUCCESS_RATE, MultiXactIdIsValid, MultiXactIdPrecedes(), pg_global_prng_state, pg_prng_uint32(), TransactionIdIsNormal, TransactionIdPrecedes(), and visibilitymap_count().

Referenced by heap_vacuum_rel().
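
Hedged arithmetic for orientation (the parameter value is chosen only as an example): with EAGER_SCAN_REGION_SIZE = 4096 and a max_eager_freeze_failure_rate of 0.03, eager_scan_max_fails_per_region comes out to about 122 blocks per region (0.03 × 4096 ≈ 122.9; the exact value depends on rounding), while MAX_EAGER_FREEZE_SUCCESS_RATE = 0.2 caps eager_scan_remaining_successes at 20% of the all-visible-but-not-all-frozen pages reported by visibilitymap_count().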

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)

Definition at line 624 of file vacuumlazy.c.

626{
628 bool verbose,
629 instrument,
630 skipwithvm,
638 TimestampTz starttime = 0;
640 startwritetime = 0;
643 ErrorContextCallback errcallback;
644 char **indnames = NULL;
646
647 verbose = (params.options & VACOPT_VERBOSE) != 0;
648 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
649 params.log_vacuum_min_duration >= 0));
650 if (instrument)
651 {
653 if (track_io_timing)
654 {
657 }
658 }
659
660 /* Used for instrumentation and stats report */
661 starttime = GetCurrentTimestamp();
662
664 RelationGetRelid(rel));
667 params.is_wraparound
670 else
673
674 /*
675 * Setup error traceback support for ereport() first. The idea is to set
676 * up an error context callback to display additional information on any
677 * error during a vacuum. During different phases of vacuum, we update
678 * the state so that the error context callback always displays current
679 * information.
680 *
681 * Copy the names of heap rel into local memory for error reporting
682 * purposes, too. It isn't always safe to assume that we can get the name
683 * of each rel. It's convenient for code in lazy_scan_heap to always use
684 * these temp copies.
685 */
688 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
689 vacrel->relname = pstrdup(RelationGetRelationName(rel));
690 vacrel->indname = NULL;
692 vacrel->verbose = verbose;
693 errcallback.callback = vacuum_error_callback;
694 errcallback.arg = vacrel;
695 errcallback.previous = error_context_stack;
696 error_context_stack = &errcallback;
697
698 /* Set up high level stuff about rel and its indexes */
699 vacrel->rel = rel;
701 &vacrel->indrels);
702 vacrel->bstrategy = bstrategy;
703 if (instrument && vacrel->nindexes > 0)
704 {
705 /* Copy index names used by instrumentation (not error reporting) */
706 indnames = palloc_array(char *, vacrel->nindexes);
707 for (int i = 0; i < vacrel->nindexes; i++)
709 }
710
711 /*
712 * The index_cleanup param either disables index vacuuming and cleanup or
713 * forces it to go ahead when we would otherwise apply the index bypass
714 * optimization. The default is 'auto', which leaves the final decision
715 * up to lazy_vacuum().
716 *
717 * The truncate param allows the user to avoid attempting relation truncation,
718 * though it can't force truncation to happen.
719 */
722 params.truncate != VACOPTVALUE_AUTO);
723
724 /*
725 * While VacuumFailsafeActive is reset to false before calling this, we
726 * still need to reset it here due to recursive calls.
727 */
728 VacuumFailsafeActive = false;
729 vacrel->consider_bypass_optimization = true;
730 vacrel->do_index_vacuuming = true;
731 vacrel->do_index_cleanup = true;
732 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
734 {
735 /* Force disable index vacuuming up-front */
736 vacrel->do_index_vacuuming = false;
737 vacrel->do_index_cleanup = false;
738 }
739 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
740 {
741 /* Force index vacuuming. Note that failsafe can still bypass. */
742 vacrel->consider_bypass_optimization = false;
743 }
744 else
745 {
746 /* Default/auto, make all decisions dynamically */
748 }
749
750 /* Initialize page counters explicitly (be tidy) */
751 vacrel->scanned_pages = 0;
752 vacrel->eager_scanned_pages = 0;
753 vacrel->removed_pages = 0;
754 vacrel->new_frozen_tuple_pages = 0;
755 vacrel->lpdead_item_pages = 0;
756 vacrel->missed_dead_pages = 0;
757 vacrel->nonempty_pages = 0;
758 /* dead_items_alloc allocates vacrel->dead_items later on */
759
760 /* Allocate/initialize output statistics state */
761 vacrel->new_rel_tuples = 0;
762 vacrel->new_live_tuples = 0;
763 vacrel->indstats = (IndexBulkDeleteResult **)
764 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
765
766 /* Initialize remaining counters (be tidy) */
767 vacrel->num_index_scans = 0;
768 vacrel->num_dead_items_resets = 0;
769 vacrel->total_dead_items_bytes = 0;
770 vacrel->tuples_deleted = 0;
771 vacrel->tuples_frozen = 0;
772 vacrel->lpdead_items = 0;
773 vacrel->live_tuples = 0;
774 vacrel->recently_dead_tuples = 0;
775 vacrel->missed_dead_tuples = 0;
776
777 vacrel->vm_new_visible_pages = 0;
778 vacrel->vm_new_visible_frozen_pages = 0;
779 vacrel->vm_new_frozen_pages = 0;
780
781 /*
782 * Get cutoffs that determine which deleted tuples are considered DEAD,
783 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
784 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
785 * happen in this order to ensure that the OldestXmin cutoff field works
786 * as an upper bound on the XIDs stored in the pages we'll actually scan
787 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
788 *
789 * Next acquire vistest, a related cutoff that's used in pruning. We use
790 * vistest in combination with OldestXmin to ensure that
791 * heap_page_prune_and_freeze() always removes any deleted tuple whose
792 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
793 * whether a tuple should be frozen or removed. (In the future we might
794 * want to teach lazy_scan_prune to recompute vistest from time to time,
795 * to increase the number of dead tuples it can prune away.)
796 */
797 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
799 vacrel->vistest = GlobalVisTestFor(rel);
800
801 /* Initialize state used to track oldest extant XID/MXID */
802 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
803 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
804
805 /*
806 * Initialize state related to tracking all-visible page skipping. This is
807 * very important to determine whether or not it is safe to advance the
808 * relfrozenxid/relminmxid.
809 */
810 vacrel->skippedallvis = false;
811 skipwithvm = true;
813 {
814 /*
815 * Force aggressive mode, and disable skipping blocks using the
816 * visibility map (even those set all-frozen)
817 */
818 vacrel->aggressive = true;
819 skipwithvm = false;
820 }
821
822 vacrel->skipwithvm = skipwithvm;
823
824 /*
825 * Set up eager scan tracking state. This must happen after determining
826 * whether or not the vacuum must be aggressive, because only normal
827 * vacuums use the eager scan algorithm.
828 */
830
831 /* Report the vacuum mode: 'normal' or 'aggressive' */
833 vacrel->aggressive
836
837 if (verbose)
838 {
839 if (vacrel->aggressive)
841 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
842 vacrel->dbname, vacrel->relnamespace,
843 vacrel->relname)));
844 else
846 (errmsg("vacuuming \"%s.%s.%s\"",
847 vacrel->dbname, vacrel->relnamespace,
848 vacrel->relname)));
849 }
850
851 /*
852 * Allocate dead_items memory using dead_items_alloc. This handles
853 * parallel VACUUM initialization as part of allocating shared memory
854 * space used for dead_items. (But do a failsafe precheck first, to
855 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
856 * is already dangerously old.)
857 */
860
861 /*
862 * Call lazy_scan_heap to perform all required heap pruning, index
863 * vacuuming, and heap vacuuming (plus related processing)
864 */
866
867 /*
868 * Save dead items max_bytes and update the memory usage statistics before
869 * cleanup, since they are freed during dead_items_cleanup() in parallel
870 * vacuum cases.
871 */
872 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
873 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
874
875 /*
876 * Free resources managed by dead_items_alloc. This ends parallel mode in
877 * passing when necessary.
878 */
881
882 /*
883 * Update pg_class entries for each of rel's indexes where appropriate.
884 *
885 * Unlike the later update to rel's pg_class entry, this is not critical.
886 * It only maintains the relpages/reltuples statistics used by the planner.
887 */
888 if (vacrel->do_index_cleanup)
890
891 /* Done with rel's indexes */
892 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
893
894 /* Optionally truncate rel */
897
898 /* Pop the error context stack */
899 error_context_stack = errcallback.previous;
900
901 /* Report that we are now doing final cleanup */
904
905 /*
906 * Prepare to update rel's pg_class entry.
907 *
908 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
909 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
910 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
911 */
912 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
913 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
914 vacrel->cutoffs.relfrozenxid,
915 vacrel->NewRelfrozenXid));
916 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
917 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
918 vacrel->cutoffs.relminmxid,
919 vacrel->NewRelminMxid));
920 if (vacrel->skippedallvis)
921 {
922 /*
923 * Must keep original relfrozenxid in a non-aggressive VACUUM that
924 * chose to skip an all-visible page range. The state that tracks new
925 * values will have missed unfrozen XIDs from the pages we skipped.
926 */
927 Assert(!vacrel->aggressive);
928 vacrel->NewRelfrozenXid = InvalidTransactionId;
929 vacrel->NewRelminMxid = InvalidMultiXactId;
930 }
931
932 /*
933 * For safety, clamp relallvisible to be not more than what we're setting
934 * pg_class.relpages to
935 */
936 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
940
941 /*
942 * An all-frozen block _must_ be all-visible. As such, clamp the count of
943 * all-frozen blocks to the count of all-visible blocks. This matches the
944 * clamping of relallvisible above.
945 */
948
949 /*
950 * Now actually update rel's pg_class entry.
951 *
952 * In principle new_live_tuples could be -1 indicating that we (still)
953 * don't know the tuple count. In practice that can't happen, since we
954 * scan every page that isn't skipped using the visibility map.
955 */
956 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
958 vacrel->nindexes > 0,
959 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
961
962 /*
963 * Report results to the cumulative stats system, too.
964 *
965 * Deliberately avoid telling the stats system about LP_DEAD items that
966 * remain in the table due to VACUUM bypassing index and heap vacuuming.
967 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
968 * It seems like a good idea to err on the side of not vacuuming again too
969 * soon in cases where the failsafe prevented significant amounts of heap
970 * vacuuming.
971 */
973 Max(vacrel->new_live_tuples, 0),
974 vacrel->recently_dead_tuples +
975 vacrel->missed_dead_tuples,
976 starttime);
978
979 if (instrument)
980 {
982
983 if (verbose || params.log_vacuum_min_duration == 0 ||
986 {
987 long secs_dur;
988 int usecs_dur;
989 WalUsage walusage;
990 BufferUsage bufferusage;
992 char *msgfmt;
993 int32 diff;
994 double read_rate = 0,
995 write_rate = 0;
999
1001 memset(&walusage, 0, sizeof(WalUsage));
1003 memset(&bufferusage, 0, sizeof(BufferUsage));
1005
1006 total_blks_hit = bufferusage.shared_blks_hit +
1007 bufferusage.local_blks_hit;
1008 total_blks_read = bufferusage.shared_blks_read +
1009 bufferusage.local_blks_read;
1011 bufferusage.local_blks_dirtied;
1012
1014 if (verbose)
1015 {
1016 /*
1017 * Aggressiveness already reported earlier, in dedicated
1018 * VACUUM VERBOSE ereport
1019 */
1020 Assert(!params.is_wraparound);
1021 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1022 }
1023 else if (params.is_wraparound)
1024 {
1025 /*
1026 * While it's possible for a VACUUM to be both is_wraparound
1027 * and !aggressive, that's just a corner-case -- is_wraparound
1028 * implies aggressive. Produce distinct output for the corner
1029 * case all the same, just in case.
1030 */
1031 if (vacrel->aggressive)
1032 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1033 else
1034 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 }
1036 else
1037 {
1038 if (vacrel->aggressive)
1039 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1040 else
1041 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 }
1044 vacrel->dbname,
1045 vacrel->relnamespace,
1046 vacrel->relname,
1047 vacrel->num_index_scans);
1048 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1049 vacrel->removed_pages,
1051 vacrel->scanned_pages,
1052 orig_rel_pages == 0 ? 100.0 :
1053 100.0 * vacrel->scanned_pages /
1055 vacrel->eager_scanned_pages);
1057 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1058 vacrel->tuples_deleted,
1059 (int64) vacrel->new_rel_tuples,
1060 vacrel->recently_dead_tuples);
1061 if (vacrel->missed_dead_tuples > 0)
1063 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1064 vacrel->missed_dead_tuples,
1065 vacrel->missed_dead_pages);
1067 vacrel->cutoffs.OldestXmin);
1069 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1070 vacrel->cutoffs.OldestXmin, diff);
1072 {
1073 diff = (int32) (vacrel->NewRelfrozenXid -
1074 vacrel->cutoffs.relfrozenxid);
1076 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1077 vacrel->NewRelfrozenXid, diff);
1078 }
1079 if (minmulti_updated)
1080 {
1081 diff = (int32) (vacrel->NewRelminMxid -
1082 vacrel->cutoffs.relminmxid);
1084 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1085 vacrel->NewRelminMxid, diff);
1086 }
1087 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1088 vacrel->new_frozen_tuple_pages,
1089 orig_rel_pages == 0 ? 100.0 :
1090 100.0 * vacrel->new_frozen_tuple_pages /
1092 vacrel->tuples_frozen);
1093
1095 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1096 vacrel->vm_new_visible_pages,
1097 vacrel->vm_new_visible_frozen_pages +
1098 vacrel->vm_new_frozen_pages,
1099 vacrel->vm_new_frozen_pages);
1100 if (vacrel->do_index_vacuuming)
1101 {
1102 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1103 appendStringInfoString(&buf, _("index scan not needed: "));
1104 else
1105 appendStringInfoString(&buf, _("index scan needed: "));
1106
1107 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1108 }
1109 else
1110 {
1112 appendStringInfoString(&buf, _("index scan bypassed: "));
1113 else
1114 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1115
1116 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1117 }
1119 vacrel->lpdead_item_pages,
1120 orig_rel_pages == 0 ? 100.0 :
1121 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1122 vacrel->lpdead_items);
1123 for (int i = 0; i < vacrel->nindexes; i++)
1124 {
1125 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1126
1127 if (!istat)
1128 continue;
1129
1131 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1132 indnames[i],
1133 istat->num_pages,
1134 istat->pages_newly_deleted,
1135 istat->pages_deleted,
1136 istat->pages_free);
1137 }
1139 {
1140 /*
1141 * We bypass the changecount mechanism because this value is
1142 * only updated by the calling process. We also rely on the
1143 * above call to pgstat_progress_end_command() to not clear
1144 * the st_progress_param array.
1145 */
1146 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1148 }
1149 if (track_io_timing)
1150 {
1151 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1152 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1153
1154 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1155 read_ms, write_ms);
1156 }
1157 if (secs_dur > 0 || usecs_dur > 0)
1158 {
1160 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1162 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1163 }
1164 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1167 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1172 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1173 walusage.wal_records,
1174 walusage.wal_fpi,
1175 walusage.wal_bytes,
1176 walusage.wal_fpi_bytes,
1177 walusage.wal_buffers_full);
1178
1179 /*
1180 * Report the dead items memory usage.
1181 *
1182 * The num_dead_items_resets counter increases when we reset the
1183 * collected dead items, so the counter is non-zero if at least
1184 * one dead item was collected, even if index vacuuming is
1185 * disabled.
1186 */
1188 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1189 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1190 vacrel->num_dead_items_resets),
1191 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1192 vacrel->num_dead_items_resets,
1193 (double) dead_items_max_bytes / (1024 * 1024));
1194 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1195
1196 ereport(verbose ? INFO : LOG,
1197 (errmsg_internal("%s", buf.data)));
1198 pfree(buf.data);
1199 }
1200 }
1201
1202 /* Cleanup index statistics and index names */
1203 for (int i = 0; i < vacrel->nindexes; i++)
1204 {
1205 if (vacrel->indstats[i])
1206 pfree(vacrel->indstats[i]);
1207
1208 if (instrument)
1209 pfree(indnames[i]);
1210 }
1211}

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg(), errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().
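
As a worked illustration of the figures reported in the log output built above, the following minimal sketch (not PostgreSQL code; BLCKSZ and every input value are assumed for the example) shows how the average read/write rates follow from blocks × block size divided by elapsed time, and how the scanned-page percentage is guarded against a zero-page relation:

#include <stdint.h>
#include <stdio.h>

#define BLCKSZ 8192				/* assumed; the common default block size */

int
main(void)
{
	int64_t		total_blks_read = 12345;
	int64_t		total_blks_dirtied = 2345;
	uint32_t	scanned_pages = 5000;
	uint32_t	orig_rel_pages = 20000;
	long		secs_dur = 3;
	int			usecs_dur = 250000;
	double		elapsed = secs_dur + usecs_dur / 1000000.0;
	double		read_rate = 0,
				write_rate = 0;

	if (secs_dur > 0 || usecs_dur > 0)
	{
		/* blocks -> bytes -> MiB, divided by elapsed seconds */
		read_rate = (double) BLCKSZ * total_blks_read / (1024 * 1024) / elapsed;
		write_rate = (double) BLCKSZ * total_blks_dirtied / (1024 * 1024) / elapsed;
	}

	printf("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n",
		   read_rate, write_rate);
	printf("scanned %.2f%% of total\n",
		   orig_rel_pages == 0 ? 100.0 :
		   100.0 * scanned_pages / orig_rel_pages);
	return 0;
}
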

◆ identify_and_fix_vm_corruption()

static void identify_and_fix_vm_corruption ( Relation  rel,
Buffer  heap_buffer,
BlockNumber  heap_blk,
Page  heap_page,
int  nlpdead_items,
Buffer  vmbuffer,
uint8 vmbits 
)
static

Definition at line 1980 of file vacuumlazy.c.

1985{
1986 Assert(visibilitymap_get_status(rel, heap_blk, &vmbuffer) == *vmbits);
1987
1989
1990 /*
1991 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1992 * page-level bit is clear. However, it's possible that the bit got
1993 * cleared after heap_vac_scan_next_block() was called, so we must recheck
1994 * with buffer lock before concluding that the VM is corrupt.
1995 */
1997 ((*vmbits & VISIBILITYMAP_VALID_BITS) != 0))
1998 {
2001 errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2003
2004 visibilitymap_clear(rel, heap_blk, vmbuffer,
2006 *vmbits = 0;
2007 }
2008
2009 /*
2010 * It's possible for the value returned by
2011 * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2012 * wrong for us to see tuples that appear to not be visible to everyone
2013 * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2014 * never moves backwards, but GetOldestNonRemovableTransactionId() is
2015 * conservative and sometimes returns a value that's unnecessarily small,
2016 * so if we see that contradiction it just means that the tuples that we
2017 * think are not visible to everyone yet actually are, and the
2018 * PD_ALL_VISIBLE flag is correct.
2019 *
2020 * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2021 * however.
2022 */
2023 else if (PageIsAllVisible(heap_page) && nlpdead_items > 0)
2024 {
2027 errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2029
2032 visibilitymap_clear(rel, heap_blk, vmbuffer,
2034 *vmbits = 0;
2035 }
2036}

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferIsLockedByMeInMode(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg(), fb(), MarkBufferDirty(), PageClearAllVisible(), PageIsAllVisible(), RelationGetRelationName, visibilitymap_clear(), visibilitymap_get_status(), VISIBILITYMAP_VALID_BITS, and WARNING.

Referenced by lazy_scan_prune().
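
A condensed sketch of the two consistency checks this function enforces (illustrative only; the boolean flags and warning strings stand in for the buffer, page, and visibility map machinery used by the real code):

#include <stdbool.h>
#include <stdio.h>

/*
 * Invariants checked by identify_and_fix_vm_corruption():
 *  1. The VM bit must not be set while the page-level all-visible bit is
 *     clear.
 *  2. A page marked all-visible must not contain LP_DEAD items.
 * Returns true if a repair (clearing bits) is needed.
 */
static bool
check_vm_consistency(bool page_all_visible, unsigned vmbits,
					 int nlpdead_items)
{
	if (!page_all_visible && vmbits != 0)
	{
		fprintf(stderr, "WARNING: VM bit set but page not all-visible\n");
		return true;			/* caller clears the VM bits */
	}
	if (page_all_visible && nlpdead_items > 0)
	{
		fprintf(stderr, "WARNING: all-visible page has LP_DEAD items\n");
		return true;			/* caller clears both page flag and VM bits */
	}
	return false;
}

int
main(void)
{
	printf("%d %d %d\n",
		   check_vm_consistency(false, 1, 0),	/* corrupt: case 1 */
		   check_vm_consistency(true, 3, 2),	/* corrupt: case 2 */
		   check_vm_consistency(true, 3, 0));	/* consistent */
	return 0;
}
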

◆ lazy_check_wraparound_failsafe()

static bool lazy_check_wraparound_failsafe ( LVRelState vacrel)
static

Definition at line 2999 of file vacuumlazy.c.

3000{
3001 /* Don't warn more than once per VACUUM */
3003 return true;
3004
3006 {
3007 const int progress_index[] = {
3011 };
3013
3014 VacuumFailsafeActive = true;
3015
3016 /*
3017 * Abandon use of a buffer access strategy to allow use of all of
3018 * shared buffers. We assume the caller who allocated the memory for
3019 * the BufferAccessStrategy will free it.
3020 */
3021 vacrel->bstrategy = NULL;
3022
3023 /* Disable index vacuuming, index cleanup, and heap rel truncation */
3024 vacrel->do_index_vacuuming = false;
3025 vacrel->do_index_cleanup = false;
3026 vacrel->do_rel_truncate = false;
3027
3028 /* Reset the progress counters and set the failsafe mode */
3030
3032 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3033 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3034 vacrel->num_index_scans),
3035 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3036 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3037 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3038
3039 /* Stop applying cost limits from this point on */
3040 VacuumCostActive = false;
3042
3043 return true;
3044 }
3045
3046 return false;
3047}

References ereport, errdetail(), errhint(), errmsg(), fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_FAILSAFE, unlikely, vacuum_xid_failsafe_check(), VacuumCostActive, VacuumCostBalance, VacuumFailsafeActive, and WARNING.

Referenced by heap_vacuum_rel(), lazy_scan_heap(), and lazy_vacuum_all_indexes().
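
The failsafe behaves as a one-way latch: once triggered it disables every optional phase and cost-based throttling for the rest of the VACUUM. A minimal sketch of that shape (the VacState struct, the age inputs, and the limit are illustrative, not the real GUC or XID machinery):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the real VACUUM state */
typedef struct VacState
{
	bool		failsafe_active;
	bool		do_index_vacuuming;
	bool		do_index_cleanup;
	bool		do_rel_truncate;
	bool		cost_delay_active;
} VacState;

/*
 * Latch-on failsafe: once the relfrozenxid age crosses the limit, disable
 * every optional phase and all cost-based delays.  Subsequent calls return
 * true immediately ("don't warn more than once per VACUUM").
 */
static bool
check_failsafe(VacState *vac, unsigned relfrozenxid_age, unsigned limit)
{
	if (vac->failsafe_active)
		return true;

	if (relfrozenxid_age > limit)
	{
		vac->failsafe_active = true;
		vac->do_index_vacuuming = false;
		vac->do_index_cleanup = false;
		vac->do_rel_truncate = false;
		vac->cost_delay_active = false;
		fprintf(stderr, "bypassing nonessential maintenance (failsafe)\n");
		return true;
	}
	return false;
}

int
main(void)
{
	VacState	vac = {false, true, true, true, true};

	printf("%d\n", check_failsafe(&vac, 900000000, 1600000000));	/* 0 */
	printf("%d\n", check_failsafe(&vac, 1700000000, 1600000000));	/* 1 */
	printf("%d\n", check_failsafe(&vac, 0, 1600000000));			/* 1: latched */
	return 0;
}
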

◆ lazy_cleanup_all_indexes()

static void lazy_cleanup_all_indexes ( LVRelState vacrel)
static

Definition at line 3053 of file vacuumlazy.c.

3054{
3055 double reltuples = vacrel->new_rel_tuples;
3056 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3057 const int progress_start_index[] = {
3060 };
3061 const int progress_end_index[] = {
3064 };
3066 int64 progress_end_val[2] = {0, 0};
3067
3068 Assert(vacrel->do_index_cleanup);
3069 Assert(vacrel->nindexes > 0);
3070
3071 /*
3072 * Report that we are now cleaning up indexes and the number of indexes to
3073 * cleanup.
3074 */
3076 progress_start_val[1] = vacrel->nindexes;
3078
3080 {
3081 for (int idx = 0; idx < vacrel->nindexes; idx++)
3082 {
3083 Relation indrel = vacrel->indrels[idx];
3084 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3085
3086 vacrel->indstats[idx] =
3087 lazy_cleanup_one_index(indrel, istat, reltuples,
3088 estimated_count, vacrel);
3089
3090 /* Report the number of indexes cleaned up */
3092 idx + 1);
3093 }
3094 }
3095 else
3096 {
3097 /* Outsource everything to parallel variant */
3099 vacrel->num_index_scans,
3100 estimated_count);
3101 }
3102
3103 /* Reset the progress counters */
3105}

References Assert, fb(), idx(), lazy_cleanup_one_index(), parallel_vacuum_cleanup_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_PHASE, and PROGRESS_VACUUM_PHASE_INDEX_CLEANUP.

Referenced by lazy_scan_heap().
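
The estimated_count flag computed at the top of this function simply records whether the reltuples figure handed to each index's amvacuumcleanup routine is exact or an estimate. A trivial sketch of that decision (illustrative helper name and inputs):

#include <stdbool.h>
#include <stdio.h>

/*
 * reltuples is exact only when every page of the relation was scanned;
 * otherwise it is an extrapolated estimate.
 */
static bool
reltuples_is_estimate(unsigned scanned_pages, unsigned rel_pages)
{
	return scanned_pages < rel_pages;
}

int
main(void)
{
	printf("%d %d\n",
		   reltuples_is_estimate(800, 1000),	/* 1: some pages skipped */
		   reltuples_is_estimate(1000, 1000));	/* 0: full scan */
	return 0;
}
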

◆ lazy_cleanup_one_index()

static IndexBulkDeleteResult * lazy_cleanup_one_index ( Relation  indrel,
IndexBulkDeleteResult istat,
double  reltuples,
bool  estimated_count,
LVRelState vacrel 
)
static

Definition at line 3170 of file vacuumlazy.c.

3173{
3176
3177 ivinfo.index = indrel;
3178 ivinfo.heaprel = vacrel->rel;
3179 ivinfo.analyze_only = false;
3180 ivinfo.report_progress = false;
3181 ivinfo.estimated_count = estimated_count;
3182 ivinfo.message_level = DEBUG2;
3183
3184 ivinfo.num_heap_tuples = reltuples;
3185 ivinfo.strategy = vacrel->bstrategy;
3186
3187 /*
3188 * Update error traceback information.
3189 *
3190 * The index name is saved during this phase and restored immediately
3191 * after this phase. See vacuum_error_callback.
3192 */
3193 Assert(vacrel->indname == NULL);
3198
3199 istat = vac_cleanup_one_index(&ivinfo, istat);
3200
3201 /* Revert to the previous phase information for error traceback */
3203 pfree(vacrel->indname);
3204 vacrel->indname = NULL;
3205
3206 return istat;
3207}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_cleanup_one_index(), and VACUUM_ERRCB_PHASE_INDEX_CLEANUP.

Referenced by lazy_cleanup_all_indexes().

◆ lazy_scan_heap()

static void lazy_scan_heap ( LVRelState vacrel)
static

Definition at line 1250 of file vacuumlazy.c.

1251{
1252 ReadStream *stream;
1253 BlockNumber rel_pages = vacrel->rel_pages,
1254 blkno = 0,
1257 vacrel->eager_scan_remaining_successes; /* for logging */
1258 Buffer vmbuffer = InvalidBuffer;
1259 const int initprog_index[] = {
1263 };
1265
1266 /* Report that we're scanning the heap, advertising total # of blocks */
1268 initprog_val[1] = rel_pages;
1269 initprog_val[2] = vacrel->dead_items_info->max_bytes;
1271
1272 /* Initialize for the first heap_vac_scan_next_block() call */
1273 vacrel->current_block = InvalidBlockNumber;
1274 vacrel->next_unskippable_block = InvalidBlockNumber;
1275 vacrel->next_unskippable_eager_scanned = false;
1276 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1277
1278 /*
1279 * Set up the read stream for vacuum's first pass through the heap.
1280 *
1281 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1282 * explicit work in heap_vac_scan_next_block.
1283 */
1285 vacrel->bstrategy,
1286 vacrel->rel,
1289 vacrel,
1290 sizeof(bool));
1291
1292 while (true)
1293 {
1294 Buffer buf;
1295 Page page;
1296 bool was_eager_scanned = false;
1297 int ndeleted = 0;
1298 bool has_lpdead_items;
1299 void *per_buffer_data = NULL;
1300 bool vm_page_frozen = false;
1301 bool got_cleanup_lock = false;
1302
1303 vacuum_delay_point(false);
1304
1305 /*
1306 * Regularly check if wraparound failsafe should trigger.
1307 *
1308 * There is a similar check inside lazy_vacuum_all_indexes(), but
1309 * relfrozenxid might start to look dangerously old before we reach
1310 * that point. This check also provides failsafe coverage for the
1311 * one-pass strategy, and the two-pass strategy with the index_cleanup
1312 * param set to 'off'.
1313 */
1314 if (vacrel->scanned_pages > 0 &&
1315 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1317
1318 /*
1319 * Consider if we definitely have enough space to process TIDs on page
1320 * already. If we are close to overrunning the available space for
1321 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1322 * this page. However, let's force at least one page-worth of tuples
1323 * to be stored so as to ensure we do at least some work when the memory
1324 * configured is so low that we run out before storing anything.
1325 */
1326 if (vacrel->dead_items_info->num_items > 0 &&
1327 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1328 {
1329 /*
1330 * Before beginning index vacuuming, we release any pin we may
1331 * hold on the visibility map page. This isn't necessary for
1332 * correctness, but we do it anyway to avoid holding the pin
1333 * across a lengthy, unrelated operation.
1334 */
1335 if (BufferIsValid(vmbuffer))
1336 {
1337 ReleaseBuffer(vmbuffer);
1338 vmbuffer = InvalidBuffer;
1339 }
1340
1341 /* Perform a round of index and heap vacuuming */
1342 vacrel->consider_bypass_optimization = false;
1344
1345 /*
1346 * Vacuum the Free Space Map to make newly-freed space visible on
1347 * upper-level FSM pages. Note that blkno is the previously
1348 * processed block.
1349 */
1351 blkno + 1);
1353
1354 /* Report that we are once again scanning the heap */
1357 }
1358
1359 buf = read_stream_next_buffer(stream, &per_buffer_data);
1360
1361 /* The relation is exhausted. */
1362 if (!BufferIsValid(buf))
1363 break;
1364
1365 was_eager_scanned = *((bool *) per_buffer_data);
1367 page = BufferGetPage(buf);
1368 blkno = BufferGetBlockNumber(buf);
1369
1370 vacrel->scanned_pages++;
1372 vacrel->eager_scanned_pages++;
1373
1374 /* Report as block scanned, update error traceback information */
1377 blkno, InvalidOffsetNumber);
1378
1379 /*
1380 * Pin the visibility map page in case we need to mark the page
1381 * all-visible. In most cases this will be very cheap, because we'll
1382 * already have the correct page pinned anyway.
1383 */
1384 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1385
1386 /*
1387 * We need a buffer cleanup lock to prune HOT chains and defragment
1388 * the page in lazy_scan_prune. But when it's not possible to acquire
1389 * a cleanup lock right away, we may be able to settle for reduced
1390 * processing using lazy_scan_noprune.
1391 */
1393
1394 if (!got_cleanup_lock)
1396
1397 /* Check for new or empty pages before lazy_scan_[no]prune call */
1399 vmbuffer))
1400 {
1401 /* Processed as new/empty page (lock and pin released) */
1402 continue;
1403 }
1404
1405 /*
1406 * If we didn't get the cleanup lock, we can still collect LP_DEAD
1407 * items in the dead_items area for later vacuuming, count live and
1408 * recently dead tuples for vacuum logging, and determine if this
1409 * block could later be truncated. If we encounter any xid/mxids that
1410 * require advancing the relfrozenxid/relminmxid, we'll have to wait
1411 * for a cleanup lock and call lazy_scan_prune().
1412 */
1413 if (!got_cleanup_lock &&
1414 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1415 {
1416 /*
1417 * lazy_scan_noprune could not do all required processing. Wait
1418 * for a cleanup lock, and call lazy_scan_prune in the usual way.
1419 */
1420 Assert(vacrel->aggressive);
1423 got_cleanup_lock = true;
1424 }
1425
1426 /*
1427 * If we have a cleanup lock, we must now prune, freeze, and count
1428 * tuples. We may have acquired the cleanup lock originally, or we may
1429 * have gone back and acquired it after lazy_scan_noprune() returned
1430 * false. Either way, the page hasn't been processed yet.
1431 *
1432 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1433 * recently_dead_tuples and live tuples for vacuum logging, determine
1434 * if the block can later be truncated, and accumulate the details of
1435 * remaining LP_DEAD line pointers on the page into dead_items. These
1436 * dead items include those pruned by lazy_scan_prune() as well as
1437 * line pointers previously marked LP_DEAD.
1438 */
1439 if (got_cleanup_lock)
1440 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1441 vmbuffer,
1443
1444 /*
1445 * Count an eagerly scanned page as a failure or a success.
1446 *
1447 * Only lazy_scan_prune() freezes pages, so if we didn't get the
1448 * cleanup lock, we won't have frozen the page. However, we only count
1449 * pages that were too new to require freezing as eager freeze
1450 * failures.
1451 *
1452 * We could gather more information from lazy_scan_noprune() about
1453 * whether or not there were tuples with XIDs or MXIDs older than the
1454 * FreezeLimit or MultiXactCutoff. However, for simplicity, we exclude
1455 * pages skipped due to cleanup lock contention from the eager freeze
1456 * algorithm's caps.
1457 */
1459 {
1460 /* Aggressive vacuums do not eager scan. */
1461 Assert(!vacrel->aggressive);
1462
1463 if (vm_page_frozen)
1464 {
1465 if (vacrel->eager_scan_remaining_successes > 0)
1466 vacrel->eager_scan_remaining_successes--;
1467
1468 if (vacrel->eager_scan_remaining_successes == 0)
1469 {
1470 /*
1471 * Report only once that we disabled eager scanning. We
1472 * may eagerly read ahead blocks in excess of the success
1473 * or failure caps before attempting to freeze them, so we
1474 * could reach here even after disabling additional eager
1475 * scanning.
1476 */
1477 if (vacrel->eager_scan_max_fails_per_region > 0)
1478 ereport(vacrel->verbose ? INFO : DEBUG2,
1479 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1481 vacrel->dbname, vacrel->relnamespace,
1482 vacrel->relname)));
1483
1484 /*
1485 * If we hit our success cap, permanently disable eager
1486 * scanning by setting the other eager scan management
1487 * fields to their disabled values.
1488 */
1489 vacrel->eager_scan_remaining_fails = 0;
1490 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1491 vacrel->eager_scan_max_fails_per_region = 0;
1492 }
1493 }
1494 else if (vacrel->eager_scan_remaining_fails > 0)
1495 vacrel->eager_scan_remaining_fails--;
1496 }
1497
1498 /*
1499 * Now drop the buffer lock and, potentially, update the FSM.
1500 *
1501 * Our goal is to update the freespace map the last time we touch the
1502 * page. If we'll process a block in the second pass, we may free up
1503 * additional space on the page, so it is better to update the FSM
1504 * after the second pass. If the relation has no indexes, or if index
1505 * vacuuming is disabled, there will be no second heap pass; if this
1506 * particular page has no dead items, the second heap pass will not
1507 * touch this page. So, in those cases, update the FSM now.
1508 *
1509 * Note: In corner cases, it's possible to miss updating the FSM
1510 * entirely. If index vacuuming is currently enabled, we'll skip the
1511 * FSM update now. But if failsafe mode is later activated, or there
1512 * are so few dead tuples that index vacuuming is bypassed, there will
1513 * also be no opportunity to update the FSM later, because we'll never
1514 * revisit this page. Since updating the FSM is desirable but not
1515 * absolutely required, that's OK.
1516 */
1517 if (vacrel->nindexes == 0
1518 || !vacrel->do_index_vacuuming
1519 || !has_lpdead_items)
1520 {
1521 Size freespace = PageGetHeapFreeSpace(page);
1522
1524 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1525
1526 /*
1527 * Periodically perform FSM vacuuming to make newly-freed space
1528 * visible on upper FSM pages. This is done after vacuuming if the
1529 * table has indexes. There will only be newly-freed space if we
1530 * held the cleanup lock and lazy_scan_prune() was called.
1531 */
1532 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1534 {
1536 blkno);
1538 }
1539 }
1540 else
1542 }
1543
1544 vacrel->blkno = InvalidBlockNumber;
1545 if (BufferIsValid(vmbuffer))
1546 ReleaseBuffer(vmbuffer);
1547
1548 /*
1549 * Report that everything is now scanned. We never skip scanning the last
1550 * block in the relation, so we can pass rel_pages here.
1551 */
1553 rel_pages);
1554
1555 /* now we can compute the new value for pg_class.reltuples */
1556 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1557 vacrel->scanned_pages,
1558 vacrel->live_tuples);
1559
1560 /*
1561 * Also compute the total number of surviving heap entries. In the
1562 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1563 */
1564 vacrel->new_rel_tuples =
1565 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1566 vacrel->missed_dead_tuples;
1567
1568 read_stream_end(stream);
1569
1570 /*
1571 * Do index vacuuming (call each index's ambulkdelete routine), then do
1572 * related heap vacuuming
1573 */
1574 if (vacrel->dead_items_info->num_items > 0)
1576
1577 /*
1578 * Vacuum the remainder of the Free Space Map. We must do this whether or
1579 * not there were indexes, and whether or not we bypassed index vacuuming.
1580 * We can pass rel_pages here because we never skip scanning the last
1581 * block of the relation.
1582 */
1583 if (rel_pages > next_fsm_block_to_vacuum)
1585
1586 /* report all blocks vacuumed */
1588
1589 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1590 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1592}

References Assert, buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DEBUG2, ereport, errmsg(), FAILSAFE_EVERY_PAGES, fb(), FreeSpaceMapVacuumRange(), heap_vac_scan_next_block(), INFO, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_check_wraparound_failsafe(), lazy_cleanup_all_indexes(), lazy_scan_new_or_empty(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum(), LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, Max, PageGetHeapFreeSpace(), pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_SCANNED, PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_SCAN_HEAP, PROGRESS_VACUUM_TOTAL_HEAP_BLKS, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), RecordPageWithFreeSpace(), ReleaseBuffer(), TidStoreMemoryUsage(), UnlockReleaseBuffer(), update_vacuum_error_info(), vac_estimate_reltuples(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_FSM_EVERY_PAGES, and visibilitymap_pin().

Referenced by heap_vacuum_rel().
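
The dead_items handling in the scan loop follows an accumulate-and-flush pattern: collect TIDs until memory usage exceeds the budget, run one index/heap vacuum cycle, reset, and repeat, while always accepting at least one page worth of items so progress is made even under a tiny budget. A minimal sketch of that pattern (the DeadItems struct and helper names are illustrative, not the TidStore API):

#include <stddef.h>
#include <stdio.h>

typedef struct DeadItems
{
	size_t		used_bytes;
	size_t		max_bytes;
	long		num_items;
	int			num_resets;
} DeadItems;

static void
flush_dead_items(DeadItems *di)
{
	/* Stand-in for lazy_vacuum(): index bulk-delete + second heap pass */
	printf("flush: %ld items, %zu bytes\n", di->num_items, di->used_bytes);
	di->used_bytes = 0;
	di->num_items = 0;
	di->num_resets++;
}

static void
add_page_items(DeadItems *di, int nitems, size_t item_bytes)
{
	/* Flush first if the budget is already exceeded and we have something */
	if (di->num_items > 0 && di->used_bytes > di->max_bytes)
		flush_dead_items(di);

	di->num_items += nitems;
	di->used_bytes += (size_t) nitems * item_bytes;
}

int
main(void)
{
	DeadItems	di = {0, 4096, 0, 0};

	for (int blk = 0; blk < 100; blk++)
		add_page_items(&di, 20, 6);		/* ~120 bytes per page */

	if (di.num_items > 0)
		flush_dead_items(&di);			/* final cycle, like end of scan */
	printf("resets: %d\n", di.num_resets);
	return 0;
}
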

◆ lazy_scan_new_or_empty()

static bool lazy_scan_new_or_empty ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool  sharelock,
Buffer  vmbuffer 
)
static

Definition at line 1848 of file vacuumlazy.c.

1850{
1851 Size freespace;
1852
1853 if (PageIsNew(page))
1854 {
1855 /*
1856 * All-zeroes pages can be left over if either a backend extends the
1857 * relation by a single page, but crashes before the newly initialized
1858 * page has been written out, or when bulk-extending the relation
1859 * (which creates a number of empty pages at the tail end of the
1860 * relation), and then enters them into the FSM.
1861 *
1862 * Note we do not enter the page into the visibilitymap. That has the
1863 * downside that we repeatedly visit this page in subsequent vacuums,
1864 * but otherwise we'll never discover the space on a promoted standby.
1865 * The harm of repeated checking ought to normally not be too bad. The
1866 * space usually should be used at some point, otherwise there
1867 * wouldn't be any regular vacuums.
1868 *
1869 * Make sure these pages are in the FSM, to ensure they can be reused.
1870 * Do that by testing if there's any space recorded for the page. If
1871 * not, enter it. We do so after releasing the lock on the heap page;
1872 * the FSM is approximate, after all.
1873 */
1875
1876 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1877 {
1878 freespace = BLCKSZ - SizeOfPageHeaderData;
1879
1880 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1881 }
1882
1883 return true;
1884 }
1885
1886 if (PageIsEmpty(page))
1887 {
1888 /*
1889 * It seems likely that caller will always be able to get a cleanup
1890 * lock on an empty page. But don't take any chances -- escalate to
1891 * an exclusive lock (still don't need a cleanup lock, though).
1892 */
1893 if (sharelock)
1894 {
1897
1898 if (!PageIsEmpty(page))
1899 {
1900 /* page isn't new or empty -- keep lock and pin for now */
1901 return false;
1902 }
1903 }
1904 else
1905 {
1906 /* Already have a full cleanup lock (which is more than enough) */
1907 }
1908
1909 /*
1910 * Unlike new pages, empty pages are always set all-visible and
1911 * all-frozen.
1912 */
1913 if (!PageIsAllVisible(page))
1914 {
1916
1917 /* mark buffer dirty before writing a WAL record */
1919
1920 /*
1921 * It's possible that another backend has extended the heap,
1922 * initialized the page, and then failed to WAL-log the page due
1923 * to an ERROR. Since heap extension is not WAL-logged, recovery
1924 * might try to replay our record setting the page all-visible and
1925 * find that the page isn't initialized, which will cause a PANIC.
1926 * To prevent that, check whether the page has been previously
1927 * WAL-logged, and if not, do that now.
1928 */
1929 if (RelationNeedsWAL(vacrel->rel) &&
1931 log_newpage_buffer(buf, true);
1932
1933 PageSetAllVisible(page);
1934 visibilitymap_set(vacrel->rel, blkno, buf,
1936 vmbuffer, InvalidTransactionId,
1940
1941 /* Count the newly all-frozen pages for logging */
1942 vacrel->vm_new_visible_pages++;
1943 vacrel->vm_new_visible_frozen_pages++;
1944 }
1945
1946 freespace = PageGetHeapFreeSpace(page);
1948 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1949 return true;
1950 }
1951
1952 /* page isn't new or empty -- keep lock and pin */
1953 return false;
1954}

References buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, END_CRIT_SECTION, fb(), GetRecordedFreeSpace(), InvalidTransactionId, InvalidXLogRecPtr, LockBuffer(), log_newpage_buffer(), MarkBufferDirty(), PageGetHeapFreeSpace(), PageGetLSN(), PageIsAllVisible(), PageIsEmpty(), PageIsNew(), PageSetAllVisible(), RecordPageWithFreeSpace(), RelationNeedsWAL, SizeOfPageHeaderData, START_CRIT_SECTION, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_set(), and XLogRecPtrIsValid.

Referenced by lazy_scan_heap().
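
For all-zeroes (new) pages, the FSM registration above is deliberately conservative: free space is only recorded when nothing is recorded yet. A small sketch of that check (the fsm array, PAGE_HEADER_SIZE value, and helper name are illustrative stand-ins for GetRecordedFreeSpace()/RecordPageWithFreeSpace()):

#include <stdio.h>

#define BLCKSZ				8192
#define PAGE_HEADER_SIZE	24		/* assumed stand-in for SizeOfPageHeaderData */

static unsigned fsm[16];			/* toy per-block free space table */

static void
register_new_page(unsigned blkno)
{
	/*
	 * Make sure the FSM knows about the page's free space, but only if
	 * nothing is recorded yet -- the FSM is approximate, so an existing
	 * entry is left alone.
	 */
	if (fsm[blkno] == 0)
		fsm[blkno] = BLCKSZ - PAGE_HEADER_SIZE;
}

int
main(void)
{
	fsm[3] = 100;					/* pre-existing entry survives */
	register_new_page(2);
	register_new_page(3);
	printf("blk2=%u blk3=%u\n", fsm[2], fsm[3]);
	return 0;
}
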

◆ lazy_scan_noprune()

static bool lazy_scan_noprune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool has_lpdead_items 
)
static

Definition at line 2269 of file vacuumlazy.c.

2274{
2275 OffsetNumber offnum,
2276 maxoff;
2277 int lpdead_items,
2278 live_tuples,
2279 recently_dead_tuples,
2280 missed_dead_tuples;
2281 bool hastup;
2283 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2284 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2286
2287 Assert(BufferGetBlockNumber(buf) == blkno);
2288
2289 hastup = false; /* for now */
2290
2291 lpdead_items = 0;
2292 live_tuples = 0;
2293 recently_dead_tuples = 0;
2294 missed_dead_tuples = 0;
2295
2296 maxoff = PageGetMaxOffsetNumber(page);
2297 for (offnum = FirstOffsetNumber;
2298 offnum <= maxoff;
2299 offnum = OffsetNumberNext(offnum))
2300 {
2301 ItemId itemid;
2302 HeapTupleData tuple;
2303
2304 vacrel->offnum = offnum;
2305 itemid = PageGetItemId(page, offnum);
2306
2307 if (!ItemIdIsUsed(itemid))
2308 continue;
2309
2310 if (ItemIdIsRedirected(itemid))
2311 {
2312 hastup = true;
2313 continue;
2314 }
2315
2316 if (ItemIdIsDead(itemid))
2317 {
2318 /*
2319 * Deliberately don't set hastup=true here. See same point in
2320 * lazy_scan_prune for an explanation.
2321 */
2322 deadoffsets[lpdead_items++] = offnum;
2323 continue;
2324 }
2325
2326 hastup = true; /* page prevents rel truncation */
2327 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2329 &NoFreezePageRelfrozenXid,
2330 &NoFreezePageRelminMxid))
2331 {
2332 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2333 if (vacrel->aggressive)
2334 {
2335 /*
2336 * Aggressive VACUUMs must always be able to advance rel's
2337 * relfrozenxid to a value >= FreezeLimit (and be able to
2338 * advance rel's relminmxid to a value >= MultiXactCutoff).
2339 * The ongoing aggressive VACUUM won't be able to do that
2340 * unless it can freeze an XID (or MXID) from this tuple now.
2341 *
2342 * The only safe option is to have caller perform processing
2343 * of this page using lazy_scan_prune. Caller might have to
2344 * wait a while for a cleanup lock, but it can't be helped.
2345 */
2346 vacrel->offnum = InvalidOffsetNumber;
2347 return false;
2348 }
2349
2350 /*
2351 * Non-aggressive VACUUMs are under no obligation to advance
2352 * relfrozenxid (even by one XID). We can be much laxer here.
2353 *
2354 * Currently we always just accept an older final relfrozenxid
2355 * and/or relminmxid value. We never make caller wait or work a
2356 * little harder, even when it likely makes sense to do so.
2357 */
2358 }
2359
2360 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2361 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2362 tuple.t_len = ItemIdGetLength(itemid);
2363 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2364
2365 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2366 buf))
2367 {
2369 case HEAPTUPLE_LIVE:
2370
2371 /*
2372 * Count both cases as live, just like lazy_scan_prune
2373 */
2374 live_tuples++;
2375
2376 break;
2377 case HEAPTUPLE_DEAD:
2378
2379 /*
2380 * There is some useful work for pruning to do, that won't be
2381 * done due to failure to get a cleanup lock.
2382 */
2383 missed_dead_tuples++;
2384 break;
2386
2387 /*
2388 * Count in recently_dead_tuples, just like lazy_scan_prune
2389 */
2390 recently_dead_tuples++;
2391 break;
2393
2394 /*
2395 * Do not count these rows as live, just like lazy_scan_prune
2396 */
2397 break;
2398 default:
2399 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2400 break;
2401 }
2402 }
2403
2404 vacrel->offnum = InvalidOffsetNumber;
2405
2406 /*
2407 * By here we know for sure that caller can put off freezing and pruning
2408 * this particular page until the next VACUUM. Remember its details now.
2409 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2410 */
2411 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2412 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2413
2414 /* Save any LP_DEAD items found on the page in dead_items */
2415 if (vacrel->nindexes == 0)
2416 {
2417 /* Using one-pass strategy (since table has no indexes) */
2418 if (lpdead_items > 0)
2419 {
2420 /*
2421 * Perfunctory handling for the corner case where a single pass
2422 * strategy VACUUM cannot get a cleanup lock, and it turns out
2423 * that there is one or more LP_DEAD items: just count the LP_DEAD
2424 * items as missed_dead_tuples instead. (This is a bit dishonest,
2425 * but it beats having to maintain specialized heap vacuuming code
2426 * forever, for vanishingly little benefit.)
2427 */
2428 hastup = true;
2429 missed_dead_tuples += lpdead_items;
2430 }
2431 }
2432 else if (lpdead_items > 0)
2433 {
2434 /*
2435 * Page has LP_DEAD items, and so any references/TIDs that remain in
2436 * indexes will be deleted during index vacuuming (and then marked
2437 * LP_UNUSED in the heap)
2438 */
2439 vacrel->lpdead_item_pages++;
2440
2441 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2442
2443 vacrel->lpdead_items += lpdead_items;
2444 }
2445
2446 /*
2447 * Finally, add relevant page-local counts to whole-VACUUM counts
2448 */
2449 vacrel->live_tuples += live_tuples;
2450 vacrel->recently_dead_tuples += recently_dead_tuples;
2451 vacrel->missed_dead_tuples += missed_dead_tuples;
2452 if (missed_dead_tuples > 0)
2453 vacrel->missed_dead_pages++;
2454
2455 /* Can't truncate this page */
2456 if (hastup)
2457 vacrel->nonempty_pages = blkno + 1;
2458
2459 /* Did we find LP_DEAD items? */
2460 *has_lpdead_items = (lpdead_items > 0);
2461
2462 /* Caller won't need to call lazy_scan_prune with same page */
2463 return true;
2464}

References Assert, buf, BufferGetBlockNumber(), dead_items_add(), elog, ERROR, fb(), FirstOffsetNumber, heap_tuple_should_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuum(), InvalidOffsetNumber, ItemIdGetLength, ItemIdIsDead, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), MaxHeapTuplesPerPage, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by lazy_scan_heap().
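
The per-page accounting done in the loop above reduces to classifying each item and bumping one of a few counters. A compact sketch of that classification (the TupleState enum mirrors the HeapTupleSatisfiesVacuum() results only loosely; all names here are illustrative):

#include <stddef.h>
#include <stdio.h>

typedef enum TupleState
{
	TS_LIVE,
	TS_INSERT_IN_PROGRESS,
	TS_RECENTLY_DEAD,
	TS_DEAD,
	TS_DELETE_IN_PROGRESS,
	TS_LP_DEAD					/* already-dead line pointer, no tuple */
} TupleState;

int
main(void)
{
	TupleState	page[] = {TS_LIVE, TS_LP_DEAD, TS_RECENTLY_DEAD, TS_DEAD,
						  TS_INSERT_IN_PROGRESS, TS_DELETE_IN_PROGRESS};
	int			live = 0, recently_dead = 0, missed_dead = 0, lpdead = 0;

	for (size_t i = 0; i < sizeof(page) / sizeof(page[0]); i++)
	{
		switch (page[i])
		{
			case TS_LIVE:
			case TS_INSERT_IN_PROGRESS:
				live++;			/* both counted as live, like lazy_scan_prune */
				break;
			case TS_RECENTLY_DEAD:
				recently_dead++;
				break;
			case TS_DEAD:
				missed_dead++;	/* pruning would have removed it */
				break;
			case TS_DELETE_IN_PROGRESS:
				break;			/* not counted as live */
			case TS_LP_DEAD:
				lpdead++;		/* saved in dead_items for index vacuuming */
				break;
		}
	}
	printf("live=%d recently_dead=%d missed_dead=%d lpdead=%d\n",
		   live, recently_dead, missed_dead, lpdead);
	return 0;
}
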

◆ lazy_scan_prune()

static int lazy_scan_prune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
Buffer  vmbuffer,
bool has_lpdead_items,
bool vm_page_frozen 
)
static

Definition at line 2056 of file vacuumlazy.c.

2063{
2064 Relation rel = vacrel->rel;
2066 PruneFreezeParams params = {
2067 .relation = rel,
2068 .buffer = buf,
2069 .reason = PRUNE_VACUUM_SCAN,
2070 .options = HEAP_PAGE_PRUNE_FREEZE,
2071 .vistest = vacrel->vistest,
2072 .cutoffs = &vacrel->cutoffs,
2073 };
2074 uint8 old_vmbits = 0;
2075 uint8 new_vmbits = 0;
2076
2077 Assert(BufferGetBlockNumber(buf) == blkno);
2078
2079 /*
2080 * Prune all HOT-update chains and potentially freeze tuples on this page.
2081 *
2082 * If the relation has no indexes, we can immediately mark would-be dead
2083 * items LP_UNUSED.
2084 *
2085 * The number of tuples removed from the page is returned in
2086 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2087 * presult.lpdead_items's final value can be thought of as the number of
2088 * tuples that were deleted from indexes.
2089 *
2090 * We will update the VM after collecting LP_DEAD items and freezing
2091 * tuples. Pruning will have determined whether or not the page is
2092 * all-visible.
2093 */
2094 if (vacrel->nindexes == 0)
2096
2098 &presult,
2099 &vacrel->offnum,
2100 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2101
2102 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2103 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2104
2105 if (presult.nfrozen > 0)
2106 {
2107 /*
2108 * We don't increment the new_frozen_tuple_pages instrumentation
2109 * counter when nfrozen == 0, since it only counts pages with newly
2110 * frozen tuples (don't confuse that with pages newly set all-frozen
2111 * in VM).
2112 */
2113 vacrel->new_frozen_tuple_pages++;
2114 }
2115
2116 /*
2117 * VACUUM will call heap_page_is_all_visible() during the second pass over
2118 * the heap to determine all_visible and all_frozen for the page -- this
2119 * is a specialized version of the logic from this function. Now that
2120 * we've finished pruning and freezing, make sure that we're in total
2121 * agreement with heap_page_is_all_visible() using an assertion.
2122 */
2123#ifdef USE_ASSERT_CHECKING
2124 if (presult.all_visible)
2125 {
2127 bool debug_all_frozen;
2128
2129 Assert(presult.lpdead_items == 0);
2130
2132 vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2133 &debug_cutoff, &vacrel->offnum));
2134
2135 Assert(presult.all_frozen == debug_all_frozen);
2136
2138 debug_cutoff == presult.vm_conflict_horizon);
2139 }
2140#endif
2141
2142 /*
2143 * Now save details of the LP_DEAD items from the page in vacrel
2144 */
2145 if (presult.lpdead_items > 0)
2146 {
2147 vacrel->lpdead_item_pages++;
2148
2149 /*
2150 * deadoffsets are collected incrementally in
2151 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2152 * with an indeterminate order, but dead_items_add requires them to be
2153 * sorted.
2154 */
2155 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2157
2158 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2159 }
2160
2161 /* Finally, add page-local counts to whole-VACUUM counts */
2162 vacrel->tuples_deleted += presult.ndeleted;
2163 vacrel->tuples_frozen += presult.nfrozen;
2164 vacrel->lpdead_items += presult.lpdead_items;
2165 vacrel->live_tuples += presult.live_tuples;
2166 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2167
2168 /* Can't truncate this page */
2169 if (presult.hastup)
2170 vacrel->nonempty_pages = blkno + 1;
2171
2172 /* Did we find LP_DEAD items? */
2173 *has_lpdead_items = (presult.lpdead_items > 0);
2174
2175 Assert(!presult.all_visible || !(*has_lpdead_items));
2176 Assert(!presult.all_frozen || presult.all_visible);
2177
2178 old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);
2179
2180 identify_and_fix_vm_corruption(vacrel->rel, buf, blkno, page,
2181 presult.lpdead_items, vmbuffer,
2182 &old_vmbits);
2183
2184 if (!presult.all_visible)
2185 return presult.ndeleted;
2186
2187 /* Set the visibility map and page visibility hint */
2189
2190 if (presult.all_frozen)
2192
2193 /* Nothing to do */
2194 if (old_vmbits == new_vmbits)
2195 return presult.ndeleted;
2196
2197 /*
2198 * It should never be the case that the visibility map bit is set while
2199 * the page-level bit is clear (and if so, we cleared it above), but the
2200 * reverse is allowed (if checksums are not enabled). Regardless, set both
2201 * bits so that we get back in sync.
2202 *
2203 * The heap buffer must be marked dirty before adding it to the WAL chain
2204 * when setting the VM. We don't worry about unnecessarily dirtying the
2205 * heap buffer if PD_ALL_VISIBLE is already set, though. It is extremely
2206 * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
2207 * the VM bits clear, so there is no point in optimizing it.
2208 */
2209 PageSetAllVisible(page);
2211
2212 /*
2213 * If the page is being set all-frozen, we pass InvalidTransactionId as
2214 * the cutoff_xid, since a snapshot conflict horizon sufficient to make
2215 * everything safe for REDO was logged when the page's tuples were frozen.
2216 */
2217 Assert(!presult.all_frozen ||
2218 !TransactionIdIsValid(presult.vm_conflict_horizon));
2219
2220 visibilitymap_set(vacrel->rel, blkno, buf,
2222 vmbuffer, presult.vm_conflict_horizon,
2223 new_vmbits);
2224
2225 /*
2226 * If the page wasn't already set all-visible and/or all-frozen in the VM,
2227 * count it as newly set for logging.
2228 */
2230 {
2231 vacrel->vm_new_visible_pages++;
2232 if (presult.all_frozen)
2233 {
2234 vacrel->vm_new_visible_frozen_pages++;
2235 *vm_page_frozen = true;
2236 }
2237 }
2238 else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2239 presult.all_frozen)
2240 {
2241 vacrel->vm_new_frozen_pages++;
2242 *vm_page_frozen = true;
2243 }
2244
2245 return presult.ndeleted;
2246}

References Assert, buf, BufferGetBlockNumber(), cmpOffsetNumbers(), dead_items_add(), fb(), heap_page_prune_and_freeze(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, identify_and_fix_vm_corruption(), InvalidXLogRecPtr, MarkBufferDirty(), MultiXactIdIsValid, PruneFreezeParams::options, PageSetAllVisible(), PRUNE_VACUUM_SCAN, qsort, PruneFreezeParams::relation, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_get_status(), and visibilitymap_set().

Referenced by lazy_scan_heap().
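
The visibility map update at the end of this function boils down to deriving the desired bits from all_visible/all_frozen and skipping the write when they already match the old bits. A small sketch of that decision (the bit values and helper name are illustrative, not the visibilitymap.h flags):

#include <stdbool.h>
#include <stdio.h>

#define VM_ALL_VISIBLE	0x01
#define VM_ALL_FROZEN	0x02

/*
 * Compute the VM bits a page should end up with after pruning/freezing, and
 * report whether an update is needed at all -- mirroring the
 * "old_vmbits == new_vmbits means nothing to do" short-circuit above.
 */
static bool
vm_update_needed(bool all_visible, bool all_frozen,
				 unsigned old_vmbits, unsigned *new_vmbits)
{
	*new_vmbits = 0;
	if (!all_visible)
		return false;			/* never set bits for a not-all-visible page */

	*new_vmbits = VM_ALL_VISIBLE;
	if (all_frozen)
		*new_vmbits |= VM_ALL_FROZEN;

	return *new_vmbits != old_vmbits;
}

int
main(void)
{
	unsigned	newbits;

	printf("%d\n", vm_update_needed(true, true, VM_ALL_VISIBLE, &newbits));	/* 1 */
	printf("%d\n", vm_update_needed(true, false, VM_ALL_VISIBLE, &newbits));	/* 0 */
	printf("%d\n", vm_update_needed(false, false, 0, &newbits));				/* 0 */
	return 0;
}
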

◆ lazy_truncate_heap()

static void lazy_truncate_heap ( LVRelState vacrel)
static

Definition at line 3250 of file vacuumlazy.c.

3251{
3252 BlockNumber orig_rel_pages = vacrel->rel_pages;
3255 int lock_retry;
3256
3257 /* Report that we are now truncating */
3260
3261 /* Update error traceback information one last time */
3263 vacrel->nonempty_pages, InvalidOffsetNumber);
3264
3265 /*
3266 * Loop until no more truncating can be done.
3267 */
3268 do
3269 {
3270 /*
3271 * We need full exclusive lock on the relation in order to do
3272 * truncation. If we can't get it, give up rather than waiting --- we
3273 * don't want to block other backends, and we don't want to deadlock
3274 * (which is quite possible considering we already hold a lower-grade
3275 * lock).
3276 */
3277 lock_waiter_detected = false;
3278 lock_retry = 0;
3279 while (true)
3280 {
3282 break;
3283
3284 /*
3285 * Check for interrupts while trying to (re-)acquire the exclusive
3286 * lock.
3287 */
3289
3292 {
3293 /*
3294 * We failed to establish the lock in the specified number of
3295 * retries. This means we give up truncating.
3296 */
3297 ereport(vacrel->verbose ? INFO : DEBUG2,
3298 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3299 vacrel->relname)));
3300 return;
3301 }
3302
3308 }
3309
3310 /*
3311 * Now that we have exclusive lock, look to see if the rel has grown
3312 * whilst we were vacuuming with non-exclusive lock. If so, give up;
3313 * the newly added pages presumably contain non-deletable tuples.
3314 */
3317 {
3318 /*
3319 * Note: we intentionally don't update vacrel->rel_pages with the
3320 * new rel size here. If we did, it would amount to assuming that
3321 * the new pages are empty, which is unlikely. Leaving the numbers
3322 * alone amounts to assuming that the new pages have the same
3323 * tuple density as existing ones, which is less unlikely.
3324 */
3326 return;
3327 }
3328
3329 /*
3330 * Scan backwards from the end to verify that the end pages actually
3331 * contain no tuples. This is *necessary*, not optional, because
3332 * other backends could have added tuples to these pages whilst we
3333 * were vacuuming.
3334 */
3336 vacrel->blkno = new_rel_pages;
3337
3339 {
3340 /* can't do anything after all */
3342 return;
3343 }
3344
3345 /*
3346 * Okay to truncate.
3347 */
3349
3350 /*
3351 * We can release the exclusive lock as soon as we have truncated.
3352 * Other backends can't safely access the relation until they have
3353 * processed the smgr invalidation that smgrtruncate sent out ... but
3354 * that should happen as part of standard invalidation processing once
3355 * they acquire lock on the relation.
3356 */
3358
3359 /*
3360 * Update statistics. Here, it *is* correct to adjust rel_pages
3361 * without also touching reltuples, since the tuple count wasn't
3362 * changed by the truncation.
3363 */
3364 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3365 vacrel->rel_pages = new_rel_pages;
3366
3367 ereport(vacrel->verbose ? INFO : DEBUG2,
3368 (errmsg("table \"%s\": truncated %u to %u pages",
3369 vacrel->relname,
3372 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3373}

References AccessExclusiveLock, CHECK_FOR_INTERRUPTS, ConditionalLockRelation(), count_nondeletable_pages(), DEBUG2, ereport, errmsg(), fb(), INFO, InvalidOffsetNumber, MyLatch, pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE, RelationGetNumberOfBlocks, RelationTruncate(), ResetLatch(), UnlockRelation(), update_vacuum_error_info(), VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_TRUNCATE_LOCK_TIMEOUT, VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL, WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by heap_vacuum_rel().
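
The lock-acquisition loop at the top of lazy_truncate_heap() is easier to follow in isolation: keep trying a conditional AccessExclusiveLock, sleep VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL (50 ms) between attempts, and give up once the retries add up to VACUUM_TRUNCATE_LOCK_TIMEOUT (5000 ms). The sketch below is a standalone model under those constants; try_exclusive_lock() and sleep_ms() are placeholders for ConditionalLockRelation() and WaitLatch(), not real APIs.

#include <stdbool.h>
#include <stdio.h>

#define TRUNCATE_LOCK_WAIT_INTERVAL 50      /* ms between attempts */
#define TRUNCATE_LOCK_TIMEOUT       5000    /* ms before giving up */

/* Placeholder: pretend the lock frees up on the fifth attempt. */
static bool
try_exclusive_lock(int attempt)
{
    return attempt >= 5;
}

/* Placeholder for a millisecond sleep (WaitLatch() in the real code). */
static void
sleep_ms(int ms)
{
    (void) ms;
}

/*
 * Returns true once the lock is acquired, false if the retry budget
 * (TIMEOUT / WAIT_INTERVAL attempts) is exhausted first.
 */
static bool
acquire_truncate_lock(void)
{
    int     lock_retry = 0;

    while (true)
    {
        if (try_exclusive_lock(lock_retry))
            return true;

        if (++lock_retry > TRUNCATE_LOCK_TIMEOUT / TRUNCATE_LOCK_WAIT_INTERVAL)
            return false;       /* stop truncating: someone else wants the table */

        sleep_ms(TRUNCATE_LOCK_WAIT_INTERVAL);
    }
}

int
main(void)
{
    printf(acquire_truncate_lock() ? "lock acquired\n" : "gave up\n");
    return 0;
}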

◆ lazy_vacuum()

static void lazy_vacuum ( LVRelState *vacrel)
static

Definition at line 2480 of file vacuumlazy.c.

2481{
2482 bool bypass;
2483
2484 /* Should not end up here with no indexes */
2485 Assert(vacrel->nindexes > 0);
2486 Assert(vacrel->lpdead_item_pages > 0);
2487
2488 if (!vacrel->do_index_vacuuming)
2489 {
2490 Assert(!vacrel->do_index_cleanup);
2491 dead_items_reset(vacrel);
2492 return;
2493 }
2494
2495 /*
2496 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2497 *
2498 * We currently only do this in cases where the number of LP_DEAD items
2499 * for the entire VACUUM operation is close to zero. This avoids sharp
2500 * discontinuities in the duration and overhead of successive VACUUM
2501 * operations that run against the same table with a fixed workload.
2502 * Ideally, successive VACUUM operations will behave as if there are
2503 * exactly zero LP_DEAD items in cases where there are close to zero.
2504 *
2505 * This is likely to be helpful with a table that is continually affected
2506 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2507 * have small aberrations that lead to just a few heap pages retaining
2508 * only one or two LP_DEAD items. This is pretty common; even when the
2509 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2510 * impossible to predict whether HOT will be applied in 100% of cases.
2511 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2512 * HOT through careful tuning.
2513 */
2514 bypass = false;
2515 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2516 {
2517 BlockNumber threshold;
2518
2519 Assert(vacrel->num_index_scans == 0);
2520 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2521 Assert(vacrel->do_index_vacuuming);
2522 Assert(vacrel->do_index_cleanup);
2523
2524 /*
2525 * This crossover point at which we'll start to do index vacuuming is
2526 * expressed as a percentage of the total number of heap pages in the
2527 * table that are known to have at least one LP_DEAD item. This is
2528 * much more important than the total number of LP_DEAD items, since
2529 * it's a proxy for the number of heap pages whose visibility map bits
2530 * cannot be set on account of bypassing index and heap vacuuming.
2531 *
2532 * We apply one further precautionary test: the space currently used
2533 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2534 * not exceed 32MB. This limits the risk that we will bypass index
2535 * vacuuming again and again until eventually there is a VACUUM whose
2536 * dead_items space is not CPU cache resident.
2537 *
2538 * We don't take any special steps to remember the LP_DEAD items (such
2539 * as counting them in our final update to the stats system) when the
2540 * optimization is applied. Though the accounting used in analyze.c's
2541 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2542 * rows in its own stats report, that's okay. The discrepancy should
2543 * be negligible. If this optimization is ever expanded to cover more
2544 * cases then this may need to be reconsidered.
2545 */
2546 threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2547 bypass = (vacrel->lpdead_item_pages < threshold &&
2548 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2549 }
2550
2551 if (bypass)
2552 {
2553 /*
2554 * There are almost zero TIDs. Behave as if there were precisely
2555 * zero: bypass index vacuuming, but do index cleanup.
2556 *
2557 * We expect that the ongoing VACUUM operation will finish very
2558 * quickly, so there is no point in considering speeding up as a
2559 * failsafe against wraparound failure. (Index cleanup is expected to
2560 * finish very quickly in cases where there were no ambulkdelete()
2561 * calls.)
2562 */
2563 vacrel->do_index_vacuuming = false;
2564 }
2566 {
2567 /*
2568 * We successfully completed a round of index vacuuming. Do related
2569 * heap vacuuming now.
2570 */
2572 }
2573 else
2574 {
2575 /*
2576 * Failsafe case.
2577 *
2578 * We attempted index vacuuming, but didn't finish a full round/full
2579 * index scan. This happens when relfrozenxid or relminmxid is too
2580 * far in the past.
2581 *
2582 * From this point on the VACUUM operation will do no further index
2583 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2584 * back here again.
2585 */
2587 }
2588
2589 /*
2590 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2591 * vacuum)
2592 */
2593 dead_items_reset(vacrel);
2594}

References Assert, BYPASS_THRESHOLD_PAGES, dead_items_reset(), fb(), lazy_vacuum_all_indexes(), lazy_vacuum_heap_rel(), TidStoreMemoryUsage(), and VacuumFailsafeActive.

Referenced by lazy_scan_heap().
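
The bypass test comes down to two numbers: the count of heap pages with at least one LP_DEAD item, compared against BYPASS_THRESHOLD_PAGES (2% of rel_pages), and the memory used for the dead-item TIDs, capped at 32 MB. A standalone restatement, with the relevant LVRelState fields passed in as plain arguments rather than read from the struct:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BYPASS_THRESHOLD_PAGES 0.02     /* i.e. 2% of rel_pages */

/*
 * Mirror of the bypass test in lazy_vacuum(): skip index (and heap)
 * vacuuming only when very few pages carry LP_DEAD items and the
 * dead-item TID storage is still small enough to stay cache resident.
 */
static bool
should_bypass_index_vacuuming(uint32_t rel_pages,
                              uint32_t lpdead_item_pages,
                              uint64_t dead_items_bytes)
{
    uint32_t    threshold;

    if (rel_pages == 0)
        return false;

    threshold = (uint32_t) (rel_pages * BYPASS_THRESHOLD_PAGES);
    return lpdead_item_pages < threshold &&
        dead_items_bytes < (uint64_t) 32 * 1024 * 1024;
}

int
main(void)
{
    /* 100,000-page table, 500 pages with dead items, 4 MB of TIDs: bypass. */
    printf("%d\n", should_bypass_index_vacuuming(100000, 500, 4 << 20));
    /* Same table with 5,000 such pages: do the index scans. */
    printf("%d\n", should_bypass_index_vacuuming(100000, 5000, 4 << 20));
    return 0;
}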

◆ lazy_vacuum_all_indexes()

static bool lazy_vacuum_all_indexes ( LVRelState *vacrel)
static

Definition at line 2605 of file vacuumlazy.c.

2606{
2607 bool allindexes = true;
2608 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2609 const int progress_start_index[] = {
2612 };
2613 const int progress_end_index[] = {
2617 };
2620
2621 Assert(vacrel->nindexes > 0);
2622 Assert(vacrel->do_index_vacuuming);
2623 Assert(vacrel->do_index_cleanup);
2624
2625 /* Precheck for XID wraparound emergencies */
2627 {
2628 /* Wraparound emergency -- don't even start an index scan */
2629 return false;
2630 }
2631
2632 /*
2633 * Report that we are now vacuuming indexes and the number of indexes to
2634 * vacuum.
2635 */
2637 progress_start_val[1] = vacrel->nindexes;
2639
2641 {
2642 for (int idx = 0; idx < vacrel->nindexes; idx++)
2643 {
2644 Relation indrel = vacrel->indrels[idx];
2645 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2646
2647 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2649 vacrel);
2650
2651 /* Report the number of indexes vacuumed */
2653 idx + 1);
2654
2656 {
2657 /* Wraparound emergency -- end current index scan */
2658 allindexes = false;
2659 break;
2660 }
2661 }
2662 }
2663 else
2664 {
2665 /* Outsource everything to parallel variant */
2667 vacrel->num_index_scans);
2668
2669 /*
2670 * Do a postcheck to consider applying wraparound failsafe now. Note
2671 * that parallel VACUUM only gets the precheck and this postcheck.
2672 */
2674 allindexes = false;
2675 }
2676
2677 /*
2678 * We delete all LP_DEAD items from the first heap pass in all indexes on
2679 * each call here (except calls where we choose to do the failsafe). This
2680 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2681 * of the failsafe triggering, which prevents the next call from taking
2682 * place).
2683 */
2684 Assert(vacrel->num_index_scans > 0 ||
2685 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2687
2688 /*
2689 * Increase and report the number of index scans. Also, we reset
2690 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2691 *
2692 * We deliberately include the case where we started a round of bulk
2693 * deletes that we weren't able to finish due to the failsafe triggering.
2694 */
2695 vacrel->num_index_scans++;
2696 progress_end_val[0] = 0;
2697 progress_end_val[1] = 0;
2698 progress_end_val[2] = vacrel->num_index_scans;
2700
2701 return allindexes;
2702}

References Assert, fb(), idx(), lazy_check_wraparound_failsafe(), lazy_vacuum_one_index(), parallel_vacuum_bulkdel_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_NUM_INDEX_VACUUMS, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX, and VacuumFailsafeActive.

Referenced by lazy_vacuum().
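
The control flow above is: refuse to start if the wraparound failsafe has already fired, otherwise bulk-delete one index at a time, re-testing the failsafe after each index so an emergency can cut the round short. A standalone skeleton of that loop; failsafe_triggered() and bulkdel_index() are placeholders for lazy_check_wraparound_failsafe() and lazy_vacuum_one_index():

#include <stdbool.h>
#include <stdio.h>

/* Placeholder for lazy_check_wraparound_failsafe(). */
static bool
failsafe_triggered(void)
{
    return false;
}

/* Placeholder for lazy_vacuum_one_index(); just reports progress here. */
static void
bulkdel_index(int idx)
{
    printf("bulk-deleted index %d\n", idx);
}

/*
 * Returns true only if every index completed a full round of bulk
 * deletion; false means the failsafe cut the round short.
 */
static bool
vacuum_all_indexes(int nindexes)
{
    bool    allindexes = true;

    if (failsafe_triggered())
        return false;           /* don't even start an index scan */

    for (int idx = 0; idx < nindexes; idx++)
    {
        bulkdel_index(idx);

        if (failsafe_triggered())
        {
            allindexes = false; /* end the current round early */
            break;
        }
    }

    return allindexes;
}

int
main(void)
{
    printf("completed all indexes: %d\n", vacuum_all_indexes(3));
    return 0;
}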

◆ lazy_vacuum_heap_page()

static void lazy_vacuum_heap_page ( LVRelState *vacrel,
BlockNumber  blkno,
Buffer  buffer,
OffsetNumber *deadoffsets,
int  num_offsets,
Buffer  vmbuffer 
)
static

Definition at line 2868 of file vacuumlazy.c.

2871{
2872 Page page = BufferGetPage(buffer);
2874 int nunused = 0;
2875 TransactionId visibility_cutoff_xid;
2877 bool all_frozen;
2879 uint8 vmflags = 0;
2880
2881 Assert(vacrel->do_index_vacuuming);
2882
2884
2885 /* Update error traceback information */
2889
2890 /*
2891 * Before marking dead items unused, check whether the page will become
2892 * all-visible once that change is applied. This lets us reap the tuples
2893 * and mark the page all-visible within the same critical section,
2894 * enabling both changes to be emitted in a single WAL record. Since the
2895 * visibility checks may perform I/O and allocate memory, they must be
2896 * done outside the critical section.
2897 */
2898 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2899 vacrel->cutoffs.OldestXmin,
2900 deadoffsets, num_offsets,
2901 &all_frozen, &visibility_cutoff_xid,
2902 &vacrel->offnum))
2903 {
2905 if (all_frozen)
2906 {
2908 Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2909 }
2910
2911 /*
2912 * Take the lock on the vmbuffer before entering a critical section.
2913 * The heap page lock must also be held while updating the VM to
2914 * ensure consistency.
2915 */
2917 }
2918
2920
2921 for (int i = 0; i < num_offsets; i++)
2922 {
2923 ItemId itemid;
2924 OffsetNumber toff = deadoffsets[i];
2925
2926 itemid = PageGetItemId(page, toff);
2927
2928 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2929 ItemIdSetUnused(itemid);
2930 unused[nunused++] = toff;
2931 }
2932
2933 Assert(nunused > 0);
2934
2935 /* Attempt to truncate line pointer array now */
2937
2938 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2939 {
2940 /*
2941 * The page is guaranteed to have had dead line pointers, so we always
2942 * set PD_ALL_VISIBLE.
2943 */
2944 PageSetAllVisible(page);
2946 vmbuffer, vmflags,
2947 vacrel->rel->rd_locator);
2948 conflict_xid = visibility_cutoff_xid;
2949 }
2950
2951 /*
2952 * Mark buffer dirty before we write WAL.
2953 */
2954 MarkBufferDirty(buffer);
2955
2956 /* XLOG stuff */
2957 if (RelationNeedsWAL(vacrel->rel))
2958 {
2959 log_heap_prune_and_freeze(vacrel->rel, buffer,
2960 vmflags != 0 ? vmbuffer : InvalidBuffer,
2961 vmflags,
2963 false, /* no cleanup lock required */
2965 NULL, 0, /* frozen */
2966 NULL, 0, /* redirected */
2967 NULL, 0, /* dead */
2968 unused, nunused);
2969 }
2970
2972
2974 {
2975 /* Count the newly set VM page for logging */
2976 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2977 vacrel->vm_new_visible_pages++;
2978 if (all_frozen)
2979 vacrel->vm_new_visible_frozen_pages++;
2980 }
2981
2982 /* Revert to the previous phase information for error traceback */
2984}

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), END_CRIT_SECTION, fb(), heap_page_would_be_all_visible(), i, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, ItemIdHasStorage, ItemIdIsDead, ItemIdSetUnused, LockBuffer(), log_heap_prune_and_freeze(), MarkBufferDirty(), MaxHeapTuplesPerPage, PageGetItemId(), PageSetAllVisible(), PageTruncateLinePointerArray(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PRUNE_VACUUM_CLEANUP, RelationNeedsWAL, restore_vacuum_error_info(), START_CRIT_SECTION, TransactionIdIsValid, update_vacuum_error_info(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_set_vmbits(), and VISIBILITYMAP_VALID_BITS.

Referenced by lazy_vacuum_heap_rel().
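
Stripped of buffer locking, WAL logging and the visibility-map work, the core of this function walks the caller-supplied array of dead offsets, flips each matching line pointer to unused, and collects the reclaimed offsets for the WAL record. The sketch below does the same over a toy line-pointer array; the LpState enum is a stand-in for ItemId flags, not the real heap page format.

#include <stdio.h>

/* Toy line-pointer states standing in for ItemId flags. */
typedef enum
{
    LP_UNUSED,
    LP_NORMAL,
    LP_DEAD
} LpState;

/*
 * Mark every offset listed in deadoffsets[] unused and record it in
 * unused[], returning how many line pointers were reclaimed.
 */
static int
reap_dead_items(LpState *lp, const int *deadoffsets, int num_offsets,
                int *unused)
{
    int     nunused = 0;

    for (int i = 0; i < num_offsets; i++)
    {
        int     off = deadoffsets[i];

        /* Only LP_DEAD items from the first heap pass are expected here. */
        if (lp[off] == LP_DEAD)
        {
            lp[off] = LP_UNUSED;
            unused[nunused++] = off;
        }
    }
    return nunused;
}

int
main(void)
{
    LpState lp[6] = {LP_NORMAL, LP_DEAD, LP_NORMAL, LP_DEAD, LP_DEAD, LP_NORMAL};
    int     deadoffsets[] = {1, 3, 4};
    int     unused[6];
    int     n = reap_dead_items(lp, deadoffsets, 3, unused);

    printf("reclaimed %d line pointers\n", n);
    return 0;
}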

◆ lazy_vacuum_heap_rel()

static void lazy_vacuum_heap_rel ( LVRelState *vacrel)
static

Definition at line 2750 of file vacuumlazy.c.

2751{
2752 ReadStream *stream;
2754 Buffer vmbuffer = InvalidBuffer;
2756 TidStoreIter *iter;
2757
2758 Assert(vacrel->do_index_vacuuming);
2759 Assert(vacrel->do_index_cleanup);
2760 Assert(vacrel->num_index_scans > 0);
2761
2762 /* Report that we are now vacuuming the heap */
2765
2766 /* Update error traceback information */
2770
2771 iter = TidStoreBeginIterate(vacrel->dead_items);
2772
2773 /*
2774 * Set up the read stream for vacuum's second pass through the heap.
2775 *
2776 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2777 * not need to wait for IO and does not perform locking. Once we support
2778 * parallelism it should still be fine, as presumably the holder of locks
2779 * would never be blocked by IO while holding the lock.
2780 */
2783 vacrel->bstrategy,
2784 vacrel->rel,
2787 iter,
2788 sizeof(TidStoreIterResult));
2789
2790 while (true)
2791 {
2792 BlockNumber blkno;
2793 Buffer buf;
2794 Page page;
2796 Size freespace;
2798 int num_offsets;
2799
2800 vacuum_delay_point(false);
2801
2802 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2803
2804 /* The relation is exhausted */
2805 if (!BufferIsValid(buf))
2806 break;
2807
2808 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2809
2812 Assert(num_offsets <= lengthof(offsets));
2813
2814 /*
2815 * Pin the visibility map page in case we need to mark the page
2816 * all-visible. In most cases this will be very cheap, because we'll
2817 * already have the correct page pinned anyway.
2818 */
2819 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2820
2821 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2823 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2824 num_offsets, vmbuffer);
2825
2826 /* Now that we've vacuumed the page, record its available space */
2827 page = BufferGetPage(buf);
2828 freespace = PageGetHeapFreeSpace(page);
2829
2831 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2833 }
2834
2835 read_stream_end(stream);
2836 TidStoreEndIterate(iter);
2837
2838 vacrel->blkno = InvalidBlockNumber;
2839 if (BufferIsValid(vmbuffer))
2840 ReleaseBuffer(vmbuffer);
2841
2842 /*
2843 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2844 * the second heap pass. No more, no less.
2845 */
2846 Assert(vacrel->num_index_scans > 1 ||
2847 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2848 vacuumed_pages == vacrel->lpdead_item_pages));
2849
2851 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2852 vacrel->relname, vacrel->dead_items_info->num_items,
2853 vacuumed_pages)));
2854
2855 /* Revert to the previous phase information for error traceback */
2857}

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), DEBUG2, ereport, errmsg(), fb(), InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_vacuum_heap_page(), lengthof, LockBuffer(), MAIN_FORKNUM, MaxOffsetNumber, PageGetHeapFreeSpace(), pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, RecordPageWithFreeSpace(), ReleaseBuffer(), restore_vacuum_error_info(), TidStoreBeginIterate(), TidStoreEndIterate(), TidStoreGetBlockOffsets(), UnlockReleaseBuffer(), update_vacuum_error_info(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, vacuum_reap_lp_read_stream_next(), and visibilitymap_pin().

Referenced by lazy_vacuum().
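
Structurally, the second heap pass is "for each block the first pass remembered: reap its dead items, then record the page's free space". A standalone skeleton of that loop over an in-memory list of (block, offsets) entries; the TID store, read stream and buffer manager are abstracted away behind placeholders.

#include <stdio.h>

/* One entry of a toy dead-items store: a block and its dead offsets. */
typedef struct
{
    unsigned    blkno;
    int         noffsets;
    int         offsets[8];
} DeadItemsEntry;

/* Placeholder for lazy_vacuum_heap_page(): reap the offsets on one block. */
static void
vacuum_heap_page(unsigned blkno, const int *offsets, int noffsets)
{
    (void) offsets;
    printf("block %u: set %d dead item identifiers unused\n", blkno, noffsets);
}

/* Placeholder for RecordPageWithFreeSpace(). */
static void
record_free_space(unsigned blkno)
{
    printf("block %u: recorded free space in the FSM\n", blkno);
}

/*
 * Skeleton of the second heap pass: visit only the blocks that the
 * first pass remembered, in block order, and reap their dead items.
 */
static void
vacuum_heap_rel(const DeadItemsEntry *entries, int nentries)
{
    for (int i = 0; i < nentries; i++)
    {
        vacuum_heap_page(entries[i].blkno, entries[i].offsets,
                         entries[i].noffsets);
        record_free_space(entries[i].blkno);
    }
}

int
main(void)
{
    DeadItemsEntry entries[] = {
        {.blkno = 2, .noffsets = 2, .offsets = {4, 7}},
        {.blkno = 9, .noffsets = 1, .offsets = {1}},
    };

    vacuum_heap_rel(entries, 2);
    return 0;
}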

◆ lazy_vacuum_one_index()

static IndexBulkDeleteResult * lazy_vacuum_one_index ( Relation  indrel,
IndexBulkDeleteResult *istat,
double  reltuples,
LVRelState *vacrel 
)
static

Definition at line 3121 of file vacuumlazy.c.

3123{
3126
3127 ivinfo.index = indrel;
3128 ivinfo.heaprel = vacrel->rel;
3129 ivinfo.analyze_only = false;
3130 ivinfo.report_progress = false;
3131 ivinfo.estimated_count = true;
3132 ivinfo.message_level = DEBUG2;
3133 ivinfo.num_heap_tuples = reltuples;
3134 ivinfo.strategy = vacrel->bstrategy;
3135
3136 /*
3137 * Update error traceback information.
3138 *
3139 * The index name is saved during this phase and restored immediately
3140 * after this phase. See vacuum_error_callback.
3141 */
3142 Assert(vacrel->indname == NULL);
3147
3148 /* Do bulk deletion */
3149 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3150 vacrel->dead_items_info);
3151
3152 /* Revert to the previous phase information for error traceback */
3154 pfree(vacrel->indname);
3155 vacrel->indname = NULL;
3156
3157 return istat;
3158}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_bulkdel_one_index(), and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by lazy_vacuum_all_indexes().
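
Conceptually, each call hands the index AM the full set of dead heap TIDs gathered in the first pass and asks it to delete every index entry that points at one of them. The toy model below performs that bulk delete over plain arrays; a real ambulkdelete() implementation of course works through the index page by page rather than over an entry list.

#include <stdbool.h>
#include <stdio.h>

/* A toy heap TID: block number plus offset. */
typedef struct
{
    unsigned    blkno;
    unsigned    offnum;
} Tid;

/* A toy index entry: a key and the heap TID it points at. */
typedef struct
{
    int         key;
    Tid         htid;
} IndexEntry;

static bool
tid_is_dead(Tid tid, const Tid *dead, int ndead)
{
    for (int i = 0; i < ndead; i++)
        if (dead[i].blkno == tid.blkno && dead[i].offnum == tid.offnum)
            return true;
    return false;
}

/*
 * Bulk delete: keep only the index entries whose heap TID is not in the
 * dead-item list; return how many entries were removed.
 */
static int
bulkdel(IndexEntry *entries, int *nentries, const Tid *dead, int ndead)
{
    int     kept = 0;
    int     removed = 0;

    for (int i = 0; i < *nentries; i++)
    {
        if (tid_is_dead(entries[i].htid, dead, ndead))
            removed++;
        else
            entries[kept++] = entries[i];
    }
    *nentries = kept;
    return removed;
}

int
main(void)
{
    IndexEntry  entries[] = {
        {1, {2, 4}}, {2, {2, 7}}, {3, {9, 1}},
    };
    int         nentries = 3;
    Tid         dead[] = {{2, 7}, {9, 1}};

    printf("removed %d index entries\n", bulkdel(entries, &nentries, dead, 2));
    return 0;
}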

◆ restore_vacuum_error_info()

static void restore_vacuum_error_info ( LVRelState *vacrel,
const LVSavedErrInfo *saved_vacrel 
)
static

Definition at line 3961 of file vacuumlazy.c.

3963{
3964 vacrel->blkno = saved_vacrel->blkno;
3965 vacrel->offnum = saved_vacrel->offnum;
3966 vacrel->phase = saved_vacrel->phase;
3967}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().

◆ should_attempt_truncation()

static bool should_attempt_truncation ( LVRelState *vacrel)
static

Definition at line 3230 of file vacuumlazy.c.

3231{
3232 BlockNumber possibly_freeable;
3233
3234 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3235 return false;
3236
3237 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3238 if (possibly_freeable > 0 &&
3239 (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3240 possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3241 return true;
3242
3243 return false;
3244}

References fb(), REL_TRUNCATE_FRACTION, REL_TRUNCATE_MINIMUM, and VacuumFailsafeActive.

Referenced by heap_vacuum_rel().
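
The heuristic only attempts truncation when the reclaimable tail justifies taking an AccessExclusiveLock: at least REL_TRUNCATE_MINIMUM (1000) pages, or at least 1/REL_TRUNCATE_FRACTION (one sixteenth) of the table. Restated as a standalone function with a few worked cases:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REL_TRUNCATE_MINIMUM  1000
#define REL_TRUNCATE_FRACTION 16

/*
 * Worth trying to truncate only if the empty tail is large in absolute
 * terms or as a fraction of the whole table.
 */
static bool
truncation_worthwhile(uint32_t rel_pages, uint32_t nonempty_pages)
{
    uint32_t    possibly_freeable = rel_pages - nonempty_pages;

    return possibly_freeable > 0 &&
        (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
         possibly_freeable >= rel_pages / REL_TRUNCATE_FRACTION);
}

int
main(void)
{
    /* 512 empty pages at the end of a 100,000-page table: not worth it. */
    printf("%d\n", truncation_worthwhile(100000, 99488));
    /* 1,500 empty tail pages: above the absolute minimum, truncate. */
    printf("%d\n", truncation_worthwhile(100000, 98500));
    /* 20 of 200 pages (10% > 1/16): truncate even on a small table. */
    printf("%d\n", truncation_worthwhile(200, 180));
    return 0;
}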

◆ update_relstats_all_indexes()

static void update_relstats_all_indexes ( LVRelState *vacrel)
static

Definition at line 3843 of file vacuumlazy.c.

3844{
3845 Relation *indrels = vacrel->indrels;
3846 int nindexes = vacrel->nindexes;
3847 IndexBulkDeleteResult **indstats = vacrel->indstats;
3848
3849 Assert(vacrel->do_index_cleanup);
3850
3851 for (int idx = 0; idx < nindexes; idx++)
3852 {
3853 Relation indrel = indrels[idx];
3854 IndexBulkDeleteResult *istat = indstats[idx];
3855
3856 if (istat == NULL || istat->estimated_count)
3857 continue;
3858
3859 /* Update index statistics */
3861 istat->num_pages,
3862 istat->num_index_tuples,
3863 0, 0,
3864 false,
3867 NULL, NULL, false);
3868 }
3869}

References Assert, IndexBulkDeleteResult::estimated_count, fb(), idx(), InvalidMultiXactId, InvalidTransactionId, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, and vac_update_relstats().

Referenced by heap_vacuum_rel().

◆ update_vacuum_error_info()

static void update_vacuum_error_info ( LVRelState *vacrel,
LVSavedErrInfo *saved_vacrel,
int  phase,
BlockNumber  blkno,
OffsetNumber  offnum 
)
static

Definition at line 3942 of file vacuumlazy.c.

3944{
3945 if (saved_vacrel)
3946 {
3947 saved_vacrel->offnum = vacrel->offnum;
3948 saved_vacrel->blkno = vacrel->blkno;
3949 saved_vacrel->phase = vacrel->phase;
3950 }
3951
3952 vacrel->blkno = blkno;
3953 vacrel->offnum = offnum;
3954 vacrel->phase = phase;
3955}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_scan_heap(), lazy_truncate_heap(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().
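
update_vacuum_error_info() and restore_vacuum_error_info() form a save/restore pair: before entering a sub-phase the caller stashes the current (phase, blkno, offnum) triple in an LVSavedErrInfo on its own stack, and puts it back afterwards so later errors report the right context. A minimal standalone rendering of that pattern with simplified structs:

#include <stdio.h>

typedef struct
{
    int         phase;
    unsigned    blkno;
    unsigned    offnum;
} ErrInfo;

typedef ErrInfo SavedErrInfo;   /* same shape; the saved copy lives on the caller's stack */

/* Save the current error position (if asked to) and install a new one. */
static void
update_err_info(ErrInfo *cur, SavedErrInfo *saved,
                int phase, unsigned blkno, unsigned offnum)
{
    if (saved)
        *saved = *cur;
    cur->phase = phase;
    cur->blkno = blkno;
    cur->offnum = offnum;
}

/* Put back whatever was in effect before the sub-phase started. */
static void
restore_err_info(ErrInfo *cur, const SavedErrInfo *saved)
{
    *cur = *saved;
}

int
main(void)
{
    ErrInfo     cur = {.phase = 1, .blkno = 42, .offnum = 7};
    SavedErrInfo saved;

    update_err_info(&cur, &saved, 3, 99, 0);    /* enter a "vacuum heap" sub-phase */
    printf("during: phase %d, block %u\n", cur.phase, cur.blkno);
    restore_err_info(&cur, &saved);             /* back to the outer phase */
    printf("after:  phase %d, block %u\n", cur.phase, cur.blkno);
    return 0;
}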

◆ vacuum_error_callback()

static void vacuum_error_callback ( void *arg)
static

Definition at line 3878 of file vacuumlazy.c.

3879{
3880 LVRelState *errinfo = arg;
3881
3882 switch (errinfo->phase)
3883 {
3884 case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3885 if (BlockNumberIsValid(errinfo->blkno))
3886 {
3887 if (OffsetNumberIsValid(errinfo->offnum))
3888 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3889 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3890 else
3891 errcontext("while scanning block %u of relation \"%s.%s\"",
3892 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3893 }
3894 else
3895 errcontext("while scanning relation \"%s.%s\"",
3896 errinfo->relnamespace, errinfo->relname);
3897 break;
3898
3899 case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3900 if (BlockNumberIsValid(errinfo->blkno))
3901 {
3902 if (OffsetNumberIsValid(errinfo->offnum))
3903 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3904 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3905 else
3906 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3907 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3908 }
3909 else
3910 errcontext("while vacuuming relation \"%s.%s\"",
3911 errinfo->relnamespace, errinfo->relname);
3912 break;
3913
3914 case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3915 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3916 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3917 break;
3918
3919 case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3920 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3921 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3922 break;
3923
3924 case VACUUM_ERRCB_PHASE_TRUNCATE:
3925 if (BlockNumberIsValid(errinfo->blkno))
3926 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3927 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3928 break;
3929
3930 case VACUUM_ERRCB_PHASE_UNKNOWN:
3931 default:
3932 return; /* do nothing; the errinfo may not be
3933 * initialized */
3934 }
3935}

References arg, BlockNumberIsValid(), errcontext, fb(), OffsetNumberIsValid, VACUUM_ERRCB_PHASE_INDEX_CLEANUP, VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_ERRCB_PHASE_UNKNOWN, VACUUM_ERRCB_PHASE_VACUUM_HEAP, and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by heap_vacuum_rel().
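
This callback only does anything once it has been pushed onto the backend's error-context stack (heap_vacuum_rel() installs it via an ErrorContextCallback). The standalone sketch below mimics that mechanism with its own tiny callback chain so the relationship is visible without the backend; none of the types here are the real elog machinery.

#include <stdio.h>

/* Minimal stand-in for the backend's ErrorContextCallback chain. */
typedef struct ErrCtxCallback
{
    struct ErrCtxCallback *previous;
    void        (*callback) (void *arg);
    void       *arg;
} ErrCtxCallback;

static ErrCtxCallback *err_context_stack = NULL;

/* Stand-in for the LVRelState fields the callback needs. */
typedef struct
{
    const char *relname;
    unsigned    blkno;
} VacErrInfo;

/* Analogue of vacuum_error_callback(): add one line of context. */
static void
vacuum_err_callback(void *arg)
{
    VacErrInfo *errinfo = arg;

    printf("CONTEXT: while vacuuming block %u of relation \"%s\"\n",
           errinfo->blkno, errinfo->relname);
}

/* Analogue of reporting an error: print the message, then walk the stack. */
static void
report_error(const char *msg)
{
    printf("ERROR: %s\n", msg);
    for (ErrCtxCallback *c = err_context_stack; c != NULL; c = c->previous)
        c->callback(c->arg);
}

int
main(void)
{
    VacErrInfo  errinfo = {.relname = "pgbench_accounts", .blkno = 1234};
    ErrCtxCallback cb = {.previous = err_context_stack,
                         .callback = vacuum_err_callback,
                         .arg = &errinfo};

    err_context_stack = &cb;            /* push, as heap_vacuum_rel() does */
    report_error("could not read block");
    err_context_stack = cb.previous;    /* pop before returning */
    return 0;
}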

◆ vacuum_reap_lp_read_stream_next()

static BlockNumber vacuum_reap_lp_read_stream_next ( ReadStream *stream,
void *callback_private_data,
void *per_buffer_data 
)
static

Definition at line 2712 of file vacuumlazy.c.

2715{
2716 TidStoreIter *iter = callback_private_data;
2717 TidStoreIterResult *iter_result;
2718
2719 iter_result = TidStoreIterateNext(iter);
2720 if (iter_result == NULL)
2721 return InvalidBlockNumber;
2722
2723 /*
2724 * Save the TidStoreIterResult for later, so we can extract the offsets.
2725 * It is safe to copy the result, according to TidStoreIterateNext().
2726 */
2727 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2728
2729 return iter_result->blkno;
2730}

References fb(), InvalidBlockNumber, and TidStoreIterateNext().

Referenced by lazy_vacuum_heap_rel().
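
The callback's contract is simple: each call returns the next block number the stream should read, stashes whatever the consumer will need in per_buffer_data, and returns InvalidBlockNumber when the supply is exhausted. A standalone model of that contract, driven by a plain array instead of a TidStore iterator:

#include <stdio.h>

#define INVALID_BLOCK ((unsigned) 0xFFFFFFFF)   /* stand-in for InvalidBlockNumber */

/* Toy iterator over the blocks that still have dead items. */
typedef struct
{
    const unsigned *blocks;
    int         nblocks;
    int         pos;
} BlockIter;

/*
 * Stream callback: hand back the next block to read, copy the
 * per-buffer payload (here just the block number), or signal the end.
 */
static unsigned
next_block_cb(void *callback_private_data, void *per_buffer_data)
{
    BlockIter  *iter = callback_private_data;

    if (iter->pos >= iter->nblocks)
        return INVALID_BLOCK;

    *(unsigned *) per_buffer_data = iter->blocks[iter->pos];
    return iter->blocks[iter->pos++];
}

int
main(void)
{
    unsigned    blocks[] = {3, 17, 42};
    BlockIter   iter = {.blocks = blocks, .nblocks = 3, .pos = 0};
    unsigned    payload;
    unsigned    blkno;

    while ((blkno = next_block_cb(&iter, &payload)) != INVALID_BLOCK)
        printf("stream will read block %u (payload %u)\n", blkno, payload);
    return 0;
}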