PostgreSQL Source Code git master
Loading...
Searching...
No Matches
vacuumlazy.c File Reference
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/multixact.h"
#include "access/tidstore.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
#include "catalog/storage.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "common/int.h"
#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/latch.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/injection_point.h"
#include "utils/lsyscache.h"
#include "utils/pg_rusage.h"
#include "utils/timestamp.h"
#include "utils/wait_event.h"
Include dependency graph for vacuumlazy.c:

Go to the source code of this file.

Data Structures

struct  LVRelState
 
struct  LVSavedErrInfo
 

Macros

#define REL_TRUNCATE_MINIMUM   1000
 
#define REL_TRUNCATE_FRACTION   16
 
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */
 
#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */
 
#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */
 
#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
 
#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)
 
#define PREFETCH_SIZE   ((BlockNumber) 32)
 
#define ParallelVacuumIsActive(vacrel)   ((vacrel)->pvs != NULL)
 
#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2
 
#define EAGER_SCAN_REGION_SIZE   4096
 

Typedefs

typedef struct LVRelState LVRelState
 
typedef struct LVSavedErrInfo LVSavedErrInfo
 

Enumerations

enum  VacErrPhase {
  VACUUM_ERRCB_PHASE_UNKNOWN , VACUUM_ERRCB_PHASE_SCAN_HEAP , VACUUM_ERRCB_PHASE_VACUUM_INDEX , VACUUM_ERRCB_PHASE_VACUUM_HEAP ,
  VACUUM_ERRCB_PHASE_INDEX_CLEANUP , VACUUM_ERRCB_PHASE_TRUNCATE
}
 

Functions

static void lazy_scan_heap (LVRelState *vacrel)
 
static void heap_vacuum_eager_scan_setup (LVRelState *vacrel, const VacuumParams *params)
 
static BlockNumber heap_vac_scan_next_block (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 
static void find_next_unskippable_block (LVRelState *vacrel, bool *skipsallvis)
 
static bool lazy_scan_new_or_empty (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
 
static int lazy_scan_prune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool *has_lpdead_items, bool *vm_page_frozen)
 
static bool lazy_scan_noprune (LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
 
static void lazy_vacuum (LVRelState *vacrel)
 
static bool lazy_vacuum_all_indexes (LVRelState *vacrel)
 
static void lazy_vacuum_heap_rel (LVRelState *vacrel)
 
static void lazy_vacuum_heap_page (LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
 
static bool lazy_check_wraparound_failsafe (LVRelState *vacrel)
 
static void lazy_cleanup_all_indexes (LVRelState *vacrel)
 
static IndexBulkDeleteResultlazy_vacuum_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
 
static IndexBulkDeleteResultlazy_cleanup_one_index (Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
 
static bool should_attempt_truncation (LVRelState *vacrel)
 
static void lazy_truncate_heap (LVRelState *vacrel)
 
static BlockNumber count_nondeletable_pages (LVRelState *vacrel, bool *lock_waiter_detected)
 
static void dead_items_alloc (LVRelState *vacrel, int nworkers)
 
static void dead_items_add (LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
 
static void dead_items_reset (LVRelState *vacrel)
 
static void dead_items_cleanup (LVRelState *vacrel)
 
static bool heap_page_would_be_all_visible (Relation rel, Buffer buf, GlobalVisState *vistest, bool allow_update_vistest, OffsetNumber *deadoffsets, int ndeadoffsets, bool *all_frozen, TransactionId *newest_live_xid, OffsetNumber *logging_offnum)
 
static void update_relstats_all_indexes (LVRelState *vacrel)
 
static void vacuum_error_callback (void *arg)
 
static void update_vacuum_error_info (LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
 
static void restore_vacuum_error_info (LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
 
void heap_vacuum_rel (Relation rel, const VacuumParams *params, BufferAccessStrategy bstrategy)
 
static int cmpOffsetNumbers (const void *a, const void *b)
 
static BlockNumber vacuum_reap_lp_read_stream_next (ReadStream *stream, void *callback_private_data, void *per_buffer_data)
 

Macro Definition Documentation

◆ BYPASS_THRESHOLD_PAGES

#define BYPASS_THRESHOLD_PAGES   0.02 /* i.e. 2% of rel_pages */

Definition at line 187 of file vacuumlazy.c.

◆ EAGER_SCAN_REGION_SIZE

#define EAGER_SCAN_REGION_SIZE   4096

Definition at line 250 of file vacuumlazy.c.

◆ FAILSAFE_EVERY_PAGES

#define FAILSAFE_EVERY_PAGES    ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 193 of file vacuumlazy.c.

224{
232
233/*
234 * An eager scan of a page that is set all-frozen in the VM is considered
235 * "successful". To spread out freezing overhead across multiple normal
236 * vacuums, we limit the number of successful eager page freezes. The maximum
237 * number of eager page freezes is calculated as a ratio of the all-visible
238 * but not all-frozen pages at the beginning of the vacuum.
239 */
240#define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
241
242/*
243 * On the assumption that different regions of the table tend to have
244 * similarly aged data, once vacuum fails to freeze
245 * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
246 * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
247 * to another region of the table with potentially older data.
248 */
249#define EAGER_SCAN_REGION_SIZE 4096
250
251typedef struct LVRelState
252{
253 /* Target heap relation and its indexes */
256 int nindexes;
257
258 /* Buffer access strategy and parallel vacuum state */
261
262 /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
263 bool aggressive;
264 /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
265 bool skipwithvm;
266 /* Consider index vacuuming bypass optimization? */
268
269 /* Doing index vacuuming, index cleanup, rel truncation? */
271 bool do_index_cleanup;
272 bool do_rel_truncate;
273
274 /* VACUUM operation's cutoffs for freezing and pruning */
275 struct VacuumCutoffs cutoffs;
277 /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
280 bool skippedallvis;
281
282 /* Error reporting state */
283 char *dbname;
284 char *relnamespace;
285 char *relname;
286 char *indname; /* Current index name */
287 BlockNumber blkno; /* used only for heap operations */
288 OffsetNumber offnum; /* used only for heap operations */
290 bool verbose; /* VACUUM VERBOSE? */
291
292 /*
293 * dead_items stores TIDs whose index tuples are deleted by index
294 * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
295 * that has been processed by lazy_scan_prune. Also needed by
296 * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
297 * LP_UNUSED during second heap pass.
298 *
299 * Both dead_items and dead_items_info are allocated in shared memory in
300 * parallel vacuum cases.
301 */
302 TidStore *dead_items; /* TIDs whose index tuples we'll delete */
304
305 BlockNumber rel_pages; /* total number of pages */
306 BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
307
308 /*
309 * Count of all-visible blocks eagerly scanned (for logging only). This
310 * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
311 */
313
314 BlockNumber removed_pages; /* # pages removed by relation truncation */
315 BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
316
317 /* # pages newly set all-visible in the VM */
319
320 /*
321 * # pages newly set all-visible and all-frozen in the VM. This is a
322 * subset of new_all_visible_pages. That is, new_all_visible_pages
323 * includes all pages set all-visible, but
324 * new_all_visible_all_frozen_pages includes only those which were also
325 * set all-frozen.
326 */
328
329 /* # all-visible pages newly set all-frozen in the VM */
331
332 BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
333 BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
334 BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
335
336 /* Statistics output by us, for table */
337 double new_rel_tuples; /* new estimated total # of tuples */
338 double new_live_tuples; /* new estimated total # of live tuples */
339 /* Statistics output by index AMs */
341
342 /* Instrumentation counters */
343 int num_index_scans;
346
347 /*
348 * Total number of planned and actually launched parallel workers for
349 * index vacuuming and index cleanup.
350 */
352
353 /* Counters that follow are only for scanned_pages */
354 int64 tuples_deleted; /* # deleted from table */
355 int64 tuples_frozen; /* # newly frozen */
356 int64 lpdead_items; /* # deleted from indexes */
357 int64 live_tuples; /* # live tuples remaining */
358 int64 recently_dead_tuples; /* # dead, but not yet removable */
359 int64 missed_dead_tuples; /* # removable, but not removed */
360
361 /* State maintained by heap_vac_scan_next_block() */
362 BlockNumber current_block; /* last block returned */
363 BlockNumber next_unskippable_block; /* next unskippable block */
364 bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
365 Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
366
367 /* State related to managing eager scanning of all-visible pages */
368
369 /*
370 * A normal vacuum that has failed to freeze too many eagerly scanned
371 * blocks in a region suspends eager scanning.
372 * next_eager_scan_region_start is the block number of the first block
373 * eligible for resumed eager scanning.
374 *
375 * When eager scanning is permanently disabled, either initially
376 * (including for aggressive vacuum) or due to hitting the success cap,
377 * this is set to InvalidBlockNumber.
378 */
380
381 /*
382 * The remaining number of blocks a normal vacuum will consider eager
383 * scanning when it is successful. When eager scanning is enabled, this is
384 * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
385 * all-visible but not all-frozen pages. For each eager freeze success,
386 * this is decremented. Once it hits 0, eager scanning is permanently
387 * disabled. It is initialized to 0 if eager scanning starts out disabled
388 * (including for aggressive vacuum).
389 */
391
392 /*
393 * The maximum number of blocks which may be eagerly scanned and not
394 * frozen before eager scanning is temporarily suspended. This is
395 * configurable both globally, via the
396 * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
397 * storage parameter of the same name. It is calculated as
398 * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
399 * It is 0 when eager scanning is disabled.
400 */
402
403 /*
404 * The number of eagerly scanned blocks vacuum failed to freeze (due to
405 * age) in the current eager scan region. Vacuum resets it to
406 * eager_scan_max_fails_per_region each time it enters a new region of the
407 * relation. If eager_scan_remaining_fails hits 0, eager scanning is
408 * suspended until the next region. It is also 0 if eager scanning has
409 * been permanently disabled.
410 */
412} LVRelState;
413
414
415/* Struct for saving and restoring vacuum error information. */
416typedef struct LVSavedErrInfo
417{
422
423
424/* non-export function prototypes */
425static void lazy_scan_heap(LVRelState *vacrel);
427 const VacuumParams *params);
429 void *callback_private_data,
430 void *per_buffer_data);
433 BlockNumber blkno, Page page,
434 bool sharelock, Buffer vmbuffer);
436 BlockNumber blkno, Page page,
437 Buffer vmbuffer,
438 bool *has_lpdead_items, bool *vm_page_frozen);
440 BlockNumber blkno, Page page,
441 bool *has_lpdead_items);
442static void lazy_vacuum(LVRelState *vacrel);
446 Buffer buffer, OffsetNumber *deadoffsets,
447 int num_offsets, Buffer vmbuffer);
452 double reltuples,
456 double reltuples,
457 bool estimated_count,
463static void dead_items_alloc(LVRelState *vacrel, int nworkers);
464static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
465 int num_offsets);
468
470 GlobalVisState *vistest,
472 OffsetNumber *deadoffsets,
473 int ndeadoffsets,
474 bool *all_frozen,
475 TransactionId *newest_live_xid,
478static void vacuum_error_callback(void *arg);
481 int phase, BlockNumber blkno,
482 OffsetNumber offnum);
485
486
487
488/*
489 * Helper to set up the eager scanning state for vacuuming a single relation.
490 * Initializes the eager scan management related members of the LVRelState.
491 *
492 * Caller provides whether or not an aggressive vacuum is required due to
493 * vacuum options or for relfrozenxid/relminmxid advancement.
494 */
495static void
497{
501 float first_region_ratio;
503
504 /*
505 * Initialize eager scan management fields to their disabled values.
506 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
507 * of tables without sufficiently old tuples disable eager scanning.
508 */
509 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
510 vacrel->eager_scan_max_fails_per_region = 0;
511 vacrel->eager_scan_remaining_fails = 0;
512 vacrel->eager_scan_remaining_successes = 0;
513
514 /* If eager scanning is explicitly disabled, just return. */
515 if (params->max_eager_freeze_failure_rate == 0)
516 return;
517
518 /*
519 * The caller will have determined whether or not an aggressive vacuum is
520 * required by either the vacuum parameters or the relative age of the
521 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
522 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
523 * so scans of all-visible pages are not considered eager.
524 */
525 if (vacrel->aggressive)
526 return;
527
528 /*
529 * Aggressively vacuuming a small relation shouldn't take long, so it
530 * isn't worth amortizing. We use two times the region size as the size
531 * cutoff because the eager scan start block is a random spot somewhere in
532 * the first region, making the second region the first to be eager
533 * scanned normally.
534 */
535 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
536 return;
537
538 /*
539 * We only want to enable eager scanning if we are likely to be able to
540 * freeze some of the pages in the relation.
541 *
542 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
543 * are technically freezable, but we won't freeze them unless the criteria
544 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
545 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
546 *
547 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
548 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
549 * enable eager scanning.
550 */
551 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
552 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
553 vacrel->cutoffs.FreezeLimit))
555
557 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
558 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
559 vacrel->cutoffs.MultiXactCutoff))
561
563 return;
564
565 /* We have met the criteria to eagerly scan some pages. */
566
567 /*
568 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
569 * all-visible but not all-frozen blocks in the relation.
570 */
572
573 vacrel->eager_scan_remaining_successes =
576
577 /* If every all-visible page is frozen, eager scanning is disabled. */
578 if (vacrel->eager_scan_remaining_successes == 0)
579 return;
580
581 /*
582 * Now calculate the bounds of the first eager scan region. Its end block
583 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
584 * blocks. This affects the bounds of all subsequent regions and avoids
585 * eager scanning and failing to freeze the same blocks each vacuum of the
586 * relation.
587 */
589
590 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
591
593 params->max_eager_freeze_failure_rate <= 1);
594
595 vacrel->eager_scan_max_fails_per_region =
598
599 /*
600 * The first region will be smaller than subsequent regions. As such,
601 * adjust the eager freeze failures tolerated for this region.
602 */
603 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
605
606 vacrel->eager_scan_remaining_fails =
607 vacrel->eager_scan_max_fails_per_region *
609}
610
611/*
612 * heap_vacuum_rel() -- perform VACUUM for one heap relation
613 *
614 * This routine sets things up for and then calls lazy_scan_heap, where
615 * almost all work actually takes place. Finalizes everything after call
616 * returns by managing relation truncation and updating rel's pg_class
617 * entry. (Also updates pg_class entries for any indexes that need it.)
618 *
619 * At entry, we have already established a transaction and opened
620 * and locked the relation.
621 */
622void
623heap_vacuum_rel(Relation rel, const VacuumParams *params,
624 BufferAccessStrategy bstrategy)
625{
627 bool verbose,
628 instrument,
629 skipwithvm,
637 TimestampTz starttime = 0;
639 startwritetime = 0;
642 ErrorContextCallback errcallback;
643 char **indnames = NULL;
645
646 verbose = (params->options & VACOPT_VERBOSE) != 0;
647 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
648 params->log_vacuum_min_duration >= 0));
649 if (instrument)
650 {
652 if (track_io_timing)
653 {
656 }
657 }
658
659 /* Used for instrumentation and stats report */
660 starttime = GetCurrentTimestamp();
661
663 RelationGetRelid(rel));
666 params->is_wraparound
669 else
672
673 /*
674 * Setup error traceback support for ereport() first. The idea is to set
675 * up an error context callback to display additional information on any
676 * error during a vacuum. During different phases of vacuum, we update
 677 * the state so that the error context callback always displays current
678 * information.
679 *
680 * Copy the names of heap rel into local memory for error reporting
681 * purposes, too. It isn't always safe to assume that we can get the name
682 * of each rel. It's convenient for code in lazy_scan_heap to always use
683 * these temp copies.
684 */
687 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
688 vacrel->relname = pstrdup(RelationGetRelationName(rel));
689 vacrel->indname = NULL;
691 vacrel->verbose = verbose;
692 errcallback.callback = vacuum_error_callback;
693 errcallback.arg = vacrel;
694 errcallback.previous = error_context_stack;
695 error_context_stack = &errcallback;
696
697 /* Set up high level stuff about rel and its indexes */
698 vacrel->rel = rel;
700 &vacrel->indrels);
701 vacrel->bstrategy = bstrategy;
702 if (instrument && vacrel->nindexes > 0)
703 {
704 /* Copy index names used by instrumentation (not error reporting) */
705 indnames = palloc_array(char *, vacrel->nindexes);
706 for (int i = 0; i < vacrel->nindexes; i++)
708 }
709
710 /*
711 * The index_cleanup param either disables index vacuuming and cleanup or
712 * forces it to go ahead when we would otherwise apply the index bypass
713 * optimization. The default is 'auto', which leaves the final decision
714 * up to lazy_vacuum().
715 *
716 * The truncate param allows user to avoid attempting relation truncation,
717 * though it can't force truncation to happen.
718 */
721 params->truncate != VACOPTVALUE_AUTO);
722
723 /*
724 * While VacuumFailSafeActive is reset to false before calling this, we
725 * still need to reset it here due to recursive calls.
726 */
727 VacuumFailsafeActive = false;
728 vacrel->consider_bypass_optimization = true;
729 vacrel->do_index_vacuuming = true;
730 vacrel->do_index_cleanup = true;
731 vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
732 if (params->index_cleanup == VACOPTVALUE_DISABLED)
733 {
734 /* Force disable index vacuuming up-front */
735 vacrel->do_index_vacuuming = false;
736 vacrel->do_index_cleanup = false;
737 }
738 else if (params->index_cleanup == VACOPTVALUE_ENABLED)
739 {
740 /* Force index vacuuming. Note that failsafe can still bypass. */
741 vacrel->consider_bypass_optimization = false;
742 }
743 else
744 {
745 /* Default/auto, make all decisions dynamically */
747 }
748
749 /* Initialize page counters explicitly (be tidy) */
750 vacrel->scanned_pages = 0;
751 vacrel->eager_scanned_pages = 0;
752 vacrel->removed_pages = 0;
753 vacrel->new_frozen_tuple_pages = 0;
754 vacrel->lpdead_item_pages = 0;
755 vacrel->missed_dead_pages = 0;
756 vacrel->nonempty_pages = 0;
757 /* dead_items_alloc allocates vacrel->dead_items later on */
758
759 /* Allocate/initialize output statistics state */
760 vacrel->new_rel_tuples = 0;
761 vacrel->new_live_tuples = 0;
762 vacrel->indstats = (IndexBulkDeleteResult **)
763 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
764
765 /* Initialize remaining counters (be tidy) */
766 vacrel->num_index_scans = 0;
767 vacrel->num_dead_items_resets = 0;
768 vacrel->total_dead_items_bytes = 0;
769 vacrel->tuples_deleted = 0;
770 vacrel->tuples_frozen = 0;
771 vacrel->lpdead_items = 0;
772 vacrel->live_tuples = 0;
773 vacrel->recently_dead_tuples = 0;
774 vacrel->missed_dead_tuples = 0;
775
776 vacrel->new_all_visible_pages = 0;
777 vacrel->new_all_visible_all_frozen_pages = 0;
778 vacrel->new_all_frozen_pages = 0;
779
780 vacrel->worker_usage.vacuum.nlaunched = 0;
781 vacrel->worker_usage.vacuum.nplanned = 0;
782 vacrel->worker_usage.cleanup.nlaunched = 0;
783 vacrel->worker_usage.cleanup.nplanned = 0;
784
785 /*
786 * Get cutoffs that determine which deleted tuples are considered DEAD,
787 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
788 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
789 * happen in this order to ensure that the OldestXmin cutoff field works
790 * as an upper bound on the XIDs stored in the pages we'll actually scan
791 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
792 *
793 * Next acquire vistest, a related cutoff that's used in pruning. We use
794 * vistest in combination with OldestXmin to ensure that
795 * heap_page_prune_and_freeze() always removes any deleted tuple whose
796 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
797 * whether a tuple should be frozen or removed. (In the future we might
798 * want to teach lazy_scan_prune to recompute vistest from time to time,
799 * to increase the number of dead tuples it can prune away.)
800 */
801 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
803 vacrel->vistest = GlobalVisTestFor(rel);
804
805 /* Initialize state used to track oldest extant XID/MXID */
806 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
807 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
808
809 /*
810 * Initialize state related to tracking all-visible page skipping. This is
811 * very important to determine whether or not it is safe to advance the
812 * relfrozenxid/relminmxid.
813 */
814 vacrel->skippedallvis = false;
815 skipwithvm = true;
817 {
818 /*
819 * Force aggressive mode, and disable skipping blocks using the
820 * visibility map (even those set all-frozen)
821 */
822 vacrel->aggressive = true;
823 skipwithvm = false;
824 }
825
826 vacrel->skipwithvm = skipwithvm;
827
828 /*
829 * Set up eager scan tracking state. This must happen after determining
830 * whether or not the vacuum must be aggressive, because only normal
831 * vacuums use the eager scan algorithm.
832 */
834
835 /* Report the vacuum mode: 'normal' or 'aggressive' */
837 vacrel->aggressive
840
841 if (verbose)
842 {
843 if (vacrel->aggressive)
845 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
846 vacrel->dbname, vacrel->relnamespace,
847 vacrel->relname)));
848 else
850 (errmsg("vacuuming \"%s.%s.%s\"",
851 vacrel->dbname, vacrel->relnamespace,
852 vacrel->relname)));
853 }
854
855 /*
856 * Allocate dead_items memory using dead_items_alloc. This handles
857 * parallel VACUUM initialization as part of allocating shared memory
858 * space used for dead_items. (But do a failsafe precheck first, to
859 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
860 * is already dangerously old.)
861 */
864
865#ifdef USE_INJECTION_POINTS
866
867 /*
868 * Used by tests to pause before parallel vacuum is launched, allowing
869 * test code to modify configuration that the leader then propagates to
870 * workers.
871 */
873 INJECTION_POINT("autovacuum-start-parallel-vacuum", NULL);
874#endif
875
876 /*
877 * Call lazy_scan_heap to perform all required heap pruning, index
878 * vacuuming, and heap vacuuming (plus related processing)
879 */
881
882 /*
883 * Save dead items max_bytes and update the memory usage statistics before
884 * cleanup, they are freed in parallel vacuum cases during
885 * dead_items_cleanup().
886 */
887 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
888 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
889
890 /*
891 * Free resources managed by dead_items_alloc. This ends parallel mode in
892 * passing when necessary.
893 */
896
897 /*
898 * Update pg_class entries for each of rel's indexes where appropriate.
899 *
900 * Unlike the later update to rel's pg_class entry, this is not critical.
901 * Maintains relpages/reltuples statistics used by the planner only.
902 */
903 if (vacrel->do_index_cleanup)
905
906 /* Done with rel's indexes */
907 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
908
909 /* Optionally truncate rel */
912
913 /* Pop the error context stack */
914 error_context_stack = errcallback.previous;
915
916 /* Report that we are now doing final cleanup */
919
920 /*
921 * Prepare to update rel's pg_class entry.
922 *
923 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
924 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
925 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
926 */
927 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
928 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
929 vacrel->cutoffs.relfrozenxid,
930 vacrel->NewRelfrozenXid));
931 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
932 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
933 vacrel->cutoffs.relminmxid,
934 vacrel->NewRelminMxid));
935 if (vacrel->skippedallvis)
936 {
937 /*
938 * Must keep original relfrozenxid in a non-aggressive VACUUM that
939 * chose to skip an all-visible page range. The state that tracks new
940 * values will have missed unfrozen XIDs from the pages we skipped.
941 */
942 Assert(!vacrel->aggressive);
943 vacrel->NewRelfrozenXid = InvalidTransactionId;
944 vacrel->NewRelminMxid = InvalidMultiXactId;
945 }
946
947 /*
948 * For safety, clamp relallvisible to be not more than what we're setting
949 * pg_class.relpages to
950 */
951 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
955
956 /*
957 * An all-frozen block _must_ be all-visible. As such, clamp the count of
958 * all-frozen blocks to the count of all-visible blocks. This matches the
959 * clamping of relallvisible above.
960 */
963
964 /*
965 * Now actually update rel's pg_class entry.
966 *
967 * In principle new_live_tuples could be -1 indicating that we (still)
968 * don't know the tuple count. In practice that can't happen, since we
969 * scan every page that isn't skipped using the visibility map.
970 */
971 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
973 vacrel->nindexes > 0,
974 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
976
977 /*
978 * Report results to the cumulative stats system, too.
979 *
980 * Deliberately avoid telling the stats system about LP_DEAD items that
981 * remain in the table due to VACUUM bypassing index and heap vacuuming.
982 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
983 * It seems like a good idea to err on the side of not vacuuming again too
984 * soon in cases where the failsafe prevented significant amounts of heap
985 * vacuuming.
986 */
988 Max(vacrel->new_live_tuples, 0),
989 vacrel->recently_dead_tuples +
990 vacrel->missed_dead_tuples,
991 starttime);
993
994 if (instrument)
995 {
997
998 if (verbose || params->log_vacuum_min_duration == 0 ||
1000 params->log_vacuum_min_duration))
1001 {
1002 long secs_dur;
1003 int usecs_dur;
1004 WalUsage walusage;
1005 BufferUsage bufferusage;
1007 char *msgfmt;
1008 int32 diff;
1009 double read_rate = 0,
1010 write_rate = 0;
1014
1016 memset(&walusage, 0, sizeof(WalUsage));
1018 memset(&bufferusage, 0, sizeof(BufferUsage));
1020
1021 total_blks_hit = bufferusage.shared_blks_hit +
1022 bufferusage.local_blks_hit;
1023 total_blks_read = bufferusage.shared_blks_read +
1024 bufferusage.local_blks_read;
1026 bufferusage.local_blks_dirtied;
1027
1029 if (verbose)
1030 {
1031 /*
1032 * Aggressiveness already reported earlier, in dedicated
1033 * VACUUM VERBOSE ereport
1034 */
1035 Assert(!params->is_wraparound);
1036 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1037 }
1038 else if (params->is_wraparound)
1039 {
1040 /*
1041 * While it's possible for a VACUUM to be both is_wraparound
1042 * and !aggressive, that's just a corner-case -- is_wraparound
1043 * implies aggressive. Produce distinct output for the corner
1044 * case all the same, just in case.
1045 */
1046 if (vacrel->aggressive)
1047 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1048 else
1049 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1050 }
1051 else
1052 {
1053 if (vacrel->aggressive)
1054 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1055 else
1056 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1057 }
1059 vacrel->dbname,
1060 vacrel->relnamespace,
1061 vacrel->relname,
1062 vacrel->num_index_scans);
1063 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1064 vacrel->removed_pages,
1066 vacrel->scanned_pages,
1067 orig_rel_pages == 0 ? 100.0 :
1068 100.0 * vacrel->scanned_pages /
1070 vacrel->eager_scanned_pages);
1072 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1073 vacrel->tuples_deleted,
1074 (int64) vacrel->new_rel_tuples,
1075 vacrel->recently_dead_tuples);
1076 if (vacrel->missed_dead_tuples > 0)
1078 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1079 vacrel->missed_dead_tuples,
1080 vacrel->missed_dead_pages);
1082 vacrel->cutoffs.OldestXmin);
1084 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1085 vacrel->cutoffs.OldestXmin, diff);
1087 {
1088 diff = (int32) (vacrel->NewRelfrozenXid -
1089 vacrel->cutoffs.relfrozenxid);
1091 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1092 vacrel->NewRelfrozenXid, diff);
1093 }
1094 if (minmulti_updated)
1095 {
1096 diff = (int32) (vacrel->NewRelminMxid -
1097 vacrel->cutoffs.relminmxid);
1099 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1100 vacrel->NewRelminMxid, diff);
1101 }
1102 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1103 vacrel->new_frozen_tuple_pages,
1104 orig_rel_pages == 0 ? 100.0 :
1105 100.0 * vacrel->new_frozen_tuple_pages /
1107 vacrel->tuples_frozen);
1108
1110 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1111 vacrel->new_all_visible_pages,
1112 vacrel->new_all_visible_all_frozen_pages +
1113 vacrel->new_all_frozen_pages,
1114 vacrel->new_all_frozen_pages);
1115 if (vacrel->do_index_vacuuming)
1116 {
1117 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1118 appendStringInfoString(&buf, _("index scan not needed: "));
1119 else
1120 appendStringInfoString(&buf, _("index scan needed: "));
1121
1122 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1123 }
1124 else
1125 {
1127 appendStringInfoString(&buf, _("index scan bypassed: "));
1128 else
1129 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1130
1131 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1132 }
1134 vacrel->lpdead_item_pages,
1135 orig_rel_pages == 0 ? 100.0 :
1136 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1137 vacrel->lpdead_items);
1138
1139 if (vacrel->worker_usage.vacuum.nplanned > 0)
1141 _("parallel workers: index vacuum: %d planned, %d launched in total\n"),
1142 vacrel->worker_usage.vacuum.nplanned,
1143 vacrel->worker_usage.vacuum.nlaunched);
1144
1145 if (vacrel->worker_usage.cleanup.nplanned > 0)
1147 _("parallel workers: index cleanup: %d planned, %d launched\n"),
1148 vacrel->worker_usage.cleanup.nplanned,
1149 vacrel->worker_usage.cleanup.nlaunched);
1150
1151 for (int i = 0; i < vacrel->nindexes; i++)
1152 {
1153 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1154
1155 if (!istat)
1156 continue;
1157
1159 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1160 indnames[i],
1161 istat->num_pages,
1162 istat->pages_newly_deleted,
1163 istat->pages_deleted,
1164 istat->pages_free);
1165 }
1167 {
1168 /*
1169 * We bypass the changecount mechanism because this value is
1170 * only updated by the calling process. We also rely on the
1171 * above call to pgstat_progress_end_command() to not clear
1172 * the st_progress_param array.
1173 */
1174 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1176 }
1177 if (track_io_timing)
1178 {
1179 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1180 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1181
1182 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1183 read_ms, write_ms);
1184 }
1185 if (secs_dur > 0 || usecs_dur > 0)
1186 {
1188 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1190 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1191 }
1192 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1195 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1200 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1201 walusage.wal_records,
1202 walusage.wal_fpi,
1203 walusage.wal_bytes,
1204 walusage.wal_fpi_bytes,
1205 walusage.wal_buffers_full);
1206
1207 /*
1208 * Report the dead items memory usage.
1209 *
1210 * The num_dead_items_resets counter increases when we reset the
1211 * collected dead items, so the counter is non-zero if at least
1212 * one dead items are collected, even if index vacuuming is
1213 * disabled.
1214 */
1216 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1217 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1218 vacrel->num_dead_items_resets),
1219 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1220 vacrel->num_dead_items_resets,
1221 (double) dead_items_max_bytes / (1024 * 1024));
1222 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1223
1224 ereport(verbose ? INFO : LOG,
1225 (errmsg_internal("%s", buf.data)));
1226 pfree(buf.data);
1227 }
1228 }
1229
1230 /* Cleanup index statistics and index names */
1231 for (int i = 0; i < vacrel->nindexes; i++)
1232 {
1233 if (vacrel->indstats[i])
1234 pfree(vacrel->indstats[i]);
1235
1236 if (instrument)
1237 pfree(indnames[i]);
1238 }
1239}
1240
1241/*
1242 * lazy_scan_heap() -- workhorse function for VACUUM
1243 *
1244 * This routine prunes each page in the heap, and considers the need to
1245 * freeze remaining tuples with storage (not including pages that can be
1246 * skipped using the visibility map). Also performs related maintenance
1247 * of the FSM and visibility map. These steps all take place during an
1248 * initial pass over the target heap relation.
1249 *
1250 * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1251 * consists of deleting index tuples that point to LP_DEAD items left in
1252 * heap pages following pruning. Earlier initial pass over the heap will
1253 * have collected the TIDs whose index tuples need to be removed.
1254 *
1255 * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1256 * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1257 * as LP_UNUSED. This has to happen in a second, final pass over the
1258 * heap, to preserve a basic invariant that all index AMs rely on: no
1259 * extant index tuple can ever be allowed to contain a TID that points to
1260 * an LP_UNUSED line pointer in the heap. We must disallow premature
1261 * recycling of line pointers to avoid index scans that get confused
1262 * about which TID points to which tuple immediately after recycling.
1263 * (Actually, this isn't a concern when target heap relation happens to
1264 * have no indexes, which allows us to safely apply the one-pass strategy
1265 * as an optimization).
1266 *
1267 * In practice we often have enough space to fit all TIDs, and so won't
1268 * need to call lazy_vacuum more than once, after our initial pass over
1269 * the heap has totally finished. Otherwise things are slightly more
1270 * complicated: our "initial pass" over the heap applies only to those
1271 * pages that were pruned before we needed to call lazy_vacuum, and our
1272 * "final pass" over the heap only vacuums these same heap pages.
1273 * However, we process indexes in full every time lazy_vacuum is called,
1274 * which makes index processing very inefficient when memory is in short
1275 * supply.
1276 */
/*
 * NOTE(review): this copy was extracted from a rendered source listing and
 * has dropped a number of upstream lines (the embedded line numbers below
 * are discontinuous; the function's signature line is among those missing).
 * Do not treat this text as buildable -- verify against the upstream
 * vacuumlazy.c before editing.
 */
1277static void
1279 {
1280 ReadStream *stream;
1281 BlockNumber rel_pages = vacrel->rel_pages,
1282 blkno = 0,
1285 vacrel->eager_scan_remaining_successes; /* for logging */
1286 Buffer vmbuffer = InvalidBuffer;
1287 const int initprog_index[] = {
1291 };
1293
1294 /* Report that we're scanning the heap, advertising total # of blocks */
1296 initprog_val[1] = rel_pages;
1297 initprog_val[2] = vacrel->dead_items_info->max_bytes;
1299
1300 /* Initialize for the first heap_vac_scan_next_block() call */
1301 vacrel->current_block = InvalidBlockNumber;
1302 vacrel->next_unskippable_block = InvalidBlockNumber;
1303 vacrel->next_unskippable_eager_scanned = false;
1304 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1305
1306 /*
1307 * Set up the read stream for vacuum's first pass through the heap.
1308 *
1309 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1310 * explicit work in heap_vac_scan_next_block.
1311 */
1313 vacrel->bstrategy,
1314 vacrel->rel,
1317 vacrel,
1318 sizeof(bool));
1319
1320 while (true)
1321 {
1322 Buffer buf;
1323 Page page;
1324 bool was_eager_scanned = false;
1325 int ndeleted = 0;
1326 bool has_lpdead_items;
1327 void *per_buffer_data = NULL;
1328 bool vm_page_frozen = false;
1329 bool got_cleanup_lock = false;
1330
1331 vacuum_delay_point(false);
1332
1333 /*
1334 * Regularly check if wraparound failsafe should trigger.
1335 *
1336 * There is a similar check inside lazy_vacuum_all_indexes(), but
1337 * relfrozenxid might start to look dangerously old before we reach
1338 * that point. This check also provides failsafe coverage for the
1339 * one-pass strategy, and the two-pass strategy with the index_cleanup
1340 * param set to 'off'.
1341 */
1342 if (vacrel->scanned_pages > 0 &&
1343 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
/* NOTE(review): the call guarded by the condition above was dropped from this extract */
1345
1346 /*
1347 * Consider if we definitely have enough space to process TIDs on page
1348 * already. If we are close to overrunning the available space for
1349 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1350 * this page. However, let's force at least one page-worth of tuples
1351 * to be stored as to ensure we do at least some work when the memory
1352 * configured is so low that we run out before storing anything.
1353 */
1354 if (vacrel->dead_items_info->num_items > 0 &&
1355 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1356 {
1357 /*
1358 * Before beginning index vacuuming, we release any pin we may
1359 * hold on the visibility map page. This isn't necessary for
1360 * correctness, but we do it anyway to avoid holding the pin
1361 * across a lengthy, unrelated operation.
1362 */
1363 if (BufferIsValid(vmbuffer))
1364 {
1365 ReleaseBuffer(vmbuffer);
1366 vmbuffer = InvalidBuffer;
1367 }
1368
1369 /* Perform a round of index and heap vacuuming */
1370 vacrel->consider_bypass_optimization = false;
1372
1373 /*
1374 * Vacuum the Free Space Map to make newly-freed space visible on
1375 * upper-level FSM pages. Note that blkno is the previously
1376 * processed block.
1377 */
1379 blkno + 1);
1381
1382 /* Report that we are once again scanning the heap */
1385 }
1386
1387 /* Fetch the next block chosen by the stream callback (see
1388 heap_vac_scan_next_block) along with its per-buffer data. */
1387 buf = read_stream_next_buffer(stream, &per_buffer_data);
1388
1389 /* The relation is exhausted. */
1390 if (!BufferIsValid(buf))
1391 break;
1392
1393 /* per-buffer data is the eager-scan flag set by heap_vac_scan_next_block() */
1393 was_eager_scanned = *((bool *) per_buffer_data);
1395 page = BufferGetPage(buf);
1396 blkno = BufferGetBlockNumber(buf);
1397
1398 vacrel->scanned_pages++;
1400 vacrel->eager_scanned_pages++;
1401
1402 /* Report as block scanned, update error traceback information */
1405 blkno, InvalidOffsetNumber);
1406
1407 /*
1408 * Pin the visibility map page in case we need to mark the page
1409 * all-visible. In most cases this will be very cheap, because we'll
1410 * already have the correct page pinned anyway.
1411 */
1412 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1413
1414 /*
1415 * We need a buffer cleanup lock to prune HOT chains and defragment
1416 * the page in lazy_scan_prune. But when it's not possible to acquire
1417 * a cleanup lock right away, we may be able to settle for reduced
1418 * processing using lazy_scan_noprune.
1419 */
1421
1422 if (!got_cleanup_lock)
1424
1425 /* Check for new or empty pages before lazy_scan_[no]prune call */
1427 vmbuffer))
1428 {
1429 /* Processed as new/empty page (lock and pin released) */
1430 continue;
1431 }
1432
1433 /*
1434 * If we didn't get the cleanup lock, we can still collect LP_DEAD
1435 * items in the dead_items area for later vacuuming, count live and
1436 * recently dead tuples for vacuum logging, and determine if this
1437 * block could later be truncated. If we encounter any xid/mxids that
1438 * require advancing the relfrozenxid/relminxid, we'll have to wait
1439 * for a cleanup lock and call lazy_scan_prune().
1440 */
1441 if (!got_cleanup_lock &&
1442 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1443 {
1444 /*
1445 * lazy_scan_noprune could not do all required processing. Wait
1446 * for a cleanup lock, and call lazy_scan_prune in the usual way.
1447 */
1448 Assert(vacrel->aggressive);
1451 got_cleanup_lock = true;
1452 }
1453
1454 /*
1455 * If we have a cleanup lock, we must now prune, freeze, and count
1456 * tuples. We may have acquired the cleanup lock originally, or we may
1457 * have gone back and acquired it after lazy_scan_noprune() returned
1458 * false. Either way, the page hasn't been processed yet.
1459 *
1460 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1461 * recently_dead_tuples and live tuples for vacuum logging, determine
1462 * if the block can later be truncated, and accumulate the details of
1463 * remaining LP_DEAD line pointers on the page into dead_items. These
1464 * dead items include those pruned by lazy_scan_prune() as well as
1465 * line pointers previously marked LP_DEAD.
1466 */
1467 if (got_cleanup_lock)
1468 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1469 vmbuffer,
1471
1472 /*
1473 * Count an eagerly scanned page as a failure or a success.
1474 *
1475 * Only lazy_scan_prune() freezes pages, so if we didn't get the
1476 * cleanup lock, we won't have frozen the page. However, we only count
1477 * pages that were too new to require freezing as eager freeze
1478 * failures.
1479 *
1480 * We could gather more information from lazy_scan_noprune() about
1481 * whether or not there were tuples with XIDs or MXIDs older than the
1482 * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1483 * exclude pages skipped due to cleanup lock contention from eager
1484 * freeze algorithm caps.
1485 */
1487 {
1488 /* Aggressive vacuums do not eager scan. */
1489 Assert(!vacrel->aggressive);
1490
1491 if (vm_page_frozen)
1492 {
1493 if (vacrel->eager_scan_remaining_successes > 0)
1494 vacrel->eager_scan_remaining_successes--;
1495
1496 if (vacrel->eager_scan_remaining_successes == 0)
1497 {
1498 /*
1499 * Report only once that we disabled eager scanning. We
1500 * may eagerly read ahead blocks in excess of the success
1501 * or failure caps before attempting to freeze them, so we
1502 * could reach here even after disabling additional eager
1503 * scanning.
1504 */
1505 if (vacrel->eager_scan_max_fails_per_region > 0)
1506 ereport(vacrel->verbose ? INFO : DEBUG2,
1507 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1509 vacrel->dbname, vacrel->relnamespace,
1510 vacrel->relname)));
1511
1512 /*
1513 * If we hit our success cap, permanently disable eager
1514 * scanning by setting the other eager scan management
1515 * fields to their disabled values.
1516 */
1517 vacrel->eager_scan_remaining_fails = 0;
1518 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1519 vacrel->eager_scan_max_fails_per_region = 0;
1520 }
1521 }
1522 else if (vacrel->eager_scan_remaining_fails > 0)
1523 vacrel->eager_scan_remaining_fails--;
1524 }
1525
1526 /*
1527 * Now drop the buffer lock and, potentially, update the FSM.
1528 *
1529 * Our goal is to update the freespace map the last time we touch the
1530 * page. If we'll process a block in the second pass, we may free up
1531 * additional space on the page, so it is better to update the FSM
1532 * after the second pass. If the relation has no indexes, or if index
1533 * vacuuming is disabled, there will be no second heap pass; if this
1534 * particular page has no dead items, the second heap pass will not
1535 * touch this page. So, in those cases, update the FSM now.
1536 *
1537 * Note: In corner cases, it's possible to miss updating the FSM
1538 * entirely. If index vacuuming is currently enabled, we'll skip the
1539 * FSM update now. But if failsafe mode is later activated, or there
1540 * are so few dead tuples that index vacuuming is bypassed, there will
1541 * also be no opportunity to update the FSM later, because we'll never
1542 * revisit this page. Since updating the FSM is desirable but not
1543 * absolutely required, that's OK.
1544 */
1545 if (vacrel->nindexes == 0
1546 || !vacrel->do_index_vacuuming
1547 || !has_lpdead_items)
1548 {
1549 Size freespace = PageGetHeapFreeSpace(page);
1550
1552 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1553
1554 /*
1555 * Periodically perform FSM vacuuming to make newly-freed space
1556 * visible on upper FSM pages. This is done after vacuuming if the
1557 * table has indexes. There will only be newly-freed space if we
1558 * held the cleanup lock and lazy_scan_prune() was called.
1559 */
1560 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1562 {
1564 blkno);
1566 }
1567 }
1568 else
1570 }
1571
1572 vacrel->blkno = InvalidBlockNumber;
1573 if (BufferIsValid(vmbuffer))
1574 ReleaseBuffer(vmbuffer);
1575
1576 /*
1577 * Report that everything is now scanned. We never skip scanning the last
1578 * block in the relation, so we can pass rel_pages here.
1579 */
1581 rel_pages);
1582
1583 /* now we can compute the new value for pg_class.reltuples */
1584 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1585 vacrel->scanned_pages,
1586 vacrel->live_tuples);
1587
1588 /*
1589 * Also compute the total number of surviving heap entries. In the
1590 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1591 */
1592 vacrel->new_rel_tuples =
1593 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1594 vacrel->missed_dead_tuples;
1595
1596 read_stream_end(stream);
1597
1598 /*
1599 * Do index vacuuming (call each index's ambulkdelete routine), then do
1600 * related heap vacuuming
1601 */
1602 if (vacrel->dead_items_info->num_items > 0)
1604
1605 /*
1606 * Vacuum the remainder of the Free Space Map. We must do this whether or
1607 * not there were indexes, and whether or not we bypassed index vacuuming.
1608 * We can pass rel_pages here because we never skip scanning the last
1609 * block of the relation.
1610 */
1611 if (rel_pages > next_fsm_block_to_vacuum)
1613
1614 /* report all blocks vacuumed */
1616
1617 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1618 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1620}
1621
1622/*
1623 * heap_vac_scan_next_block() -- read stream callback to get the next block
1624 * for vacuum to process
1625 *
1626 * Every time lazy_scan_heap() needs a new block to process during its first
1627 * phase, it invokes read_stream_next_buffer() with a stream set up to call
1628 * heap_vac_scan_next_block() to get the next block.
1629 *
1630 * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1631 * various thresholds to skip blocks which do not need to be processed and
1632 * returns the next block to process or InvalidBlockNumber if there are no
1633 * remaining blocks.
1634 *
1635 * The visibility status of the next block to process and whether or not it
1636 * was eager scanned is set in the per_buffer_data.
1637 *
1638 * callback_private_data contains a reference to the LVRelState, passed to the
1639 * read stream API during stream setup. The LVRelState is an in/out parameter
1640 * here (locally named `vacrel`). Vacuum options and information about the
1641 * relation are read from it. vacrel->skippedallvis is set if we skip a block
1642 * that's all-visible but not all-frozen (to ensure that we don't update
1643 * relfrozenxid in that case). vacrel also holds information about the next
1644 * unskippable block -- as bookkeeping for this function.
1645 */
/*
 * NOTE(review): extracted from a rendered listing with dropped lines -- the
 * signature line (upstream 1647) and the declaration/assignment of
 * next_block (upstream 1651/1655, presumably from vacrel->current_block + 1
 * given the overflow comment below) are missing.  Verify against upstream
 * before editing.
 */
1646static BlockNumber
1648 void *callback_private_data,
1649 void *per_buffer_data)
1650{
1652 LVRelState *vacrel = callback_private_data;
1653
1654 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1656
1657 /* Have we reached the end of the relation? */
1658 if (next_block >= vacrel->rel_pages)
1659 {
1660 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1661 {
1662 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1663 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1664 }
1665 return InvalidBlockNumber;
1666 }
1667
1668 /*
1669 * We must be in one of the three following states:
1670 */
1671 if (next_block > vacrel->next_unskippable_block ||
1672 vacrel->next_unskippable_block == InvalidBlockNumber)
1673 {
1674 /*
1675 * 1. We have just processed an unskippable block (or we're at the
1676 * beginning of the scan). Find the next unskippable block using the
1677 * visibility map.
1678 */
1679 bool skipsallvis;
1680
1682
1683 /*
1684 * We now know the next block that we must process. It can be the
1685 * next block after the one we just processed, or something further
1686 * ahead. If it's further ahead, we can jump to it, but we choose to
1687 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1688 * pages. Since we're reading sequentially, the OS should be doing
1689 * readahead for us, so there's no gain in skipping a page now and
1690 * then. Skipping such a range might even discourage sequential
1691 * detection.
1692 *
1693 * This test also enables more frequent relfrozenxid advancement
1694 * during non-aggressive VACUUMs. If the range has any all-visible
1695 * pages then skipping makes updating relfrozenxid unsafe, which is a
1696 * real downside.
1697 */
1698 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1699 {
1700 next_block = vacrel->next_unskippable_block;
1701 if (skipsallvis)
1702 vacrel->skippedallvis = true;
1703 }
1704 }
1705
1706 /* Now we must be in one of the two remaining states: */
1707 if (next_block < vacrel->next_unskippable_block)
1708 {
1709 /*
1710 * 2. We are processing a range of blocks that we could have skipped
1711 * but chose not to. We know that they are all-visible in the VM,
1712 * otherwise they would've been unskippable.
1713 */
1714 vacrel->current_block = next_block;
1715 /* Block was not eager scanned */
1716 *((bool *) per_buffer_data) = false;
1717 return vacrel->current_block;
1718 }
1719 else
1720 {
1721 /*
1722 * 3. We reached the next unskippable block. Process it. On next
1723 * iteration, we will be back in state 1.
1724 */
1725 Assert(next_block == vacrel->next_unskippable_block);
1726
1727 vacrel->current_block = next_block;
1728 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1729 return vacrel->current_block;
1730 }
1731}
1732
1733/*
1734 * Find the next unskippable block in a vacuum scan using the visibility map.
1735 * The next unskippable block and its visibility information are updated
1736 * in vacrel.
1737 *
1738 * Note: our opinion of which blocks can be skipped can go stale immediately.
1739 * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1740 * was concurrently cleared, though. All that matters is that caller scan all
1741 * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1742 * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1743 * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1744 * to skip such a range is actually made, making everything safe.)
1745 */
/*
 * NOTE(review): extracted from a rendered listing with dropped lines -- the
 * signature line (upstream 1747), the statement that fetches the VM bits
 * into `mapbits` (upstream 1758, whose argument tail is still visible
 * below), and the all-visible test around upstream 1780/1782 are missing.
 * `mapbits` is nonetheless referenced at upstream 1807.  Verify against
 * upstream before editing.
 */
1746static void
1748{
1749 BlockNumber rel_pages = vacrel->rel_pages;
1750 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1751 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1752 bool next_unskippable_eager_scanned = false;
1753
1754 *skipsallvis = false;
1755
1756 for (;; next_unskippable_block++)
1757 {
1759 next_unskippable_block,
1760 &next_unskippable_vmbuffer);
1761
1762
1763 /*
1764 * At the start of each eager scan region, normal vacuums with eager
1765 * scanning enabled reset the failure counter, allowing vacuum to
1766 * resume eager scanning if it had been suspended in the previous
1767 * region.
1768 */
1769 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1770 {
1771 vacrel->eager_scan_remaining_fails =
1772 vacrel->eager_scan_max_fails_per_region;
1773 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1774 }
1775
1776 /*
1777 * A block is unskippable if it is not all visible according to the
1778 * visibility map.
1779 */
1781 {
1783 break;
1784 }
1785
1786 /*
1787 * Caller must scan the last page to determine whether it has tuples
1788 * (caller must have the opportunity to set vacrel->nonempty_pages).
1789 * This rule avoids having lazy_truncate_heap() take access-exclusive
1790 * lock on rel to attempt a truncation that fails anyway, just because
1791 * there are tuples on the last page (it is likely that there will be
1792 * tuples on other nearby pages as well, but those can be skipped).
1793 *
1794 * Implement this by always treating the last block as unsafe to skip.
1795 */
1796 if (next_unskippable_block == rel_pages - 1)
1797 break;
1798
1799 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1800 if (!vacrel->skipwithvm)
1801 break;
1802
1803 /*
1804 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1805 * already frozen by now), so this page can be skipped.
1806 */
1807 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1808 continue;
1809
1810 /*
1811 * Aggressive vacuums cannot skip any all-visible pages that are not
1812 * also all-frozen.
1813 */
1814 if (vacrel->aggressive)
1815 break;
1816
1817 /*
1818 * Normal vacuums with eager scanning enabled only skip all-visible
1819 * but not all-frozen pages if they have hit the failure limit for the
1820 * current eager scan region.
1821 */
1822 if (vacrel->eager_scan_remaining_fails > 0)
1823 {
1824 next_unskippable_eager_scanned = true;
1825 break;
1826 }
1827
1828 /*
1829 * All-visible blocks are safe to skip in a normal vacuum. But
1830 * remember that the final range contains such a block for later.
1831 */
1832 *skipsallvis = true;
1833 }
1834
1835 /* write the local variables back to vacrel */
1836 vacrel->next_unskippable_block = next_unskippable_block;
1837 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1838 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1839}
1840
1841/*
1842 * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1843 *
1844 * Must call here to handle both new and empty pages before calling
1845 * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1846 * with new or empty pages.
1847 *
1848 * It's necessary to consider new pages as a special case, since the rules for
1849 * maintaining the visibility map and FSM with empty pages are a little
1850 * different (though new pages can be truncated away during rel truncation).
1851 *
1852 * Empty pages are not really a special case -- they're just heap pages that
1853 * have no allocated tuples (including even LP_UNUSED items). You might
1854 * wonder why we need to handle them here all the same. It's only necessary
1855 * because of a corner-case involving a hard crash during heap relation
1856 * extension. If we ever make relation-extension crash safe, then it should
1857 * no longer be necessary to deal with empty pages here (or new pages, for
1858 * that matter).
1859 *
1860 * Caller must hold at least a shared lock. We might need to escalate the
1861 * lock in that case, so the type of lock caller holds needs to be specified
1862 * using 'sharelock' argument.
1863 *
1864 * Returns false in common case where caller should go on to call
1865 * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1866 * that lazy_scan_heap is done processing the page, releasing lock on caller's
1867 * behalf.
1868 *
1869 * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1870 * is passed here because neither empty nor new pages can be eagerly frozen.
1871 * New pages are never frozen. Empty pages are always set frozen in the VM at
1872 * the same time that they are set all-visible, and we don't eagerly scan
1873 * frozen pages.
1874 */
/*
 * NOTE(review): extracted from a rendered listing with dropped lines -- the
 * first half of the signature (upstream 1876), several lock/unlock and
 * critical-section calls (e.g. upstream 1902, 1923-1924, 1944-1946, 1949,
 * 1964-1967, 1976, 1986), and parts of the visibilitymap_set() and WAL-emit
 * argument lists are missing.  Verify against upstream before editing.
 */
1875static bool
1877 Page page, bool sharelock, Buffer vmbuffer)
1878{
1879 Size freespace;
1880
1881 if (PageIsNew(page))
1882 {
1883 /*
1884 * All-zeroes pages can be left over if either a backend extends the
1885 * relation by a single page, but crashes before the newly initialized
1886 * page has been written out, or when bulk-extending the relation
1887 * (which creates a number of empty pages at the tail end of the
1888 * relation), and then enters them into the FSM.
1889 *
1890 * Note we do not enter the page into the visibilitymap. That has the
1891 * downside that we repeatedly visit this page in subsequent vacuums,
1892 * but otherwise we'll never discover the space on a promoted standby.
1893 * The harm of repeated checking ought to normally not be too bad. The
1894 * space usually should be used at some point, otherwise there
1895 * wouldn't be any regular vacuums.
1896 *
1897 * Make sure these pages are in the FSM, to ensure they can be reused.
1898 * Do that by testing if there's any space recorded for the page. If
1899 * not, enter it. We do so after releasing the lock on the heap page,
1900 * the FSM is approximate, after all.
1901 */
1903
1904 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1905 {
1906 freespace = BLCKSZ - SizeOfPageHeaderData;
1907
1908 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1909 }
1910
1911 return true;
1912 }
1913
1914 if (PageIsEmpty(page))
1915 {
1916 /*
1917 * It seems likely that caller will always be able to get a cleanup
1918 * lock on an empty page. But don't take any chances -- escalate to
1919 * an exclusive lock (still don't need a cleanup lock, though).
1920 */
1921 if (sharelock)
1922 {
1925
1926 if (!PageIsEmpty(page))
1927 {
1928 /* page isn't new or empty -- keep lock and pin for now */
1929 return false;
1930 }
1931 }
1932 else
1933 {
1934 /* Already have a full cleanup lock (which is more than enough) */
1935 }
1936
1937 /*
1938 * Unlike new pages, empty pages are always set all-visible and
1939 * all-frozen.
1940 */
1941 if (!PageIsAllVisible(page))
1942 {
1943 /* Lock vmbuffer before entering critical section */
1945
1947
1948 /* mark buffer dirty before writing a WAL record */
1950
1951 PageSetAllVisible(page);
1952 PageClearPrunable(page);
1953 visibilitymap_set(blkno,
1954 vmbuffer,
1957 vacrel->rel->rd_locator);
1958
1959 /*
1960 * Emit WAL for setting PD_ALL_VISIBLE on the heap page and
1961 * setting the VM.
1962 */
1963 if (RelationNeedsWAL(vacrel->rel))
1965 vmbuffer,
1968 InvalidTransactionId, /* conflict xid */
1969 false, /* cleanup lock */
1970 PRUNE_VACUUM_SCAN, /* reason */
1971 NULL, 0,
1972 NULL, 0,
1973 NULL, 0,
1974 NULL, 0);
1975
1977
1978 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
1979
1980 /* Count the newly all-frozen pages for logging */
1981 vacrel->new_all_visible_pages++;
1982 vacrel->new_all_visible_all_frozen_pages++;
1983 }
1984
1985 freespace = PageGetHeapFreeSpace(page);
1987 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1988 return true;
1989 }
1990
1991 /* page isn't new or empty -- keep lock and pin */
1992 return false;
1993}
1994
1995/* qsort comparator for sorting OffsetNumbers */
1996static int
1997cmpOffsetNumbers(const void *a, const void *b)
1998{
1999 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
2000}
2001
/*
 * NOTE(review): incomplete listing -- this Doxygen HTML extraction dropped
 * source lines made up of hyperlinked identifiers (including the signature
 * line naming this function, presumably "lazy_scan_prune(LVRelState *vacrel,",
 * and the heap pruning call itself), and the original file's line numbers are
 * fused onto each remaining line.  Verify any change against the real
 * vacuumlazy.c before editing.
 */
2002/*
2003 * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
2004 *
2005 * Caller must hold pin and buffer cleanup lock on the buffer.
2006 *
2007 * vmbuffer is the buffer containing the VM block with visibility information
2008 * for the heap block, blkno.
2009 *
2010 * *has_lpdead_items is set to true or false depending on whether, upon return
2011 * from this function, any LP_DEAD items are still present on the page.
2012 *
2013 * *vm_page_frozen is set to true if the page is newly set all-frozen in the
2014 * VM. The caller currently only uses this for determining whether an eagerly
2015 * scanned page was successfully set all-frozen.
2016 *
2017 * Returns the number of tuples deleted from the page during HOT pruning.
2018 */
2019static int
2021 Buffer buf,
2022 BlockNumber blkno,
2023 Page page,
2024 Buffer vmbuffer,
2025 bool *has_lpdead_items,
2026 bool *vm_page_frozen)
2027{
2028 Relation rel = vacrel->rel;
2030 PruneFreezeParams params = {
2031 .relation = rel,
2032 .buffer = buf,
2033 .vmbuffer = vmbuffer,
2034 .reason = PRUNE_VACUUM_SCAN,
2036 .vistest = vacrel->vistest,
2037 .cutoffs = &vacrel->cutoffs,
2038 };
2039
2040 Assert(BufferGetBlockNumber(buf) == blkno);
2041
2042 /*
2043 * Prune all HOT-update chains and potentially freeze tuples on this page.
2044 *
2045 * If the relation has no indexes, we can immediately mark would-be dead
2046 * items LP_UNUSED.
2047 *
2048 * The number of tuples removed from the page is returned in
2049 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2050 * presult.lpdead_items's final value can be thought of as the number of
2051 * tuples that were deleted from indexes.
2052 *
2053 * We will update the VM after collecting LP_DEAD items and freezing
2054 * tuples. Pruning will have determined whether or not the page is
2055 * all-visible.
2056 */
2057 if (vacrel->nindexes == 0)
2059
2060 /*
2061 * Allow skipping full inspection of pages that the VM indicates are
2062 * already all-frozen (which may be scanned due to SKIP_PAGES_THRESHOLD).
2063 * However, if DISABLE_PAGE_SKIPPING was specified, we can't trust the VM,
2064 * so we must examine the page to make sure it is truly all-frozen and fix
2065 * it otherwise.
2066 */
2067 if (vacrel->skipwithvm)
2069
2071 &presult,
2072 &vacrel->offnum,
2073 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2074
2075 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2076 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2077
2078 if (presult.nfrozen > 0)
2079 {
2080 /*
2081 * We don't increment the new_frozen_tuple_pages instrumentation
2082 * counter when nfrozen == 0, since it only counts pages with newly
2083 * frozen tuples (don't confuse that with pages newly set all-frozen
2084 * in VM).
2085 */
2086 vacrel->new_frozen_tuple_pages++;
2087 }
2088
2089 /*
2090 * Now save details of the LP_DEAD items from the page in vacrel
2091 */
2092 if (presult.lpdead_items > 0)
2093 {
2094 vacrel->lpdead_item_pages++;
2095
2096 /*
2097 * deadoffsets are collected incrementally in
2098 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2099 * with an indeterminate order, but dead_items_add requires them to be
2100 * sorted.
2101 */
2102 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2104
2105 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2106 }
2107
2108 /* Finally, add page-local counts to whole-VACUUM counts */
2109 if (presult.newly_all_visible)
2110 vacrel->new_all_visible_pages++;
2111 if (presult.newly_all_visible_frozen)
2112 vacrel->new_all_visible_all_frozen_pages++;
2113 if (presult.newly_all_frozen)
2114 vacrel->new_all_frozen_pages++;
2115
2116 /* Capture if the page was newly set frozen */
2117 *vm_page_frozen = presult.newly_all_visible_frozen ||
2118 presult.newly_all_frozen;
2119
2120 vacrel->tuples_deleted += presult.ndeleted;
2121 vacrel->tuples_frozen += presult.nfrozen;
2122 vacrel->lpdead_items += presult.lpdead_items;
2123 vacrel->live_tuples += presult.live_tuples;
2124 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2125
2126 /* Can't truncate this page */
2127 if (presult.hastup)
2128 vacrel->nonempty_pages = blkno + 1;
2129
2130 /* Did we find LP_DEAD items? */
2131 *has_lpdead_items = (presult.lpdead_items > 0);
2132
2133 return presult.ndeleted;
2134}
2135
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines that
 * were entirely hyperlinked identifiers (the function-name line of the
 * signature, some local declarations such as the deadoffsets array, the
 * heap_tuple_should_freeze() call line, and two switch case labels).  The
 * original file's line numbers are fused onto each remaining line.  Confirm
 * against the real vacuumlazy.c before editing.
 */
2136/*
2137 * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2138 *
2139 * Caller need only hold a pin and share lock on the buffer, unlike
2140 * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2141 * performed here, it's quite possible that an earlier opportunistic pruning
2142 * operation left LP_DEAD items behind. We'll at least collect any such items
2143 * in dead_items for removal from indexes.
2144 *
2145 * For aggressive VACUUM callers, we may return false to indicate that a full
2146 * cleanup lock is required for processing by lazy_scan_prune. This is only
2147 * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2148 * one or more tuples on the page. We always return true for non-aggressive
2149 * callers.
2150 *
2151 * If this function returns true, *has_lpdead_items gets set to true or false
2152 * depending on whether, upon return from this function, any LP_DEAD items are
2153 * present on the page. If this function returns false, *has_lpdead_items
2154 * is not updated.
2155 */
2156static bool
2158 Buffer buf,
2159 BlockNumber blkno,
2160 Page page,
2161 bool *has_lpdead_items)
2162{
2163 OffsetNumber offnum,
2164 maxoff;
2165 int lpdead_items,
2166 live_tuples,
2167 recently_dead_tuples,
2168 missed_dead_tuples;
2169 bool hastup;
2171 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2172 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2174
2175 Assert(BufferGetBlockNumber(buf) == blkno);
2176
2177 hastup = false; /* for now */
2178
2179 lpdead_items = 0;
2180 live_tuples = 0;
2181 recently_dead_tuples = 0;
2182 missed_dead_tuples = 0;
2183
2184 maxoff = PageGetMaxOffsetNumber(page);
2185 for (offnum = FirstOffsetNumber;
2186 offnum <= maxoff;
2187 offnum = OffsetNumberNext(offnum))
2188 {
2189 ItemId itemid;
2190 HeapTupleData tuple;
2191
2192 vacrel->offnum = offnum;
2193 itemid = PageGetItemId(page, offnum);
2194
2195 if (!ItemIdIsUsed(itemid))
2196 continue;
2197
2198 if (ItemIdIsRedirected(itemid))
2199 {
2200 hastup = true;
2201 continue;
2202 }
2203
2204 if (ItemIdIsDead(itemid))
2205 {
2206 /*
2207 * Deliberately don't set hastup=true here. See same point in
2208 * lazy_scan_prune for an explanation.
2209 */
2210 deadoffsets[lpdead_items++] = offnum;
2211 continue;
2212 }
2213
2214 hastup = true; /* page prevents rel truncation */
2215 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2217 &NoFreezePageRelfrozenXid,
2218 &NoFreezePageRelminMxid))
2219 {
2220 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2221 if (vacrel->aggressive)
2222 {
2223 /*
2224 * Aggressive VACUUMs must always be able to advance rel's
2225 * relfrozenxid to a value >= FreezeLimit (and be able to
2226 * advance rel's relminmxid to a value >= MultiXactCutoff).
2227 * The ongoing aggressive VACUUM won't be able to do that
2228 * unless it can freeze an XID (or MXID) from this tuple now.
2229 *
2230 * The only safe option is to have caller perform processing
2231 * of this page using lazy_scan_prune. Caller might have to
2232 * wait a while for a cleanup lock, but it can't be helped.
2233 */
2234 vacrel->offnum = InvalidOffsetNumber;
2235 return false;
2236 }
2237
2238 /*
2239 * Non-aggressive VACUUMs are under no obligation to advance
2240 * relfrozenxid (even by one XID). We can be much laxer here.
2241 *
2242 * Currently we always just accept an older final relfrozenxid
2243 * and/or relminmxid value. We never make caller wait or work a
2244 * little harder, even when it likely makes sense to do so.
2245 */
2246 }
2247
2248 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2249 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2250 tuple.t_len = ItemIdGetLength(itemid);
2251 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2252
2253 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2254 buf))
2255 {
2257 case HEAPTUPLE_LIVE:
2258
2259 /*
2260 * Count both cases as live, just like lazy_scan_prune
2261 */
2262 live_tuples++;
2263
2264 break;
2265 case HEAPTUPLE_DEAD:
2266
2267 /*
2268 * There is some useful work for pruning to do, that won't be
2269 * done due to failure to get a cleanup lock.
2270 */
2271 missed_dead_tuples++;
2272 break;
2274
2275 /*
2276 * Count in recently_dead_tuples, just like lazy_scan_prune
2277 */
2278 recently_dead_tuples++;
2279 break;
2281
2282 /*
2283 * Do not count these rows as live, just like lazy_scan_prune
2284 */
2285 break;
2286 default:
2287 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2288 break;
2289 }
2290 }
2291
2292 vacrel->offnum = InvalidOffsetNumber;
2293
2294 /*
2295 * By here we know for sure that caller can put off freezing and pruning
2296 * this particular page until the next VACUUM. Remember its details now.
2297 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2298 */
2299 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2300 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2301
2302 /* Save any LP_DEAD items found on the page in dead_items */
2303 if (vacrel->nindexes == 0)
2304 {
2305 /* Using one-pass strategy (since table has no indexes) */
2306 if (lpdead_items > 0)
2307 {
2308 /*
2309 * Perfunctory handling for the corner case where a single pass
2310 * strategy VACUUM cannot get a cleanup lock, and it turns out
2311 * that there is one or more LP_DEAD items: just count the LP_DEAD
2312 * items as missed_dead_tuples instead. (This is a bit dishonest,
2313 * but it beats having to maintain specialized heap vacuuming code
2314 * forever, for vanishingly little benefit.)
2315 */
2316 hastup = true;
2317 missed_dead_tuples += lpdead_items;
2318 }
2319 }
2320 else if (lpdead_items > 0)
2321 {
2322 /*
2323 * Page has LP_DEAD items, and so any references/TIDs that remain in
2324 * indexes will be deleted during index vacuuming (and then marked
2325 * LP_UNUSED in the heap)
2326 */
2327 vacrel->lpdead_item_pages++;
2328
2329 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2330
2331 vacrel->lpdead_items += lpdead_items;
2332 }
2333
2334 /*
2335 * Finally, add relevant page-local counts to whole-VACUUM counts
2336 */
2337 vacrel->live_tuples += live_tuples;
2338 vacrel->recently_dead_tuples += recently_dead_tuples;
2339 vacrel->missed_dead_tuples += missed_dead_tuples;
2340 if (missed_dead_tuples > 0)
2341 vacrel->missed_dead_pages++;
2342
2343 /* Can't truncate this page */
2344 if (hastup)
2345 vacrel->nonempty_pages = blkno + 1;
2346
2347 /* Did we find LP_DEAD items? */
2348 *has_lpdead_items = (lpdead_items > 0);
2349
2350 /* Caller won't need to call lazy_scan_prune with same page */
2351 return true;
2352}
2353
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, the
 * threshold computation, the lazy_vacuum_all_indexes()/lazy_vacuum_heap_rel()
 * calls, and the dead_items_reset() call).  The original file's line numbers
 * are fused onto each remaining line.  Confirm against the real vacuumlazy.c
 * before editing.
 */
2354/*
2355 * Main entry point for index vacuuming and heap vacuuming.
2356 *
2357 * Removes items collected in dead_items from table's indexes, then marks the
2358 * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2359 * for full details.
2360 *
2361 * Also empties dead_items, freeing up space for later TIDs.
2362 *
2363 * We may choose to bypass index vacuuming at this point, though only when the
2364 * ongoing VACUUM operation will definitely only have one index scan/round of
2365 * index vacuuming.
2366 */
2367static void
2369{
2370 bool bypass;
2371
2372 /* Should not end up here with no indexes */
2373 Assert(vacrel->nindexes > 0);
2374 Assert(vacrel->lpdead_item_pages > 0);
2375
2376 if (!vacrel->do_index_vacuuming)
2377 {
2378 Assert(!vacrel->do_index_cleanup);
2380 return;
2381 }
2382
2383 /*
2384 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2385 *
2386 * We currently only do this in cases where the number of LP_DEAD items
2387 * for the entire VACUUM operation is close to zero. This avoids sharp
2388 * discontinuities in the duration and overhead of successive VACUUM
2389 * operations that run against the same table with a fixed workload.
2390 * Ideally, successive VACUUM operations will behave as if there are
2391 * exactly zero LP_DEAD items in cases where there are close to zero.
2392 *
2393 * This is likely to be helpful with a table that is continually affected
2394 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2395 * have small aberrations that lead to just a few heap pages retaining
2396 * only one or two LP_DEAD items. This is pretty common; even when the
2397 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2398 * impossible to predict whether HOT will be applied in 100% of cases.
2399 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2400 * HOT through careful tuning.
2401 */
2402 bypass = false;
2403 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2404 {
2406
2407 Assert(vacrel->num_index_scans == 0);
2408 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2409 Assert(vacrel->do_index_vacuuming);
2410 Assert(vacrel->do_index_cleanup);
2411
2412 /*
2413 * This crossover point at which we'll start to do index vacuuming is
2414 * expressed as a percentage of the total number of heap pages in the
2415 * table that are known to have at least one LP_DEAD item. This is
2416 * much more important than the total number of LP_DEAD items, since
2417 * it's a proxy for the number of heap pages whose visibility map bits
2418 * cannot be set on account of bypassing index and heap vacuuming.
2419 *
2420 * We apply one further precautionary test: the space currently used
2421 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2422 * not exceed 32MB. This limits the risk that we will bypass index
2423 * vacuuming again and again until eventually there is a VACUUM whose
2424 * dead_items space is not CPU cache resident.
2425 *
2426 * We don't take any special steps to remember the LP_DEAD items (such
2427 * as counting them in our final update to the stats system) when the
2428 * optimization is applied. Though the accounting used in analyze.c's
2429 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2430 * rows in its own stats report, that's okay. The discrepancy should
2431 * be negligible. If this optimization is ever expanded to cover more
2432 * cases then this may need to be reconsidered.
2433 */
2435 bypass = (vacrel->lpdead_item_pages < threshold &&
2436 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2437 }
2438
2439 if (bypass)
2440 {
2441 /*
2442 * There are almost zero TIDs. Behave as if there were precisely
2443 * zero: bypass index vacuuming, but do index cleanup.
2444 *
2445 * We expect that the ongoing VACUUM operation will finish very
2446 * quickly, so there is no point in considering speeding up as a
2447 * failsafe against wraparound failure. (Index cleanup is expected to
2448 * finish very quickly in cases where there were no ambulkdelete()
2449 * calls.)
2450 */
2451 vacrel->do_index_vacuuming = false;
2452 }
2454 {
2455 /*
2456 * We successfully completed a round of index vacuuming. Do related
2457 * heap vacuuming now.
2458 */
2460 }
2461 else
2462 {
2463 /*
2464 * Failsafe case.
2465 *
2466 * We attempted index vacuuming, but didn't finish a full round/full
2467 * index scan. This happens when relfrozenxid or relminmxid is too
2468 * far in the past.
2469 *
2470 * From this point on the VACUUM operation will do no further index
2471 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2472 * back here again.
2473 */
2475 }
2476
2477 /*
2478 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2479 * vacuum)
2480 */
2482}
2483
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, the
 * progress-index array entries, the failsafe prechecks, and the progress
 * reporting calls).  The original file's line numbers are fused onto each
 * remaining line.  Confirm against the real vacuumlazy.c before editing.
 */
2484/*
2485 * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2486 *
2487 * Returns true in the common case when all indexes were successfully
2488 * vacuumed. Returns false in rare cases where we determined that the ongoing
2489 * VACUUM operation is at risk of taking too long to finish, leading to
2490 * wraparound failure.
2491 */
2492static bool
2494{
2495 bool allindexes = true;
2496 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2497 const int progress_start_index[] = {
2500 };
2501 const int progress_end_index[] = {
2505 };
2508
2509 Assert(vacrel->nindexes > 0);
2510 Assert(vacrel->do_index_vacuuming);
2511 Assert(vacrel->do_index_cleanup);
2512
2513 /* Precheck for XID wraparound emergencies */
2515 {
2516 /* Wraparound emergency -- don't even start an index scan */
2517 return false;
2518 }
2519
2520 /*
2521 * Report that we are now vacuuming indexes and the number of indexes to
2522 * vacuum.
2523 */
2525 progress_start_val[1] = vacrel->nindexes;
2527
2529 {
2530 for (int idx = 0; idx < vacrel->nindexes; idx++)
2531 {
2532 Relation indrel = vacrel->indrels[idx];
2533 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2534
2535 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2537 vacrel);
2538
2539 /* Report the number of indexes vacuumed */
2541 idx + 1);
2542
2544 {
2545 /* Wraparound emergency -- end current index scan */
2546 allindexes = false;
2547 break;
2548 }
2549 }
2550 }
2551 else
2552 {
2553 /* Outsource everything to parallel variant */
2555 vacrel->num_index_scans,
2556 &(vacrel->worker_usage.vacuum));
2557
2558 /*
2559 * Do a postcheck to consider applying wraparound failsafe now. Note
2560 * that parallel VACUUM only gets the precheck and this postcheck.
2561 */
2563 allindexes = false;
2564 }
2565
2566 /*
2567 * We delete all LP_DEAD items from the first heap pass in all indexes on
2568 * each call here (except calls where we choose to do the failsafe). This
2569 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2570 * of the failsafe triggering, which prevents the next call from taking
2571 * place).
2572 */
2573 Assert(vacrel->num_index_scans > 0 ||
2574 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2576
2577 /*
2578 * Increase and report the number of index scans. Also, we reset
2579 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2580 *
2581 * We deliberately include the case where we started a round of bulk
2582 * deletes that we weren't able to finish due to the failsafe triggering.
2583 */
2584 vacrel->num_index_scans++;
2585 progress_end_val[0] = 0;
2586 progress_end_val[1] = 0;
2587 progress_end_val[2] = vacrel->num_index_scans;
2589
2590 return allindexes;
2591}
2592
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped the
 * function-name line of the signature, the iterator-result declaration, and
 * the TidStoreIterateNext() call line; the original file's line numbers are
 * fused onto each remaining line.  Confirm against the real vacuumlazy.c
 * before editing.
 */
2593/*
2594 * Read stream callback for vacuum's third phase (second pass over the heap).
2595 * Gets the next block from the TID store and returns it or InvalidBlockNumber
2596 * if there are no further blocks to vacuum.
2597 *
2598 * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2599 */
2600static BlockNumber
2602 void *callback_private_data,
2603 void *per_buffer_data)
2604{
2605 TidStoreIter *iter = callback_private_data;
2607
2609 if (iter_result == NULL)
2610 return InvalidBlockNumber;
2611
2612 /*
2613 * Save the TidStoreIterResult for later, so we can extract the offsets.
2614 * It is safe to copy the result, according to TidStoreIterateNext().
2615 */
2616 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2617
2618 return iter_result->blkno;
2619}
2620
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, local
 * declarations such as vacuumed_pages and offsets[], the progress-report and
 * error-callback setup, the read_stream_begin_relation() call head, the
 * offset extraction from the iterator result, and the buffer lock/unlock
 * calls).  The original file's line numbers are fused onto each remaining
 * line.  Confirm against the real vacuumlazy.c before editing.
 */
2621/*
2622 * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2623 *
2624 * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2625 * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2626 *
2627 * We may also be able to truncate the line pointer array of the heap pages we
2628 * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2629 * array, it can be reclaimed as free space. These LP_UNUSED items usually
2630 * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2631 * each page to LP_UNUSED, and then consider if it's possible to truncate the
2632 * page's line pointer array).
2633 *
2634 * Note: the reason for doing this as a second pass is we cannot remove the
2635 * tuples until we've removed their index entries, and we want to process
2636 * index entry removal in batches as large as possible.
2637 */
2638static void
2640{
2641 ReadStream *stream;
2643 Buffer vmbuffer = InvalidBuffer;
2645 TidStoreIter *iter;
2646
2647 Assert(vacrel->do_index_vacuuming);
2648 Assert(vacrel->do_index_cleanup);
2649 Assert(vacrel->num_index_scans > 0);
2650
2651 /* Report that we are now vacuuming the heap */
2654
2655 /* Update error traceback information */
2659
2660 iter = TidStoreBeginIterate(vacrel->dead_items);
2661
2662 /*
2663 * Set up the read stream for vacuum's second pass through the heap.
2664 *
2665 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2666 * not need to wait for IO and does not perform locking. Once we support
2667 * parallelism it should still be fine, as presumably the holder of locks
2668 * would never be blocked by IO while holding the lock.
2669 */
2672 vacrel->bstrategy,
2673 vacrel->rel,
2676 iter,
2677 sizeof(TidStoreIterResult));
2678
2679 while (true)
2680 {
2681 BlockNumber blkno;
2682 Buffer buf;
2683 Page page;
2685 Size freespace;
2687 int num_offsets;
2688
2689 vacuum_delay_point(false);
2690
2691 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2692
2693 /* The relation is exhausted */
2694 if (!BufferIsValid(buf))
2695 break;
2696
2697 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2698
2701 Assert(num_offsets <= lengthof(offsets));
2702
2703 /*
2704 * Pin the visibility map page in case we need to mark the page
2705 * all-visible. In most cases this will be very cheap, because we'll
2706 * already have the correct page pinned anyway.
2707 */
2708 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2709
2710 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2712 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2713 num_offsets, vmbuffer);
2714
2715 /* Now that we've vacuumed the page, record its available space */
2716 page = BufferGetPage(buf);
2717 freespace = PageGetHeapFreeSpace(page);
2718
2720 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2722 }
2723
2724 read_stream_end(stream);
2725 TidStoreEndIterate(iter);
2726
2727 vacrel->blkno = InvalidBlockNumber;
2728 if (BufferIsValid(vmbuffer))
2729 ReleaseBuffer(vmbuffer);
2730
2731 /*
2732 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2733 * the second heap pass. No more, no less.
2734 */
2735 Assert(vacrel->num_index_scans > 1 ||
2736 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2737 vacuumed_pages == vacrel->lpdead_item_pages));
2738
2740 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2741 vacrel->relname, vacrel->dead_items_info->num_items,
2742 vacuumed_pages)));
2743
2744 /* Revert to the previous phase information for error traceback */
2746}
2747
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, local
 * declarations such as the unused[] array and conflict_xid, the critical
 * section start/end, the vmflags assignment, and the VM-buffer lock call).
 * The original file's line numbers are fused onto each remaining line.
 * Confirm against the real vacuumlazy.c before editing.
 */
2748/*
2749 * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2750 * vacrel->dead_items store.
2751 *
2752 * Caller must have an exclusive buffer lock on the buffer (though a full
2753 * cleanup lock is also acceptable). vmbuffer must be valid and already have
2754 * a pin on blkno's visibility map page.
2755 */
2756static void
2758 OffsetNumber *deadoffsets, int num_offsets,
2759 Buffer vmbuffer)
2760{
2761 Page page = BufferGetPage(buffer);
2763 int nunused = 0;
2764 TransactionId newest_live_xid;
2766 bool all_frozen;
2768 uint8 vmflags = 0;
2769
2770 Assert(vacrel->do_index_vacuuming);
2771
2773
2774 /* Update error traceback information */
2778
2779 /*
2780 * Before marking dead items unused, check whether the page will become
2781 * all-visible once that change is applied. This lets us reap the tuples
2782 * and mark the page all-visible within the same critical section,
2783 * enabling both changes to be emitted in a single WAL record. Since the
2784 * visibility checks may perform I/O and allocate memory, they must be
2785 * done outside the critical section.
2786 */
2787 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2788 vacrel->vistest, true,
2789 deadoffsets, num_offsets,
2790 &all_frozen, &newest_live_xid,
2791 &vacrel->offnum))
2792 {
2794 if (all_frozen)
2795 {
2797 Assert(!TransactionIdIsValid(newest_live_xid));
2798 }
2799
2800 /*
2801 * Take the lock on the vmbuffer before entering a critical section.
2802 * The heap page lock must also be held while updating the VM to
2803 * ensure consistency.
2804 */
2806 }
2807
2809
2810 for (int i = 0; i < num_offsets; i++)
2811 {
2812 ItemId itemid;
2813 OffsetNumber toff = deadoffsets[i];
2814
2815 itemid = PageGetItemId(page, toff);
2816
2817 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2818 ItemIdSetUnused(itemid);
2819 unused[nunused++] = toff;
2820 }
2821
2822 Assert(nunused > 0);
2823
2824 /* Attempt to truncate line pointer array now */
2826
2827 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2828 {
2829 /*
2830 * The page is guaranteed to have had dead line pointers, so we always
2831 * set PD_ALL_VISIBLE.
2832 */
2833 PageSetAllVisible(page);
2834 PageClearPrunable(page);
2835 visibilitymap_set(blkno,
2836 vmbuffer, vmflags,
2837 vacrel->rel->rd_locator);
2838 conflict_xid = newest_live_xid;
2839 }
2840
2841 /*
2842 * Mark buffer dirty before we write WAL.
2843 */
2844 MarkBufferDirty(buffer);
2845
2846 /* XLOG stuff */
2847 if (RelationNeedsWAL(vacrel->rel))
2848 {
2849 log_heap_prune_and_freeze(vacrel->rel, buffer,
2850 vmflags != 0 ? vmbuffer : InvalidBuffer,
2851 vmflags,
2853 false, /* no cleanup lock required */
2855 NULL, 0, /* frozen */
2856 NULL, 0, /* redirected */
2857 NULL, 0, /* dead */
2858 unused, nunused);
2859 }
2860
2862
2864 {
2865 /* Count the newly set VM page for logging */
2866 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2867 vacrel->new_all_visible_pages++;
2868 if (all_frozen)
2869 vacrel->new_all_visible_all_frozen_pages++;
2870 }
2871
2872 /* Revert to the previous phase information for error traceback */
2874}
2875
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, the
 * failsafe condition checks, the progress-index array entries, and the
 * ereport level line).  The original file's line numbers are fused onto each
 * remaining line.  Confirm against the real vacuumlazy.c before editing.
 */
2876/*
2877 * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2878 * relfrozenxid and/or relminmxid that is dangerously far in the past.
2879 * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2880 * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2881 *
2882 * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2883 * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2884 * that it started out with.
2885 *
2886 * Returns true when failsafe has been triggered.
2887 */
2888static bool
2890{
2891 /* Don't warn more than once per VACUUM */
2893 return true;
2894
2896 {
2897 const int progress_index[] = {
2901 };
2903
2904 VacuumFailsafeActive = true;
2905
2906 /*
2907 * Abandon use of a buffer access strategy to allow use of all of
2908 * shared buffers. We assume the caller who allocated the memory for
2909 * the BufferAccessStrategy will free it.
2910 */
2911 vacrel->bstrategy = NULL;
2912
2913 /* Disable index vacuuming, index cleanup, and heap rel truncation */
2914 vacrel->do_index_vacuuming = false;
2915 vacrel->do_index_cleanup = false;
2916 vacrel->do_rel_truncate = false;
2917
2918 /* Reset the progress counters and set the failsafe mode */
2920
2922 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2923 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2924 vacrel->num_index_scans),
2925 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2926 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2927 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2928
2929 /* Stop applying cost limits from this point on */
2930 VacuumCostActive = false;
2932
2933 return true;
2934 }
2935
2936 return false;
2937}
2938
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, the
 * progress-index array entries, and the progress-report calls).  The
 * original file's line numbers are fused onto each remaining line.  Confirm
 * against the real vacuumlazy.c before editing.
 */
2939/*
2940 * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2941 */
2942static void
2944{
2945 double reltuples = vacrel->new_rel_tuples;
2946 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2947 const int progress_start_index[] = {
2950 };
2951 const int progress_end_index[] = {
2954 };
2956 int64 progress_end_val[2] = {0, 0};
2957
2958 Assert(vacrel->do_index_cleanup);
2959 Assert(vacrel->nindexes > 0);
2960
2961 /*
2962 * Report that we are now cleaning up indexes and the number of indexes to
2963 * cleanup.
2964 */
2966 progress_start_val[1] = vacrel->nindexes;
2968
2970 {
2971 for (int idx = 0; idx < vacrel->nindexes; idx++)
2972 {
2973 Relation indrel = vacrel->indrels[idx];
2974 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2975
2976 vacrel->indstats[idx] =
2977 lazy_cleanup_one_index(indrel, istat, reltuples,
2978 estimated_count, vacrel);
2979
2980 /* Report the number of indexes cleaned up */
2982 idx + 1);
2983 }
2984 }
2985 else
2986 {
2987 /* Outsource everything to parallel variant */
2989 vacrel->num_index_scans,
2990 estimated_count,
2991 &(vacrel->worker_usage.cleanup));
2992 }
2993
2994 /* Reset the progress counters */
2996}
2997
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, the
 * IndexVacuumInfo/istat declarations, and the error-callback save/restore
 * lines).  The original file's line numbers are fused onto each remaining
 * line.  Confirm against the real vacuumlazy.c before editing.
 */
2998/*
2999 * lazy_vacuum_one_index() -- vacuum index relation.
3000 *
3001 * Delete all the index tuples containing a TID collected in
3002 * vacrel->dead_items. Also update running statistics. Exact
3003 * details depend on index AM's ambulkdelete routine.
3004 *
3005 * reltuples is the number of heap tuples to be passed to the
3006 * bulkdelete callback. It's always assumed to be estimated.
3007 * See indexam.sgml for more info.
3008 *
3009 * Returns bulk delete stats derived from input stats
3010 */
3011static IndexBulkDeleteResult *
3013 double reltuples, LVRelState *vacrel)
3014{
3017
3018 ivinfo.index = indrel;
3019 ivinfo.heaprel = vacrel->rel;
3020 ivinfo.analyze_only = false;
3021 ivinfo.report_progress = false;
3022 ivinfo.estimated_count = true;
3023 ivinfo.message_level = DEBUG2;
3024 ivinfo.num_heap_tuples = reltuples;
3025 ivinfo.strategy = vacrel->bstrategy;
3026
3027 /*
3028 * Update error traceback information.
3029 *
3030 * The index name is saved during this phase and restored immediately
3031 * after this phase. See vacuum_error_callback.
3032 */
3033 Assert(vacrel->indname == NULL);
3038
3039 /* Do bulk deletion */
3040 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3041 vacrel->dead_items_info);
3042
3043 /* Revert to the previous phase information for error traceback */
3045 pfree(vacrel->indname);
3046 vacrel->indname = NULL;
3047
3048 return istat;
3049}
3050
/*
 * NOTE(review): incomplete listing -- the HTML extraction dropped lines made
 * of hyperlinked identifiers (the function-name line of the signature, the
 * IndexVacuumInfo/istat declarations, and the error-callback save/restore
 * lines).  The original file's line numbers are fused onto each remaining
 * line.  Confirm against the real vacuumlazy.c before editing.
 */
3051/*
3052 * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3053 *
3054 * Calls index AM's amvacuumcleanup routine. reltuples is the number
3055 * of heap tuples and estimated_count is true if reltuples is an
3056 * estimated value. See indexam.sgml for more info.
3057 *
3058 * Returns bulk delete stats derived from input stats
3059 */
3060static IndexBulkDeleteResult *
3062 double reltuples, bool estimated_count,
3064{
3067
3068 ivinfo.index = indrel;
3069 ivinfo.heaprel = vacrel->rel;
3070 ivinfo.analyze_only = false;
3071 ivinfo.report_progress = false;
3072 ivinfo.estimated_count = estimated_count;
3073 ivinfo.message_level = DEBUG2;
3074
3075 ivinfo.num_heap_tuples = reltuples;
3076 ivinfo.strategy = vacrel->bstrategy;
3077
3078 /*
3079 * Update error traceback information.
3080 *
3081 * The index name is saved during this phase and restored immediately
3082 * after this phase. See vacuum_error_callback.
3083 */
3084 Assert(vacrel->indname == NULL);
3089
3090 istat = vac_cleanup_one_index(&ivinfo, istat);
3091
3092 /* Revert to the previous phase information for error traceback */
3094 pfree(vacrel->indname);
3095 vacrel->indname = NULL;
3096
3097 return istat;
3098}
3099
3100/*
3101 * should_attempt_truncation - should we attempt to truncate the heap?
3102 *
3103 * Don't even think about it unless we have a shot at releasing a goodly
3104 * number of pages. Otherwise, the time taken isn't worth it, mainly because
3105 * an AccessExclusive lock must be replayed on any hot standby, where it can
3106 * be particularly disruptive.
3107 *
3108 * Also don't attempt it if wraparound failsafe is in effect. The entire
3109 * system might be refusing to allocate new XIDs at this point. The system
3110 * definitely won't return to normal unless and until VACUUM actually advances
3111 * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3112 * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3113 * truncate the table under these circumstances, an XID exhaustion error might
3114 * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3115 * There is very little chance of truncation working out when the failsafe is
3116 * in effect in any case. lazy_scan_prune makes the optimistic assumption
3117 * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3118 * we're called.
3119 */
 3120static bool
 3122{
 /*
  * NOTE(review): extraction lost the function-name line, the declaration of
  * possibly_freeable (type unknown from here; presumably BlockNumber), and
  * the second half of the if-condition (original lines 3130-3131, likely the
  * REL_TRUNCATE_MINIMUM / REL_TRUNCATE_FRACTION threshold test given the
  * macros declared at the top of this file) -- TODO confirm upstream.
  */
 3124
 /* Truncation is off entirely when disabled or when the failsafe is on. */
 3125 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
 3126 return false;
 3127
 /* Pages past the last known-nonempty page are candidates for release. */
 3128 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
 3129 if (possibly_freeable > 0 &&
 3132 return true;
 3133
 3134 return false;
 3135}
3136
3137/*
3138 * lazy_truncate_heap - try to truncate off any empty pages at the end
3139 */
 3140static void
 3142{
 /*
  * NOTE(review): extraction stripped the function name (lazy_truncate_heap
  * per the header comment above), the declarations of new_rel_pages and
  * lock_waiter_detected, and several whole call lines: the progress-phase
  * report (~3149-3150), update_vacuum_error_info (~3153), the conditional
  * lock acquisition (~3172), the retry-limit test (~3181-3182), the latch
  * wait/reset sequence (~3194-3198), the RelationGetNumberOfBlocks
  * recheck (~3206-3207), UnlockRelation (~3216), the
  * count_nondeletable_pages call (~3226), the empty-check (~3229),
  * UnlockRelation (~3232), RelationTruncate (~3239), UnlockRelation
  * (~3248), and the ereport argument (~3261-3262). Confirm upstream.
  */
 3143 BlockNumber orig_rel_pages = vacrel->rel_pages;
 3146 int lock_retry;
 3147
 3148 /* Report that we are now truncating */
 3151
 3152 /* Update error traceback information one last time */
 3154 vacrel->nonempty_pages, InvalidOffsetNumber);
 3155
 3156 /*
 3157 * Loop until no more truncating can be done.
 3158 */
 3159 do
 3160 {
 3161 /*
 3162 * We need full exclusive lock on the relation in order to do
 3163 * truncation. If we can't get it, give up rather than waiting --- we
 3164 * don't want to block other backends, and we don't want to deadlock
 3165 * (which is quite possible considering we already hold a lower-grade
 3166 * lock).
 3167 */
 3168 lock_waiter_detected = false;
 3169 lock_retry = 0;
 3170 while (true)
 3171 {
 3173 break;
 3174
 3175 /*
 3176 * Check for interrupts while trying to (re-)acquire the exclusive
 3177 * lock.
 3178 */
 3180
 3183 {
 3184 /*
 3185 * We failed to establish the lock in the specified number of
 3186 * retries. This means we give up truncating.
 3187 */
 3188 ereport(vacrel->verbose ? INFO : DEBUG2,
 3189 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
 3190 vacrel->relname)));
 3191 return;
 3192 }
 3193
 3199 }
 3200
 3201 /*
 3202 * Now that we have exclusive lock, look to see if the rel has grown
 3203 * whilst we were vacuuming with non-exclusive lock. If so, give up;
 3204 * the newly added pages presumably contain non-deletable tuples.
 3205 */
 3208 {
 3209 /*
 3210 * Note: we intentionally don't update vacrel->rel_pages with the
 3211 * new rel size here. If we did, it would amount to assuming that
 3212 * the new pages are empty, which is unlikely. Leaving the numbers
 3213 * alone amounts to assuming that the new pages have the same
 3214 * tuple density as existing ones, which is less unlikely.
 3215 */
 3217 return;
 3218 }
 3219
 3220 /*
 3221 * Scan backwards from the end to verify that the end pages actually
 3222 * contain no tuples. This is *necessary*, not optional, because
 3223 * other backends could have added tuples to these pages whilst we
 3224 * were vacuuming.
 3225 */
 3227 vacrel->blkno = new_rel_pages;
 3228
 3230 {
 3231 /* can't do anything after all */
 3233 return;
 3234 }
 3235
 3236 /*
 3237 * Okay to truncate.
 3238 */
 3240
 3241 /*
 3242 * We can release the exclusive lock as soon as we have truncated.
 3243 * Other backends can't safely access the relation until they have
 3244 * processed the smgr invalidation that smgrtruncate sent out ... but
 3245 * that should happen as part of standard invalidation processing once
 3246 * they acquire lock on the relation.
 3247 */
 3249
 3250 /*
 3251 * Update statistics. Here, it *is* correct to adjust rel_pages
 3252 * without also touching reltuples, since the tuple count wasn't
 3253 * changed by the truncation.
 3254 */
 3255 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
 3256 vacrel->rel_pages = new_rel_pages;
 3257
 3258 ereport(vacrel->verbose ? INFO : DEBUG2,
 3259 (errmsg("table \"%s\": truncated %u to %u pages",
 3260 vacrel->relname,
 /* Retry only if a lock waiter interrupted the backward scan early. */
 3263 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
 3264}
3265
3266/*
3267 * Rescan end pages to verify that they are (still) empty of tuples.
3268 *
3269 * Returns number of nondeletable pages (last nonempty page + 1).
3270 */
 3271static BlockNumber
 3273{
 /*
  * NOTE(review): extraction stripped the function name
  * (count_nondeletable_pages per the header comment above), the
  * lock_waiter_detected out-parameter in the signature, the
  * StaticAssertDecl condition (~3274), the declarations of prefetchedUntil,
  * currenttime/elapsed, prefetchStart/pblkno, and several call lines: the
  * GetCurrentTimestamp/instr_time capture (~3310-3314), the
  * VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL comparison (~3317), the
  * LockHasWaitersRelation check (~3319), CHECK_FOR_INTERRUPTS (~3337),
  * the PrefetchBuffer call (~3350-3351), the prefetchedUntil update
  * (~3353), the ReadBufferExtended call (~3356), the LockBuffer (~3360),
  * and the UnlockReleaseBuffer calls (~3366, ~3392). Confirm upstream.
  */
 3275 "prefetch size must be power of 2");
 3276
 3277 BlockNumber blkno;
 3279 instr_time starttime;
 3280
 3281 /* Initialize the starttime if we check for conflicting lock requests */
 3282 INSTR_TIME_SET_CURRENT(starttime);
 3283
 3284 /*
 3285 * Start checking blocks at what we believe relation end to be and move
 3286 * backwards. (Strange coding of loop control is needed because blkno is
 3287 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
 3288 * in forward direction, so that OS-level readahead can kick in.
 3289 */
 3290 blkno = vacrel->rel_pages;
 3292 while (blkno > vacrel->nonempty_pages)
 3293 {
 3294 Buffer buf;
 3295 Page page;
 3296 OffsetNumber offnum,
 3297 maxoff;
 3298 bool hastup;
 3299
 3300 /*
 3301 * Check if another process requests a lock on our relation. We are
 3302 * holding an AccessExclusiveLock here, so they will be waiting. We
 3303 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
 3304 * only check if that interval has elapsed once every 32 blocks to
 3305 * keep the number of system calls and actual shared lock table
 3306 * lookups to a minimum.
 3307 */
 3308 if ((blkno % 32) == 0)
 3309 {
 3312
 3315 INSTR_TIME_SUBTRACT(elapsed, starttime);
 3316 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
 3318 {
 3320 {
 3321 ereport(vacrel->verbose ? INFO : DEBUG2,
 3322 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
 3323 vacrel->relname)));
 3324
 /* Tell the caller to release the lock and retry later. */
 3325 *lock_waiter_detected = true;
 3326 return blkno;
 3327 }
 3328 starttime = currenttime;
 3329 }
 3330 }
 3331
 3332 /*
 3333 * We don't insert a vacuum delay point here, because we have an
 3334 * exclusive lock on the table which we want to hold for as short a
 3335 * time as possible. We still need to check for interrupts however.
 3336 */
 3338
 3339 blkno--;
 3340
 3341 /* If we haven't prefetched this lot yet, do so now. */
 3342 if (prefetchedUntil > blkno)
 3343 {
 3346
 /* Round down to a PREFETCH_SIZE-aligned start (power-of-2 mask). */
 3347 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
 3348 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
 3349 {
 3352 }
 3354 }
 3355
 3357 vacrel->bstrategy);
 3358
 3359 /* In this phase we only need shared access to the buffer */
 3361
 3362 page = BufferGetPage(buf);
 3363
 /* New or empty pages are trivially deletable; keep scanning back. */
 3364 if (PageIsNew(page) || PageIsEmpty(page))
 3365 {
 3367 continue;
 3368 }
 3369
 3370 hastup = false;
 3371 maxoff = PageGetMaxOffsetNumber(page);
 3372 for (offnum = FirstOffsetNumber;
 3373 offnum <= maxoff;
 3374 offnum = OffsetNumberNext(offnum))
 3375 {
 3376 ItemId itemid;
 3377
 3378 itemid = PageGetItemId(page, offnum);
 3379
 3380 /*
 3381 * Note: any non-unused item should be taken as a reason to keep
 3382 * this page. Even an LP_DEAD item makes truncation unsafe, since
 3383 * we must not have cleaned out its index entries.
 3384 */
 3385 if (ItemIdIsUsed(itemid))
 3386 {
 3387 hastup = true;
 3388 break; /* can stop scanning */
 3389 }
 3390 } /* scan along page */
 3391
 3393
 3394 /* Done scanning if we found a tuple here */
 3395 if (hastup)
 3396 return blkno + 1;
 3397 }
 3398
 3399 /*
 3400 * If we fall out of the loop, all the previously-thought-to-be-empty
 3401 * pages still are; we need not bother to look at the last known-nonempty
 3402 * page.
 3403 */
 3404 return vacrel->nonempty_pages;
 3405}
3406
3407/*
3408 * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3409 * shared memory). Sets both in vacrel for caller.
3410 *
3411 * Also handles parallel initialization as part of allocating dead_items in
3412 * DSM when required.
3413 */
 3414static void
 3415dead_items_alloc(LVRelState *vacrel, int nworkers)
 3416{
 /*
  * NOTE(review): extraction stripped the vac_work_mem declaration/initializer
  * (original lines 3418-3420; it clearly selects autovacuum_work_mem when
  * set, else presumably maintenance_work_mem), the temp-relation check at
  * ~3433 (likely RelationUsesLocalBuffers), the elevel/WARNING token at
  * ~3440, an argument line of parallel_vacuum_init (~3447), the
  * ParallelVacuumIsActive test (~3455), and the
  * parallel_vacuum_get_dead_items call (~3457). Confirm upstream.
  */
 3417 VacDeadItemsInfo *dead_items_info;
 3419 autovacuum_work_mem != -1 ?
 3421
 3422 /*
 3423 * Initialize state for a parallel vacuum. As of now, only one worker can
 3424 * be used for an index, so we invoke parallelism only if there are at
 3425 * least two indexes on a table.
 3426 */
 3427 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
 3428 {
 3429 /*
 3430 * Since parallel workers cannot access data in temporary tables, we
 3431 * can't perform parallel vacuum on them.
 3432 */
 3434 {
 3435 /*
 3436 * Give warning only if the user explicitly tries to perform a
 3437 * parallel vacuum on the temporary table.
 3438 */
 3439 if (nworkers > 0)
 3441 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
 3442 vacrel->relname)));
 3443 }
 3444 else
 3445 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
 3446 vacrel->nindexes, nworkers,
 3448 vacrel->verbose ? INFO : DEBUG2,
 3449 vacrel->bstrategy);
 3450
 3451 /*
 3452 * If parallel mode started, dead_items and dead_items_info spaces are
 3453 * allocated in DSM.
 3454 */
 3456 {
 3458 &vacrel->dead_items_info);
 3459 return;
 3460 }
 3461 }
 3462
 3463 /*
 3464 * Serial VACUUM case. Allocate both dead_items and dead_items_info
 3465 * locally.
 3466 */
 3467
 3468 dead_items_info = palloc_object(VacDeadItemsInfo)
 3469 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
 3470 dead_items_info->num_items = 0;
 3471 vacrel->dead_items_info = dead_items_info;
 3472
 3473 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
 3474}
3475
3476/*
3477 * Add the given block number and offset numbers to dead_items.
3478 */
 3479static void
 3481 int num_offsets)
 3482{
 /*
  * NOTE(review): extraction stripped the function name (dead_items_add per
  * the header comment above), the first parameter line (vacrel, blkno and
  * the offsets array, judging from the body), the two progress-counter
  * identifiers inside prog_index[] (~3484-3485; the dead-item-ids and
  * dead-tuple-bytes progress constants), and the
  * pgstat_progress_update_multi_param call (~3495). Confirm upstream.
  */
 3483 const int prog_index[2] = {
 3486 };
 3487 int64 prog_val[2];
 3488
 /* Record the offsets in the shared/local TID store and bump the count. */
 3489 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
 3490 vacrel->dead_items_info->num_items += num_offsets;
 3491
 3492 /* update the progress information */
 3493 prog_val[0] = vacrel->dead_items_info->num_items;
 3494 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
 3496}
3497
3498/*
3499 * Forget all collected dead items.
3500 */
 3501static void
 3503{
 /*
  * NOTE(review): extraction stripped the function name (dead_items_reset
  * per the header comment above), its parameter line, the condition at
  * ~3508 (presumably ParallelVacuumIsActive(vacrel), given the parallel
  * branch below), and the parallel reset call at ~3510. Confirm upstream.
  */
 3504 /* Update statistics for dead items */
 3505 vacrel->num_dead_items_resets++;
 3506 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
 3507
 /* Parallel case: the dead-items store lives in DSM; reacquire it. */
 3509 {
 3511 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
 3512 &vacrel->dead_items_info);
 3513 return;
 3514 }
 3515
 3516 /* Recreate the tidstore with the same max_bytes limitation */
 3517 TidStoreDestroy(vacrel->dead_items);
 3518 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
 3519
 3520 /* Reset the counter */
 3521 vacrel->dead_items_info->num_items = 0;
 3522}
3523
3524/*
3525 * Perform cleanup for resources allocated in dead_items_alloc
3526 */
 3527static void
 3529{
 /*
  * NOTE(review): extraction stripped the function name (dead_items_cleanup
  * per the header comment above), its parameter line, and the early-exit
  * condition at ~3530 (presumably !ParallelVacuumIsActive(vacrel), given
  * the parallel_vacuum_end call below). Confirm upstream.
  */
 3531 {
 3532 /* Don't bother with pfree here */
 3533 return;
 3534 }
 3535
 3536 /* End parallel mode */
 3537 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
 3538 vacrel->pvs = NULL;
 3539}
3540
3541#ifdef USE_ASSERT_CHECKING
3542
3543/*
3544 * Wrapper for heap_page_would_be_all_visible() which can be used for callers
3545 * that expect no LP_DEAD on the page. Currently assert-only, but there is no
3546 * reason not to use it outside of asserts.
3547 */
 3548bool
 3550 GlobalVisState *vistest,
 3551 bool *all_frozen,
 3552 TransactionId *newest_live_xid,
 3554{
 /*
  * NOTE(review): extraction stripped the function name (presumably
  * heap_page_is_all_visible per the wrapper comment above), the first
  * parameter line, the trailing parameter (~3553, likely the
  * logging_offnum pointer), and the "return heap_page_would_be_all_visible("
  * line at ~3561 plus its final argument (~3566). Confirm upstream.
  */
 3555 /*
 3556 * Pass allow_update_vistest as false so that the GlobalVisState
 3557 * boundaries used here match those used by the pruning code we are
 3558 * cross-checking. Allowing an update could move the boundaries between
 3559 * the two calls, causing a spurious assertion failure.
 3560 */
 3562 vistest, false,
 3563 NULL, 0,
 3564 all_frozen,
 3565 newest_live_xid,
 3567}
3568#endif
3569
3570/*
3571 * Check whether the heap page in buf is all-visible except for the dead
3572 * tuples referenced in the deadoffsets array.
3573 *
3574 * Vacuum uses this to check if a page would become all-visible after reaping
3575 * known dead tuples. This function does not remove the dead items.
3576 *
3577 * This cannot be called in a critical section, as the visibility checks may
3578 * perform IO and allocate memory.
3579 *
3580 * Returns true if the page is all-visible other than the provided
3581 * deadoffsets and false otherwise.
3582 *
3583 * vistest is used to determine visibility. If allow_update_vistest is true,
3584 * the boundaries of the GlobalVisState may be updated when checking the
3585 * visibility of the newest live XID on the page.
3586 *
3587 * Output parameters:
3588 *
3589 * - *all_frozen: true if every tuple on the page is frozen
3590 * - *newest_live_xid: newest xmin of live tuples on the page
3591 * - *logging_offnum: OffsetNumber of current tuple being processed;
3592 * used by vacuum's error callback system.
3593 *
3594 * Callers looking to verify that the page is already all-visible can call
3595 * heap_page_is_all_visible().
3596 *
3597 * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3598 * If you modify this function, ensure consistency with that code. An
3599 * assertion cross-checks that both remain in agreement. Do not introduce new
3600 * side-effects.
3601 */
 3602static bool
 3604 GlobalVisState *vistest,
 3606 OffsetNumber *deadoffsets,
 3607 int ndeadoffsets,
 3608 bool *all_frozen,
 3609 TransactionId *newest_live_xid,
 3611{
 /*
  * NOTE(review): extraction stripped the function name
  * (heap_page_would_be_all_visible per the header comment above), two
  * parameter lines (~3603, ~3605 -- per that comment, the relation/buffer
  * and an allow_update_vistest flag), the final logging_offnum parameter
  * (~3610), the blockno local (~3613), the HTSV_Result local (~3640), the
  * HeapTupleSatisfiesVacuum switch expression (~3679-3680), the
  * xmin-committed check (~3687), the xmin-visibility test (~3709), the
  * needs-freeze test (~3714), the in-progress case labels (~3720-3722),
  * the allow_update_vistest argument (~3742), and the logging_offnum
  * clear (~3749). Confirm upstream before editing.
  */
 3612 Page page = BufferGetPage(buf);
 3614 OffsetNumber offnum,
 3615 maxoff;
 3616 bool all_visible = true;
 3617 int matched_dead_count = 0;
 3618
 3619 *newest_live_xid = InvalidTransactionId;
 3620 *all_frozen = true;
 3621
 3622 Assert(ndeadoffsets == 0 || deadoffsets);
 3623
 3624#ifdef USE_ASSERT_CHECKING
 3625 /* Confirm input deadoffsets[] is strictly sorted */
 3626 if (ndeadoffsets > 1)
 3627 {
 3628 for (int i = 1; i < ndeadoffsets; i++)
 3629 Assert(deadoffsets[i - 1] < deadoffsets[i]);
 3630 }
 3631#endif
 3632
 3633 maxoff = PageGetMaxOffsetNumber(page);
 3634 for (offnum = FirstOffsetNumber;
 3635 offnum <= maxoff && all_visible;
 3636 offnum = OffsetNumberNext(offnum))
 3637 {
 3638 ItemId itemid;
 3639 HeapTupleData tuple;
 3641
 3642 /*
 3643 * Set the offset number so that we can display it along with any
 3644 * error that occurred while processing this tuple.
 3645 */
 3646 *logging_offnum = offnum;
 3647 itemid = PageGetItemId(page, offnum);
 3648
 3649 /* Unused or redirect line pointers are of no interest */
 3650 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
 3651 continue;
 3652
 3653 ItemPointerSet(&(tuple.t_self), blockno, offnum);
 3654
 3655 /*
 3656 * Dead line pointers can have index pointers pointing to them. So
 3657 * they can't be treated as visible
 3658 */
 3659 if (ItemIdIsDead(itemid))
 3660 {
 /*
  * A dead item is tolerable only if it appears (in order) in the
  * caller-provided deadoffsets[]; any other LP_DEAD defeats
  * all-visibility.
  */
 3661 if (!deadoffsets ||
 3663 deadoffsets[matched_dead_count] != offnum)
 3664 {
 3665 *all_frozen = all_visible = false;
 3666 break;
 3667 }
 3669 continue;
 3670 }
 3671
 3672 Assert(ItemIdIsNormal(itemid));
 3673
 3674 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
 3675 tuple.t_len = ItemIdGetLength(itemid);
 3676 tuple.t_tableOid = RelationGetRelid(rel);
 3677
 3678 /* Visibility checks may do IO or allocate memory */
 3681 {
 3682 case HEAPTUPLE_LIVE:
 3683 {
 3684 TransactionId xmin;
 3685
 3686 /* Check heap_prune_record_unchanged_lp_normal comments */
 3688 {
 3689 all_visible = false;
 3690 *all_frozen = false;
 3691 break;
 3692 }
 3693
 3694 /*
 3695 * The inserter definitely committed. But we don't know if
 3696 * it is old enough that everyone sees it as committed.
 3697 * Don't check that now.
 3698 *
 3699 * If we scan all tuples without finding one that prevents
 3700 * the page from being all-visible, we then check whether
 3701 * any snapshot still considers the newest XID on the page
 3702 * to be running. In that case, the page is not considered
 3703 * all-visible.
 3704 */
 3705 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
 3706
 3707 /* Track newest xmin on page. */
 3708 if (TransactionIdFollows(xmin, *newest_live_xid) &&
 3710 *newest_live_xid = xmin;
 3711
 3712 /* Check whether this tuple is already frozen or not */
 3713 if (all_visible && *all_frozen &&
 3715 *all_frozen = false;
 3716 }
 3717 break;
 3718
 3719 case HEAPTUPLE_DEAD:
 3723 {
 3724 all_visible = false;
 3725 *all_frozen = false;
 3726 break;
 3727 }
 3728 default:
 3729 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
 3730 break;
 3731 }
 3732 } /* scan along page */
 3733
 3734 /*
 3735 * After processing all the live tuples on the page, if the newest xmin
 3736 * among them may still be considered running by any snapshot, the page
 3737 * cannot be all-visible.
 3738 */
 3739 if (all_visible &&
 3740 TransactionIdIsNormal(*newest_live_xid) &&
 3741 GlobalVisTestXidConsideredRunning(vistest, *newest_live_xid,
 3743 {
 3744 all_visible = false;
 3745 *all_frozen = false;
 3746 }
 3747
 3748 /* Clear the offset information once we have processed the given page. */
 3750
 3751 return all_visible;
 3752}
3753
3754/*
3755 * Update index statistics in pg_class if the statistics are accurate.
3756 */
 3757static void
 3759{
 /*
  * NOTE(review): extraction stripped the function name (presumably
  * update_relstats_all_indexes, given the header comment above), its
  * parameter line, the "vac_update_relstats(indrel," call line (~3775),
  * and two of its argument lines (~3780-3781). Confirm upstream.
  */
 3760 Relation *indrels = vacrel->indrels;
 3761 int nindexes = vacrel->nindexes;
 3762 IndexBulkDeleteResult **indstats = vacrel->indstats;
 3763
 3764 Assert(vacrel->do_index_cleanup);
 3765
 3766 for (int idx = 0; idx < nindexes; idx++)
 3767 {
 3768 Relation indrel = indrels[idx];
 3769 IndexBulkDeleteResult *istat = indstats[idx];
 3770
 /* Skip indexes with no stats or only estimated (inaccurate) counts. */
 3771 if (istat == NULL || istat->estimated_count)
 3772 continue;
 3773
 3774 /* Update index statistics */
 3776 istat->num_pages,
 3777 istat->num_index_tuples,
 3778 0, 0,
 3779 false,
 3782 NULL, NULL, false);
 3783 }
 3784}
3785
3786/*
3787 * Error context callback for errors occurring during vacuum. The error
3788 * context messages for index phases should match the messages set in parallel
3789 * vacuum. If you change this function for those phases, change
3790 * parallel_vacuum_error_callback() as well.
3791 */
 3792static void
 3794{
 /*
  * NOTE(review): extraction stripped the function name
  * (vacuum_error_callback per the header comment above), its "void *arg"
  * parameter line, the errinfo local derived from arg (~3795), and the
  * VACUUM_ERRCB_PHASE_* case labels (~3799, ~3814, ~3829, ~3834, ~3839,
  * ~3845); the branch bodies make the phase each case belongs to clear.
  * Confirm upstream.
  */
 3796
 3797 switch (errinfo->phase)
 3798 {
 /* Heap-scan phase: report block (and offset, when known). */
 3800 if (BlockNumberIsValid(errinfo->blkno))
 3801 {
 3802 if (OffsetNumberIsValid(errinfo->offnum))
 3803 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
 3804 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
 3805 else
 3806 errcontext("while scanning block %u of relation \"%s.%s\"",
 3807 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
 3808 }
 3809 else
 3810 errcontext("while scanning relation \"%s.%s\"",
 3811 errinfo->relnamespace, errinfo->relname);
 3812 break;
 3813
 /* Heap-vacuum phase: same granularity as the scan phase. */
 3815 if (BlockNumberIsValid(errinfo->blkno))
 3816 {
 3817 if (OffsetNumberIsValid(errinfo->offnum))
 3818 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
 3819 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
 3820 else
 3821 errcontext("while vacuuming block %u of relation \"%s.%s\"",
 3822 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
 3823 }
 3824 else
 3825 errcontext("while vacuuming relation \"%s.%s\"",
 3826 errinfo->relnamespace, errinfo->relname);
 3827 break;
 3828
 /* Index-vacuum phase: messages must match parallel_vacuum_error_callback. */
 3830 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
 3831 errinfo->indname, errinfo->relnamespace, errinfo->relname);
 3832 break;
 3833
 /* Index-cleanup phase: likewise kept in sync with the parallel callback. */
 3835 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
 3836 errinfo->indname, errinfo->relnamespace, errinfo->relname);
 3837 break;
 3838
 /* Truncate phase: blkno is the target size the relation is shrinking to. */
 3840 if (BlockNumberIsValid(errinfo->blkno))
 3841 errcontext("while truncating relation \"%s.%s\" to %u blocks",
 3842 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
 3843 break;
 3844
 3846 default:
 3847 return; /* do nothing; the errinfo may not be
 3848 * initialized */
 3849 }
 3850}
3851
3852/*
3853 * Updates the information required for vacuum error callback. This also saves
3854 * the current information which can be later restored via restore_vacuum_error_info.
3855 */
 3856static void
 3858 int phase, BlockNumber blkno, OffsetNumber offnum)
 3859{
 /*
  * NOTE(review): extraction stripped the function name
  * (update_vacuum_error_info per the header comment above) and the first
  * parameter line (~3857; the LVRelState *vacrel and LVSavedErrInfo
  * *saved_vacrel parameters, judging from the body). Confirm upstream.
  */
 /* Save the current error-context state if the caller asked for it. */
 3860 if (saved_vacrel)
 3861 {
 3862 saved_vacrel->offnum = vacrel->offnum;
 3863 saved_vacrel->blkno = vacrel->blkno;
 3864 saved_vacrel->phase = vacrel->phase;
 3865 }
 3866
 /* Install the new phase/position used by vacuum_error_callback. */
 3867 vacrel->blkno = blkno;
 3868 vacrel->offnum = offnum;
 3869 vacrel->phase = phase;
 3870}
3871
3872/*
3873 * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3874 */
 3875static void
 3878{
 /*
  * NOTE(review): extraction stripped the function name
  * (restore_vacuum_error_info per the header comment above) and its
  * parameter lines (~3876-3877; vacrel plus a const saved_vacrel,
  * judging from the body). Restores state saved by
  * update_vacuum_error_info. Confirm upstream.
  */
 3879 vacrel->blkno = saved_vacrel->blkno;
 3880 vacrel->offnum = saved_vacrel->offnum;
 3881 vacrel->phase = saved_vacrel->phase;
 3882}
Datum idx(PG_FUNCTION_ARGS)
Definition _int_op.c:262
int autovacuum_work_mem
Definition autovacuum.c:126
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1715
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1775
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1639
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
uint32 BlockNumber
Definition block.h:31
#define InvalidBlockNumber
Definition block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition block.h:71
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
bool track_io_timing
Definition bufmgr.c:192
void CheckBufferIsPinnedOnce(Buffer buffer)
Definition bufmgr.c:6637
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4446
PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
Definition bufmgr.c:787
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5586
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5603
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3147
void LockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6670
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition bufmgr.c:926
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6843
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:309
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:468
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:212
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:222
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:207
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:334
@ RBM_NORMAL
Definition bufmgr.h:46
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:419
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:1000
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:844
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:248
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:454
static bool PageIsNew(const PageData *page)
Definition bufpage.h:258
#define SizeOfPageHeaderData
Definition bufpage.h:241
static void PageSetAllVisible(Page page)
Definition bufpage.h:459
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:268
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:378
PageData * Page
Definition bufpage.h:81
#define PageClearPrunable(page)
Definition bufpage.h:485
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:396
uint8_t uint8
Definition c.h:622
#define ngettext(s, p, n)
Definition c.h:1270
#define Max(x, y)
Definition c.h:1085
#define Assert(condition)
Definition c.h:943
int64_t int64
Definition c.h:621
TransactionId MultiXactId
Definition c.h:746
int32_t int32
Definition c.h:620
#define unlikely(x)
Definition c.h:438
uint32_t uint32
Definition c.h:624
#define lengthof(array)
Definition c.h:873
#define StaticAssertDecl(condition, errmessage)
Definition c.h:1008
uint32 TransactionId
Definition c.h:736
size_t Size
Definition c.h:689
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
int64 TimestampTz
Definition timestamp.h:39
Datum arg
Definition elog.c:1323
ErrorContextCallback * error_context_stack
Definition elog.c:100
#define _(x)
Definition elog.c:96
#define LOG
Definition elog.h:32
#define errcontext
Definition elog.h:200
int errhint(const char *fmt,...) pg_attribute_printf(1
int errdetail(const char *fmt,...) pg_attribute_printf(1
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
#define WARNING
Definition elog.h:37
#define DEBUG2
Definition elog.h:30
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
#define INFO
Definition elog.h:35
#define ereport(elevel,...)
Definition elog.h:152
#define palloc_object(type)
Definition fe_memutils.h:74
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_object(type)
Definition fe_memutils.h:75
void FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
Definition freespace.c:377
Size GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
Definition freespace.c:244
void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
Definition freespace.c:194
bool VacuumCostActive
Definition globals.c:161
int VacuumCostBalance
Definition globals.c:160
int maintenance_work_mem
Definition globals.c:135
volatile uint32 CritSectionCount
Definition globals.c:45
struct Latch * MyLatch
Definition globals.c:65
Oid MyDatabaseId
Definition globals.c:96
bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
Definition heapam.c:7791
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7846
#define HEAP_PAGE_PRUNE_FREEZE
Definition heapam.h:43
#define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH
Definition heapam.h:44
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:140
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:141
@ HEAPTUPLE_LIVE
Definition heapam.h:139
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:142
@ HEAPTUPLE_DEAD
Definition heapam.h:138
#define HEAP_PAGE_PRUNE_SET_VM
Definition heapam.h:45
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:254
@ PRUNE_VACUUM_SCAN
Definition heapam.h:253
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition heapam.h:42
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define false
#define INJECTION_POINT(name, arg)
#define INSTR_TIME_SET_CURRENT(t)
Definition instr_time.h:426
#define INSTR_TIME_SUBTRACT(x, y)
Definition instr_time.h:436
#define INSTR_TIME_GET_MICROSEC(t)
Definition instr_time.h:454
WalUsage pgWalUsage
Definition instrument.c:27
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:367
BufferUsage pgBufferUsage
Definition instrument.c:25
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:327
static int pg_cmp_u16(uint16 a, uint16 b)
Definition int.h:707
int b
Definition isn.c:74
int a
Definition isn.c:73
int i
Definition isn.c:77
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
void ResetLatch(Latch *latch)
Definition latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition latch.c:172
void UnlockRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:314
bool ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:278
bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode)
Definition lmgr.c:367
#define NoLock
Definition lockdefs.h:34
#define AccessExclusiveLock
Definition lockdefs.h:43
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1285
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3561
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:398
#define START_CRIT_SECTION()
Definition miscadmin.h:152
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:125
#define END_CRIT_SECTION()
Definition miscadmin.h:154
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2865
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2879
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
#define InvalidMultiXactId
Definition multixact.h:25
static char * errmsg
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition off.h:39
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
#define MaxOffsetNumber
Definition off.h:28
static int verbose
NameData relname
Definition pg_class.h:40
const void * data
uint32 pg_prng_uint32(pg_prng_state *state)
Definition pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition pg_prng.c:34
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
static char buf[DEFAULT_XLOG_SEG_SIZE]
int64 PgStat_Counter
Definition pgstat.h:71
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define qsort(a, b, c, d)
Definition port.h:495
static int fb(int x)
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4127
bool GlobalVisTestXidConsideredRunning(GlobalVisState *state, TransactionId xid, bool allow_update)
Definition procarray.c:4328
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_DEAD_TUPLE_BYTES
Definition progress.h:27
#define PROGRESS_VACUUM_PHASE_SCAN_HEAP
Definition progress.h:36
#define PROGRESS_VACUUM_TOTAL_HEAP_BLKS
Definition progress.h:22
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_NUM_INDEX_VACUUMS
Definition progress.h:25
#define PROGRESS_VACUUM_PHASE_VACUUM_HEAP
Definition progress.h:38
#define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS
Definition progress.h:28
#define PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
Definition progress.h:26
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_HEAP_BLKS_SCANNED
Definition progress.h:23
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_PHASE_INDEX_CLEANUP
Definition progress.h:39
#define PROGRESS_VACUUM_PHASE_VACUUM_INDEX
Definition progress.h:37
#define PROGRESS_VACUUM_MODE_FAILSAFE
Definition progress.h:46
#define PROGRESS_VACUUM_INDEXES_PROCESSED
Definition progress.h:30
#define PROGRESS_VACUUM_INDEXES_TOTAL
Definition progress.h:29
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define PROGRESS_VACUUM_HEAP_BLKS_VACUUMED
Definition progress.h:24
#define PROGRESS_VACUUM_PHASE_TRUNCATE
Definition progress.h:40
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:1090
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2561
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
void read_stream_end(ReadStream *stream)
#define READ_STREAM_MAINTENANCE
Definition read_stream.h:28
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
#define RelationGetRelid(relation)
Definition rel.h:516
#define RelationGetRelationName(relation)
Definition rel.h:550
#define RelationNeedsWAL(relation)
Definition rel.h:639
#define RelationUsesLocalBuffers(relation)
Definition rel.h:648
#define RelationGetNamespace(relation)
Definition rel.h:557
@ MAIN_FORKNUM
Definition relpath.h:58
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition storage.c:289
char * dbname
Definition streamutil.c:49
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:299
void(* callback)(void *arg)
Definition elog.h:300
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
BlockNumber pages_deleted
Definition genam.h:90
BlockNumber pages_newly_deleted
Definition genam.h:89
BlockNumber pages_free
Definition genam.h:91
BlockNumber num_pages
Definition genam.h:85
double num_index_tuples
Definition genam.h:87
BlockNumber next_eager_scan_region_start
Definition vacuumlazy.c:380
ParallelVacuumState * pvs
Definition vacuumlazy.c:261
bool next_unskippable_eager_scanned
Definition vacuumlazy.c:365
VacDeadItemsInfo * dead_items_info
Definition vacuumlazy.c:304
PVWorkerUsage worker_usage
Definition vacuumlazy.c:352
Buffer next_unskippable_vmbuffer
Definition vacuumlazy.c:366
OffsetNumber offnum
Definition vacuumlazy.c:289
TidStore * dead_items
Definition vacuumlazy.c:303
int64 tuples_deleted
Definition vacuumlazy.c:355
BlockNumber nonempty_pages
Definition vacuumlazy.c:335
BlockNumber eager_scan_remaining_fails
Definition vacuumlazy.c:412
bool do_rel_truncate
Definition vacuumlazy.c:273
BlockNumber scanned_pages
Definition vacuumlazy.c:307
int num_dead_items_resets
Definition vacuumlazy.c:345
bool aggressive
Definition vacuumlazy.c:264
BlockNumber new_frozen_tuple_pages
Definition vacuumlazy.c:316
GlobalVisState * vistest
Definition vacuumlazy.c:277
BlockNumber removed_pages
Definition vacuumlazy.c:315
int num_index_scans
Definition vacuumlazy.c:344
IndexBulkDeleteResult ** indstats
Definition vacuumlazy.c:341
BlockNumber new_all_frozen_pages
Definition vacuumlazy.c:331
double new_live_tuples
Definition vacuumlazy.c:339
double new_rel_tuples
Definition vacuumlazy.c:338
BlockNumber new_all_visible_all_frozen_pages
Definition vacuumlazy.c:328
BlockNumber new_all_visible_pages
Definition vacuumlazy.c:319
TransactionId NewRelfrozenXid
Definition vacuumlazy.c:279
Relation rel
Definition vacuumlazy.c:255
bool consider_bypass_optimization
Definition vacuumlazy.c:268
BlockNumber rel_pages
Definition vacuumlazy.c:306
Size total_dead_items_bytes
Definition vacuumlazy.c:346
BlockNumber next_unskippable_block
Definition vacuumlazy.c:364
int64 recently_dead_tuples
Definition vacuumlazy.c:359
int64 tuples_frozen
Definition vacuumlazy.c:356
char * dbname
Definition vacuumlazy.c:284
BlockNumber missed_dead_pages
Definition vacuumlazy.c:334
BlockNumber current_block
Definition vacuumlazy.c:363
char * relnamespace
Definition vacuumlazy.c:285
int64 live_tuples
Definition vacuumlazy.c:358
int64 lpdead_items
Definition vacuumlazy.c:357
BufferAccessStrategy bstrategy
Definition vacuumlazy.c:260
BlockNumber eager_scan_remaining_successes
Definition vacuumlazy.c:391
bool skippedallvis
Definition vacuumlazy.c:281
BlockNumber lpdead_item_pages
Definition vacuumlazy.c:333
BlockNumber eager_scanned_pages
Definition vacuumlazy.c:313
Relation * indrels
Definition vacuumlazy.c:256
bool skipwithvm
Definition vacuumlazy.c:266
bool do_index_cleanup
Definition vacuumlazy.c:272
MultiXactId NewRelminMxid
Definition vacuumlazy.c:280
int64 missed_dead_tuples
Definition vacuumlazy.c:360
BlockNumber blkno
Definition vacuumlazy.c:288
struct VacuumCutoffs cutoffs
Definition vacuumlazy.c:276
char * relname
Definition vacuumlazy.c:286
BlockNumber eager_scan_max_fails_per_region
Definition vacuumlazy.c:402
VacErrPhase phase
Definition vacuumlazy.c:290
char * indname
Definition vacuumlazy.c:287
bool do_index_vacuuming
Definition vacuumlazy.c:271
BlockNumber blkno
Definition vacuumlazy.c:419
VacErrPhase phase
Definition vacuumlazy.c:421
OffsetNumber offnum
Definition vacuumlazy.c:420
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
size_t max_bytes
Definition vacuum.h:298
int64 num_items
Definition vacuum.h:299
int nworkers
Definition vacuum.h:250
VacOptValue truncate
Definition vacuum.h:235
int log_vacuum_min_duration
Definition vacuum.h:226
uint32 options
Definition vacuum.h:218
bool is_wraparound
Definition vacuum.h:225
VacOptValue index_cleanup
Definition vacuum.h:234
double max_eager_freeze_failure_rate
Definition vacuum.h:243
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
int64 wal_fpi
Definition instrument.h:54
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
TidStoreIter * TidStoreBeginIterate(TidStore *ts)
Definition tidstore.c:471
void TidStoreEndIterate(TidStoreIter *iter)
Definition tidstore.c:518
TidStoreIterResult * TidStoreIterateNext(TidStoreIter *iter)
Definition tidstore.c:493
TidStore * TidStoreCreateLocal(size_t max_bytes, bool insert_only)
Definition tidstore.c:162
void TidStoreDestroy(TidStore *ts)
Definition tidstore.c:317
int TidStoreGetBlockOffsets(TidStoreIterResult *result, OffsetNumber *offsets, int max_offsets)
Definition tidstore.c:566
void TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
Definition tidstore.c:345
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
static TransactionId ReadNextTransactionId(void)
Definition transam.h:375
#define InvalidTransactionId
Definition transam.h:31
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
bool track_cost_delay_timing
Definition vacuum.c:83
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2369
IndexBulkDeleteResult * vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
Definition vacuum.c:2679
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2412
void vacuum_delay_point(bool is_analyze)
Definition vacuum.c:2433
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1101
bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1269
bool VacuumFailsafeActive
Definition vacuum.c:111
double vac_estimate_reltuples(Relation relation, BlockNumber total_pages, BlockNumber scanned_pages, double scanned_tuples)
Definition vacuum.c:1331
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1427
IndexBulkDeleteResult * vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat, TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
Definition vacuum.c:2658
#define VACOPT_VERBOSE
Definition vacuum.h:181
@ VACOPTVALUE_AUTO
Definition vacuum.h:202
@ VACOPTVALUE_ENABLED
Definition vacuum.h:204
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:201
@ VACOPTVALUE_DISABLED
Definition vacuum.h:203
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:187
static int lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool *has_lpdead_items, bool *vm_page_frozen)
static void dead_items_cleanup(LVRelState *vacrel)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets, int num_offsets)
static BlockNumber heap_vac_scan_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL
Definition vacuumlazy.c:180
static void vacuum_error_callback(void *arg)
#define EAGER_SCAN_REGION_SIZE
Definition vacuumlazy.c:250
static void lazy_truncate_heap(LVRelState *vacrel)
static void lazy_vacuum(LVRelState *vacrel)
static void lazy_cleanup_all_indexes(LVRelState *vacrel)
#define MAX_EAGER_FREEZE_SUCCESS_RATE
Definition vacuumlazy.c:241
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool *has_lpdead_items)
static BlockNumber vacuum_reap_lp_read_stream_next(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
#define REL_TRUNCATE_MINIMUM
Definition vacuumlazy.c:169
static bool should_attempt_truncation(LVRelState *vacrel)
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer)
VacErrPhase
Definition vacuumlazy.c:225
@ VACUUM_ERRCB_PHASE_SCAN_HEAP
Definition vacuumlazy.c:227
@ VACUUM_ERRCB_PHASE_VACUUM_INDEX
Definition vacuumlazy.c:228
@ VACUUM_ERRCB_PHASE_TRUNCATE
Definition vacuumlazy.c:231
@ VACUUM_ERRCB_PHASE_INDEX_CLEANUP
Definition vacuumlazy.c:230
@ VACUUM_ERRCB_PHASE_VACUUM_HEAP
Definition vacuumlazy.c:229
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:226
static void lazy_scan_heap(LVRelState *vacrel)
#define ParallelVacuumIsActive(vacrel)
Definition vacuumlazy.c:221
static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel)
static IndexBulkDeleteResult * lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, LVRelState *vacrel)
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
static void dead_items_reset(LVRelState *vacrel)
#define REL_TRUNCATE_FRACTION
Definition vacuumlazy.c:170
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static IndexBulkDeleteResult * lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, double reltuples, bool estimated_count, LVRelState *vacrel)
#define PREFETCH_SIZE
Definition vacuumlazy.c:215
static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams *params)
Definition vacuumlazy.c:497
static bool heap_page_would_be_all_visible(Relation rel, Buffer buf, GlobalVisState *vistest, bool allow_update_vistest, OffsetNumber *deadoffsets, int ndeadoffsets, bool *all_frozen, TransactionId *newest_live_xid, OffsetNumber *logging_offnum)
void heap_vacuum_rel(Relation rel, const VacuumParams *params, BufferAccessStrategy bstrategy)
Definition vacuumlazy.c:624
#define BYPASS_THRESHOLD_PAGES
Definition vacuumlazy.c:187
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
#define VACUUM_TRUNCATE_LOCK_TIMEOUT
Definition vacuumlazy.c:181
static bool lazy_vacuum_all_indexes(LVRelState *vacrel)
static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, int phase, BlockNumber blkno, OffsetNumber offnum)
static BlockNumber count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
#define SKIP_PAGES_THRESHOLD
Definition vacuumlazy.c:209
#define FAILSAFE_EVERY_PAGES
Definition vacuumlazy.c:193
#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL
Definition vacuumlazy.c:179
static int cmpOffsetNumbers(const void *a, const void *b)
static void lazy_vacuum_heap_rel(LVRelState *vacrel)
#define VACUUM_FSM_EVERY_PAGES
Definition vacuumlazy.c:202
void parallel_vacuum_cleanup_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, bool estimated_count, PVWorkerStats *wstats)
TidStore * parallel_vacuum_get_dead_items(ParallelVacuumState *pvs, VacDeadItemsInfo **dead_items_info_p)
void parallel_vacuum_bulkdel_all_indexes(ParallelVacuumState *pvs, long num_table_tuples, int num_index_scans, PVWorkerStats *wstats)
ParallelVacuumState * parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, int nrequested_workers, int vac_work_mem, int elevel, BufferAccessStrategy bstrategy)
void parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs)
void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
void visibilitymap_set(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
#define WL_TIMEOUT
#define WL_EXIT_ON_PM_DEATH
#define WL_LATCH_SET
bool IsInParallelMode(void)
Definition xact.c:1119

◆ MAX_EAGER_FREEZE_SUCCESS_RATE

#define MAX_EAGER_FREEZE_SUCCESS_RATE   0.2

Definition at line 241 of file vacuumlazy.c.

◆ ParallelVacuumIsActive

#define ParallelVacuumIsActive (   vacrel)    ((vacrel)->pvs != NULL)

Definition at line 221 of file vacuumlazy.c.

◆ PREFETCH_SIZE

#define PREFETCH_SIZE   ((BlockNumber) 32)

Definition at line 215 of file vacuumlazy.c.

◆ REL_TRUNCATE_FRACTION

#define REL_TRUNCATE_FRACTION   16

Definition at line 170 of file vacuumlazy.c.

◆ REL_TRUNCATE_MINIMUM

#define REL_TRUNCATE_MINIMUM   1000

Definition at line 169 of file vacuumlazy.c.

◆ SKIP_PAGES_THRESHOLD

#define SKIP_PAGES_THRESHOLD   ((BlockNumber) 32)

Definition at line 209 of file vacuumlazy.c.

◆ VACUUM_FSM_EVERY_PAGES

#define VACUUM_FSM_EVERY_PAGES    ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))

Definition at line 202 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL

#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL   20 /* ms */

Definition at line 179 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_TIMEOUT

#define VACUUM_TRUNCATE_LOCK_TIMEOUT   5000 /* ms */

Definition at line 181 of file vacuumlazy.c.

◆ VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL

#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL   50 /* ms */

Definition at line 180 of file vacuumlazy.c.

Typedef Documentation

◆ LVRelState

◆ LVSavedErrInfo

Enumeration Type Documentation

◆ VacErrPhase

Enumerator
VACUUM_ERRCB_PHASE_UNKNOWN 
VACUUM_ERRCB_PHASE_SCAN_HEAP 
VACUUM_ERRCB_PHASE_VACUUM_INDEX 
VACUUM_ERRCB_PHASE_VACUUM_HEAP 
VACUUM_ERRCB_PHASE_INDEX_CLEANUP 
VACUUM_ERRCB_PHASE_TRUNCATE 

Definition at line 224 of file vacuumlazy.c.

Function Documentation

◆ cmpOffsetNumbers()

static int cmpOffsetNumbers ( const void a,
const void b 
)
static

Definition at line 1998 of file vacuumlazy.c.

1999{
2000 return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
2001}

References a, b, and pg_cmp_u16().

Referenced by lazy_scan_prune().

◆ count_nondeletable_pages()

static BlockNumber count_nondeletable_pages ( LVRelState vacrel,
bool lock_waiter_detected 
)
static

Definition at line 3273 of file vacuumlazy.c.

3274{
3276 "prefetch size must be power of 2");
3277
3278 BlockNumber blkno;
3280 instr_time starttime;
3281
3282 /* Initialize the starttime if we check for conflicting lock requests */
3283 INSTR_TIME_SET_CURRENT(starttime);
3284
3285 /*
3286 * Start checking blocks at what we believe relation end to be and move
3287 * backwards. (Strange coding of loop control is needed because blkno is
3288 * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3289 * in forward direction, so that OS-level readahead can kick in.
3290 */
3291 blkno = vacrel->rel_pages;
3293 while (blkno > vacrel->nonempty_pages)
3294 {
3295 Buffer buf;
3296 Page page;
3297 OffsetNumber offnum,
3298 maxoff;
3299 bool hastup;
3300
3301 /*
3302 * Check if another process requests a lock on our relation. We are
3303 * holding an AccessExclusiveLock here, so they will be waiting. We
3304 * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3305 * only check if that interval has elapsed once every 32 blocks to
3306 * keep the number of system calls and actual shared lock table
3307 * lookups to a minimum.
3308 */
3309 if ((blkno % 32) == 0)
3310 {
3313
3316 INSTR_TIME_SUBTRACT(elapsed, starttime);
3317 if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3319 {
3321 {
3322 ereport(vacrel->verbose ? INFO : DEBUG2,
3323 (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3324 vacrel->relname)));
3325
3326 *lock_waiter_detected = true;
3327 return blkno;
3328 }
3329 starttime = currenttime;
3330 }
3331 }
3332
3333 /*
3334 * We don't insert a vacuum delay point here, because we have an
3335 * exclusive lock on the table which we want to hold for as short a
3336 * time as possible. We still need to check for interrupts however.
3337 */
3339
3340 blkno--;
3341
3342 /* If we haven't prefetched this lot yet, do so now. */
3343 if (prefetchedUntil > blkno)
3344 {
3347
3348 prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3349 for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3350 {
3353 }
3355 }
3356
3358 vacrel->bstrategy);
3359
3360 /* In this phase we only need shared access to the buffer */
3362
3363 page = BufferGetPage(buf);
3364
3365 if (PageIsNew(page) || PageIsEmpty(page))
3366 {
3368 continue;
3369 }
3370
3371 hastup = false;
3372 maxoff = PageGetMaxOffsetNumber(page);
3373 for (offnum = FirstOffsetNumber;
3374 offnum <= maxoff;
3375 offnum = OffsetNumberNext(offnum))
3376 {
3377 ItemId itemid;
3378
3379 itemid = PageGetItemId(page, offnum);
3380
3381 /*
3382 * Note: any non-unused item should be taken as a reason to keep
3383 * this page. Even an LP_DEAD item makes truncation unsafe, since
3384 * we must not have cleaned out its index entries.
3385 */
3386 if (ItemIdIsUsed(itemid))
3387 {
3388 hastup = true;
3389 break; /* can stop scanning */
3390 }
3391 } /* scan along page */
3392
3394
3395 /* Done scanning if we found a tuple here */
3396 if (hastup)
3397 return blkno + 1;
3398 }
3399
3400 /*
3401 * If we fall out of the loop, all the previously-thought-to-be-empty
3402 * pages still are; we need not bother to look at the last known-nonempty
3403 * page.
3404 */
3405 return vacrel->nonempty_pages;
3406}

References AccessExclusiveLock, buf, BUFFER_LOCK_SHARE, BufferGetPage(), CHECK_FOR_INTERRUPTS, DEBUG2, ereport, errmsg, fb(), FirstOffsetNumber, INFO, INSTR_TIME_GET_MICROSEC, INSTR_TIME_SET_CURRENT, INSTR_TIME_SUBTRACT, InvalidBlockNumber, ItemIdIsUsed, LockBuffer(), LockHasWaitersRelation(), MAIN_FORKNUM, OffsetNumberNext, PageGetItemId(), PageGetMaxOffsetNumber(), PageIsEmpty(), PageIsNew(), PREFETCH_SIZE, PrefetchBuffer(), RBM_NORMAL, ReadBufferExtended(), StaticAssertDecl, UnlockReleaseBuffer(), and VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL.

Referenced by lazy_truncate_heap().

◆ dead_items_add()

static void dead_items_add ( LVRelState vacrel,
BlockNumber  blkno,
OffsetNumber offsets,
int  num_offsets 
)
static

Definition at line 3481 of file vacuumlazy.c.

3483{
3484 const int prog_index[2] = {
3487 };
3488 int64 prog_val[2];
3489
3490 TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3491 vacrel->dead_items_info->num_items += num_offsets;
3492
3493 /* update the progress information */
3494 prog_val[0] = vacrel->dead_items_info->num_items;
3495 prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3497}

References fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS, TidStoreMemoryUsage(), and TidStoreSetBlockOffsets().

Referenced by lazy_scan_noprune(), and lazy_scan_prune().

◆ dead_items_alloc()

static void dead_items_alloc ( LVRelState vacrel,
int  nworkers 
)
static

Definition at line 3416 of file vacuumlazy.c.

3417{
3418 VacDeadItemsInfo *dead_items_info;
3420 autovacuum_work_mem != -1 ?
3422
3423 /*
3424 * Initialize state for a parallel vacuum. As of now, only one worker can
3425 * be used for an index, so we invoke parallelism only if there are at
3426 * least two indexes on a table.
3427 */
3428 if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3429 {
3430 /*
3431 * Since parallel workers cannot access data in temporary tables, we
3432 * can't perform parallel vacuum on them.
3433 */
3435 {
3436 /*
3437 * Give warning only if the user explicitly tries to perform a
3438 * parallel vacuum on the temporary table.
3439 */
3440 if (nworkers > 0)
3442 (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3443 vacrel->relname)));
3444 }
3445 else
3446 vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3447 vacrel->nindexes, nworkers,
3449 vacrel->verbose ? INFO : DEBUG2,
3450 vacrel->bstrategy);
3451
3452 /*
3453 * If parallel mode started, dead_items and dead_items_info spaces are
3454 * allocated in DSM.
3455 */
3457 {
3459 &vacrel->dead_items_info);
3460 return;
3461 }
3462 }
3463
3464 /*
3465 * Serial VACUUM case. Allocate both dead_items and dead_items_info
3466 * locally.
3467 */
3468
3469 dead_items_info = palloc_object(VacDeadItemsInfo);
3470 dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3471 dead_items_info->num_items = 0;
3472 vacrel->dead_items_info = dead_items_info;
3473
3474 vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3475}

References AmAutoVacuumWorkerProcess, autovacuum_work_mem, ParallelVacuumState::dead_items, DEBUG2, ereport, errmsg, fb(), INFO, maintenance_work_mem, VacDeadItemsInfo::max_bytes, VacDeadItemsInfo::num_items, palloc_object, parallel_vacuum_get_dead_items(), parallel_vacuum_init(), ParallelVacuumIsActive, RelationUsesLocalBuffers, TidStoreCreateLocal(), and WARNING.

Referenced by heap_vacuum_rel().

◆ dead_items_cleanup()

static void dead_items_cleanup ( LVRelState vacrel)
static

Definition at line 3529 of file vacuumlazy.c.

3530{
3532 {
3533 /* Don't bother with pfree here */
3534 return;
3535 }
3536
3537 /* End parallel mode */
3538 parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3539 vacrel->pvs = NULL;
3540}

References fb(), parallel_vacuum_end(), and ParallelVacuumIsActive.

Referenced by heap_vacuum_rel().

◆ dead_items_reset()

static void dead_items_reset ( LVRelState vacrel)
static

Definition at line 3503 of file vacuumlazy.c.

3504{
3505 /* Update statistics for dead items */
3506 vacrel->num_dead_items_resets++;
3507 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3508
3510 {
3512 vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3513 &vacrel->dead_items_info);
3514 return;
3515 }
3516
3517 /* Recreate the tidstore with the same max_bytes limitation */
3518 TidStoreDestroy(vacrel->dead_items);
3519 vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3520
3521 /* Reset the counter */
3522 vacrel->dead_items_info->num_items = 0;
3523}

References fb(), parallel_vacuum_get_dead_items(), parallel_vacuum_reset_dead_items(), ParallelVacuumIsActive, TidStoreCreateLocal(), TidStoreDestroy(), and TidStoreMemoryUsage().

Referenced by lazy_vacuum().

◆ find_next_unskippable_block()

static void find_next_unskippable_block ( LVRelState vacrel,
bool skipsallvis 
)
static

Definition at line 1748 of file vacuumlazy.c.

1749{
1750 BlockNumber rel_pages = vacrel->rel_pages;
1751 BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1752 Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1753 bool next_unskippable_eager_scanned = false;
1754
1755 *skipsallvis = false;
1756
1757 for (;; next_unskippable_block++)
1758 {
1760 next_unskippable_block,
1761 &next_unskippable_vmbuffer);
1762
1763
1764 /*
1765 * At the start of each eager scan region, normal vacuums with eager
1766 * scanning enabled reset the failure counter, allowing vacuum to
1767 * resume eager scanning if it had been suspended in the previous
1768 * region.
1769 */
1770 if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1771 {
1772 vacrel->eager_scan_remaining_fails =
1773 vacrel->eager_scan_max_fails_per_region;
1774 vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1775 }
1776
1777 /*
1778 * A block is unskippable if it is not all visible according to the
1779 * visibility map.
1780 */
1782 {
1784 break;
1785 }
1786
1787 /*
1788 * Caller must scan the last page to determine whether it has tuples
1789 * (caller must have the opportunity to set vacrel->nonempty_pages).
1790 * This rule avoids having lazy_truncate_heap() take access-exclusive
1791 * lock on rel to attempt a truncation that fails anyway, just because
1792 * there are tuples on the last page (it is likely that there will be
1793 * tuples on other nearby pages as well, but those can be skipped).
1794 *
1795 * Implement this by always treating the last block as unsafe to skip.
1796 */
1797 if (next_unskippable_block == rel_pages - 1)
1798 break;
1799
1800 /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1801 if (!vacrel->skipwithvm)
1802 break;
1803
1804 /*
1805 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1806 * already frozen by now), so this page can be skipped.
1807 */
1808 if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1809 continue;
1810
1811 /*
1812 * Aggressive vacuums cannot skip any all-visible pages that are not
1813 * also all-frozen.
1814 */
1815 if (vacrel->aggressive)
1816 break;
1817
1818 /*
1819 * Normal vacuums with eager scanning enabled only skip all-visible
1820 * but not all-frozen pages if they have hit the failure limit for the
1821 * current eager scan region.
1822 */
1823 if (vacrel->eager_scan_remaining_fails > 0)
1824 {
1825 next_unskippable_eager_scanned = true;
1826 break;
1827 }
1828
1829 /*
1830 * All-visible blocks are safe to skip in a normal vacuum. But
1831 * remember that the final range contains such a block for later.
1832 */
1833 *skipsallvis = true;
1834 }
1835
1836 /* write the local variables back to vacrel */
1837 vacrel->next_unskippable_block = next_unskippable_block;
1838 vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1839 vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1840}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, and visibilitymap_get_status().

Referenced by heap_vac_scan_next_block().

◆ heap_page_would_be_all_visible()

static bool heap_page_would_be_all_visible ( Relation  rel,
Buffer  buf,
GlobalVisState vistest,
bool  allow_update_vistest,
OffsetNumber deadoffsets,
int  ndeadoffsets,
bool all_frozen,
TransactionId newest_live_xid,
OffsetNumber logging_offnum 
)
static

Definition at line 3604 of file vacuumlazy.c.

3612{
3613 Page page = BufferGetPage(buf);
3615 OffsetNumber offnum,
3616 maxoff;
3617 bool all_visible = true;
3618 int matched_dead_count = 0;
3619
3620 *newest_live_xid = InvalidTransactionId;
3621 *all_frozen = true;
3622
3623 Assert(ndeadoffsets == 0 || deadoffsets);
3624
3625#ifdef USE_ASSERT_CHECKING
3626 /* Confirm input deadoffsets[] is strictly sorted */
3627 if (ndeadoffsets > 1)
3628 {
3629 for (int i = 1; i < ndeadoffsets; i++)
3630 Assert(deadoffsets[i - 1] < deadoffsets[i]);
3631 }
3632#endif
3633
3634 maxoff = PageGetMaxOffsetNumber(page);
3635 for (offnum = FirstOffsetNumber;
3636 offnum <= maxoff && all_visible;
3637 offnum = OffsetNumberNext(offnum))
3638 {
3639 ItemId itemid;
3640 HeapTupleData tuple;
3642
3643 /*
3644 * Set the offset number so that we can display it along with any
3645 * error that occurred while processing this tuple.
3646 */
3647 *logging_offnum = offnum;
3648 itemid = PageGetItemId(page, offnum);
3649
3650 /* Unused or redirect line pointers are of no interest */
3651 if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3652 continue;
3653
3654 ItemPointerSet(&(tuple.t_self), blockno, offnum);
3655
3656 /*
3657 * Dead line pointers can have index pointers pointing to them. So
3658 * they can't be treated as visible
3659 */
3660 if (ItemIdIsDead(itemid))
3661 {
3662 if (!deadoffsets ||
3664 deadoffsets[matched_dead_count] != offnum)
3665 {
3666 *all_frozen = all_visible = false;
3667 break;
3668 }
3670 continue;
3671 }
3672
3673 Assert(ItemIdIsNormal(itemid));
3674
3675 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3676 tuple.t_len = ItemIdGetLength(itemid);
3677 tuple.t_tableOid = RelationGetRelid(rel);
3678
3679 /* Visibility checks may do IO or allocate memory */
3682 {
3683 case HEAPTUPLE_LIVE:
3684 {
3685 TransactionId xmin;
3686
3687 /* Check heap_prune_record_unchanged_lp_normal comments */
3689 {
3690 all_visible = false;
3691 *all_frozen = false;
3692 break;
3693 }
3694
3695 /*
3696 * The inserter definitely committed. But we don't know if
3697 * it is old enough that everyone sees it as committed.
3698 * Don't check that now.
3699 *
3700 * If we scan all tuples without finding one that prevents
3701 * the page from being all-visible, we then check whether
3702 * any snapshot still considers the newest XID on the page
3703 * to be running. In that case, the page is not considered
3704 * all-visible.
3705 */
3706 xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3707
3708 /* Track newest xmin on page. */
3709 if (TransactionIdFollows(xmin, *newest_live_xid) &&
3711 *newest_live_xid = xmin;
3712
3713 /* Check whether this tuple is already frozen or not */
3714 if (all_visible && *all_frozen &&
3716 *all_frozen = false;
3717 }
3718 break;
3719
3720 case HEAPTUPLE_DEAD:
3724 {
3725 all_visible = false;
3726 *all_frozen = false;
3727 break;
3728 }
3729 default:
3730 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3731 break;
3732 }
3733 } /* scan along page */
3734
3735 /*
3736 * After processing all the live tuples on the page, if the newest xmin
3737 * among them may still be considered running by any snapshot, the page
3738 * cannot be all-visible.
3739 */
3740 if (all_visible &&
3741 TransactionIdIsNormal(*newest_live_xid) &&
3742 GlobalVisTestXidConsideredRunning(vistest, *newest_live_xid,
3744 {
3745 all_visible = false;
3746 *all_frozen = false;
3747 }
3748
3749 /* Clear the offset information once we have processed the given page. */
3751
3752 return all_visible;
3753}

References Assert, buf, BufferGetBlockNumber(), BufferGetPage(), CritSectionCount, elog, ERROR, fb(), FirstOffsetNumber, GlobalVisTestXidConsideredRunning(), heap_tuple_needs_eventual_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetXmin(), HeapTupleHeaderXminCommitted(), HeapTupleSatisfiesVacuumHorizon(), i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), and TransactionIdIsNormal.

Referenced by lazy_vacuum_heap_page().

◆ heap_vac_scan_next_block()

static BlockNumber heap_vac_scan_next_block ( ReadStream stream,
void callback_private_data,
void per_buffer_data 
)
static

Definition at line 1648 of file vacuumlazy.c.

1651{
1653 LVRelState *vacrel = callback_private_data;
1654
1655 /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1657
1658 /* Have we reached the end of the relation? */
1659 if (next_block >= vacrel->rel_pages)
1660 {
1661 if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1662 {
1663 ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1664 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1665 }
1666 return InvalidBlockNumber;
1667 }
1668
1669 /*
1670 * We must be in one of the three following states:
1671 */
1672 if (next_block > vacrel->next_unskippable_block ||
1673 vacrel->next_unskippable_block == InvalidBlockNumber)
1674 {
1675 /*
1676 * 1. We have just processed an unskippable block (or we're at the
1677 * beginning of the scan). Find the next unskippable block using the
1678 * visibility map.
1679 */
1680 bool skipsallvis;
1681
1683
1684 /*
1685 * We now know the next block that we must process. It can be the
1686 * next block after the one we just processed, or something further
1687 * ahead. If it's further ahead, we can jump to it, but we choose to
1688 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1689 * pages. Since we're reading sequentially, the OS should be doing
1690 * readahead for us, so there's no gain in skipping a page now and
1691 * then. Skipping such a range might even discourage sequential
1692 * detection.
1693 *
1694 * This test also enables more frequent relfrozenxid advancement
1695 * during non-aggressive VACUUMs. If the range has any all-visible
1696 * pages then skipping makes updating relfrozenxid unsafe, which is a
1697 * real downside.
1698 */
1699 if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1700 {
1701 next_block = vacrel->next_unskippable_block;
1702 if (skipsallvis)
1703 vacrel->skippedallvis = true;
1704 }
1705 }
1706
1707 /* Now we must be in one of the two remaining states: */
1708 if (next_block < vacrel->next_unskippable_block)
1709 {
1710 /*
1711 * 2. We are processing a range of blocks that we could have skipped
1712 * but chose not to. We know that they are all-visible in the VM,
1713 * otherwise they would've been unskippable.
1714 */
1715 vacrel->current_block = next_block;
1716 /* Block was not eager scanned */
1717 *((bool *) per_buffer_data) = false;
1718 return vacrel->current_block;
1719 }
1720 else
1721 {
1722 /*
1723 * 3. We reached the next unskippable block. Process it. On next
1724 * iteration, we will be back in state 1.
1725 */
1726 Assert(next_block == vacrel->next_unskippable_block);
1727
1728 vacrel->current_block = next_block;
1729 *((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
1730 return vacrel->current_block;
1731 }
1732}

References Assert, BufferIsValid(), LVRelState::current_block, fb(), find_next_unskippable_block(), InvalidBlockNumber, InvalidBuffer, ReleaseBuffer(), and SKIP_PAGES_THRESHOLD.

Referenced by lazy_scan_heap().

◆ heap_vacuum_eager_scan_setup()

static void heap_vacuum_eager_scan_setup ( LVRelState vacrel,
const VacuumParams params 
)
static

Definition at line 497 of file vacuumlazy.c.

498{
502 float first_region_ratio;
504
505 /*
506 * Initialize eager scan management fields to their disabled values.
507 * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
508 * of tables without sufficiently old tuples disable eager scanning.
509 */
510 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
511 vacrel->eager_scan_max_fails_per_region = 0;
512 vacrel->eager_scan_remaining_fails = 0;
513 vacrel->eager_scan_remaining_successes = 0;
514
515 /* If eager scanning is explicitly disabled, just return. */
516 if (params->max_eager_freeze_failure_rate == 0)
517 return;
518
519 /*
520 * The caller will have determined whether or not an aggressive vacuum is
521 * required by either the vacuum parameters or the relative age of the
522 * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
523 * all-visible page to safely advance the relfrozenxid and/or relminmxid,
524 * so scans of all-visible pages are not considered eager.
525 */
526 if (vacrel->aggressive)
527 return;
528
529 /*
530 * Aggressively vacuuming a small relation shouldn't take long, so it
531 * isn't worth amortizing. We use two times the region size as the size
532 * cutoff because the eager scan start block is a random spot somewhere in
533 * the first region, making the second region the first to be eager
534 * scanned normally.
535 */
536 if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
537 return;
538
539 /*
540 * We only want to enable eager scanning if we are likely to be able to
541 * freeze some of the pages in the relation.
542 *
543 * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
544 * are technically freezable, but we won't freeze them unless the criteria
545 * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
546 * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
547 *
548 * So, as a heuristic, we wait until the FreezeLimit has advanced past the
549 * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
550 * enable eager scanning.
551 */
552 if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
553 TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
554 vacrel->cutoffs.FreezeLimit))
556
558 MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
559 MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
560 vacrel->cutoffs.MultiXactCutoff))
562
564 return;
565
566 /* We have met the criteria to eagerly scan some pages. */
567
568 /*
569 * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
570 * all-visible but not all-frozen blocks in the relation.
571 */
573
574 vacrel->eager_scan_remaining_successes =
577
578 /* If every all-visible page is frozen, eager scanning is disabled. */
579 if (vacrel->eager_scan_remaining_successes == 0)
580 return;
581
582 /*
583 * Now calculate the bounds of the first eager scan region. Its end block
584 * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
585 * blocks. This affects the bounds of all subsequent regions and avoids
586 * eager scanning and failing to freeze the same blocks each vacuum of the
587 * relation.
588 */
590
591 vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
592
594 params->max_eager_freeze_failure_rate <= 1);
595
596 vacrel->eager_scan_max_fails_per_region =
599
600 /*
601 * The first region will be smaller than subsequent regions. As such,
602 * adjust the eager freeze failures tolerated for this region.
603 */
604 first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
606
607 vacrel->eager_scan_remaining_fails =
608 vacrel->eager_scan_max_fails_per_region *
610}

References Assert, EAGER_SCAN_REGION_SIZE, fb(), InvalidBlockNumber, VacuumParams::max_eager_freeze_failure_rate, MAX_EAGER_FREEZE_SUCCESS_RATE, MultiXactIdIsValid, MultiXactIdPrecedes(), pg_global_prng_state, pg_prng_uint32(), TransactionIdIsNormal, TransactionIdPrecedes(), and visibilitymap_count().

Referenced by heap_vacuum_rel().

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams params,
BufferAccessStrategy  bstrategy 
)

Definition at line 624 of file vacuumlazy.c.

626{
628 bool verbose,
629 instrument,
630 skipwithvm,
638 TimestampTz starttime = 0;
640 startwritetime = 0;
643 ErrorContextCallback errcallback;
644 char **indnames = NULL;
646
647 verbose = (params->options & VACOPT_VERBOSE) != 0;
648 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
649 params->log_vacuum_min_duration >= 0));
650 if (instrument)
651 {
653 if (track_io_timing)
654 {
657 }
658 }
659
660 /* Used for instrumentation and stats report */
661 starttime = GetCurrentTimestamp();
662
664 RelationGetRelid(rel));
667 params->is_wraparound
670 else
673
674 /*
675 * Setup error traceback support for ereport() first. The idea is to set
676 * up an error context callback to display additional information on any
677 * error during a vacuum. During different phases of vacuum, we update
 678 * the state so that the error context callback always displays current
679 * information.
680 *
681 * Copy the names of heap rel into local memory for error reporting
682 * purposes, too. It isn't always safe to assume that we can get the name
683 * of each rel. It's convenient for code in lazy_scan_heap to always use
684 * these temp copies.
685 */
688 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
689 vacrel->relname = pstrdup(RelationGetRelationName(rel));
690 vacrel->indname = NULL;
692 vacrel->verbose = verbose;
693 errcallback.callback = vacuum_error_callback;
694 errcallback.arg = vacrel;
695 errcallback.previous = error_context_stack;
696 error_context_stack = &errcallback;
697
698 /* Set up high level stuff about rel and its indexes */
699 vacrel->rel = rel;
701 &vacrel->indrels);
702 vacrel->bstrategy = bstrategy;
703 if (instrument && vacrel->nindexes > 0)
704 {
705 /* Copy index names used by instrumentation (not error reporting) */
706 indnames = palloc_array(char *, vacrel->nindexes);
707 for (int i = 0; i < vacrel->nindexes; i++)
709 }
710
711 /*
712 * The index_cleanup param either disables index vacuuming and cleanup or
713 * forces it to go ahead when we would otherwise apply the index bypass
714 * optimization. The default is 'auto', which leaves the final decision
715 * up to lazy_vacuum().
716 *
 717 * The truncate param allows the user to avoid attempting relation truncation,
718 * though it can't force truncation to happen.
719 */
722 params->truncate != VACOPTVALUE_AUTO);
723
724 /*
 725 * While VacuumFailsafeActive is reset to false before calling this, we
726 * still need to reset it here due to recursive calls.
727 */
728 VacuumFailsafeActive = false;
729 vacrel->consider_bypass_optimization = true;
730 vacrel->do_index_vacuuming = true;
731 vacrel->do_index_cleanup = true;
732 vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
733 if (params->index_cleanup == VACOPTVALUE_DISABLED)
734 {
735 /* Force disable index vacuuming up-front */
736 vacrel->do_index_vacuuming = false;
737 vacrel->do_index_cleanup = false;
738 }
739 else if (params->index_cleanup == VACOPTVALUE_ENABLED)
740 {
741 /* Force index vacuuming. Note that failsafe can still bypass. */
742 vacrel->consider_bypass_optimization = false;
743 }
744 else
745 {
746 /* Default/auto, make all decisions dynamically */
748 }
749
750 /* Initialize page counters explicitly (be tidy) */
751 vacrel->scanned_pages = 0;
752 vacrel->eager_scanned_pages = 0;
753 vacrel->removed_pages = 0;
754 vacrel->new_frozen_tuple_pages = 0;
755 vacrel->lpdead_item_pages = 0;
756 vacrel->missed_dead_pages = 0;
757 vacrel->nonempty_pages = 0;
758 /* dead_items_alloc allocates vacrel->dead_items later on */
759
760 /* Allocate/initialize output statistics state */
761 vacrel->new_rel_tuples = 0;
762 vacrel->new_live_tuples = 0;
763 vacrel->indstats = (IndexBulkDeleteResult **)
764 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
765
766 /* Initialize remaining counters (be tidy) */
767 vacrel->num_index_scans = 0;
768 vacrel->num_dead_items_resets = 0;
769 vacrel->total_dead_items_bytes = 0;
770 vacrel->tuples_deleted = 0;
771 vacrel->tuples_frozen = 0;
772 vacrel->lpdead_items = 0;
773 vacrel->live_tuples = 0;
774 vacrel->recently_dead_tuples = 0;
775 vacrel->missed_dead_tuples = 0;
776
777 vacrel->new_all_visible_pages = 0;
778 vacrel->new_all_visible_all_frozen_pages = 0;
779 vacrel->new_all_frozen_pages = 0;
780
781 vacrel->worker_usage.vacuum.nlaunched = 0;
782 vacrel->worker_usage.vacuum.nplanned = 0;
783 vacrel->worker_usage.cleanup.nlaunched = 0;
784 vacrel->worker_usage.cleanup.nplanned = 0;
785
786 /*
787 * Get cutoffs that determine which deleted tuples are considered DEAD,
788 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
789 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
790 * happen in this order to ensure that the OldestXmin cutoff field works
791 * as an upper bound on the XIDs stored in the pages we'll actually scan
792 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
793 *
794 * Next acquire vistest, a related cutoff that's used in pruning. We use
795 * vistest in combination with OldestXmin to ensure that
796 * heap_page_prune_and_freeze() always removes any deleted tuple whose
797 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
798 * whether a tuple should be frozen or removed. (In the future we might
799 * want to teach lazy_scan_prune to recompute vistest from time to time,
800 * to increase the number of dead tuples it can prune away.)
801 */
802 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
804 vacrel->vistest = GlobalVisTestFor(rel);
805
806 /* Initialize state used to track oldest extant XID/MXID */
807 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
808 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
809
810 /*
811 * Initialize state related to tracking all-visible page skipping. This is
812 * very important to determine whether or not it is safe to advance the
813 * relfrozenxid/relminmxid.
814 */
815 vacrel->skippedallvis = false;
816 skipwithvm = true;
818 {
819 /*
820 * Force aggressive mode, and disable skipping blocks using the
821 * visibility map (even those set all-frozen)
822 */
823 vacrel->aggressive = true;
824 skipwithvm = false;
825 }
826
827 vacrel->skipwithvm = skipwithvm;
828
829 /*
830 * Set up eager scan tracking state. This must happen after determining
831 * whether or not the vacuum must be aggressive, because only normal
832 * vacuums use the eager scan algorithm.
833 */
835
836 /* Report the vacuum mode: 'normal' or 'aggressive' */
838 vacrel->aggressive
841
842 if (verbose)
843 {
844 if (vacrel->aggressive)
846 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
847 vacrel->dbname, vacrel->relnamespace,
848 vacrel->relname)));
849 else
851 (errmsg("vacuuming \"%s.%s.%s\"",
852 vacrel->dbname, vacrel->relnamespace,
853 vacrel->relname)));
854 }
855
856 /*
857 * Allocate dead_items memory using dead_items_alloc. This handles
858 * parallel VACUUM initialization as part of allocating shared memory
859 * space used for dead_items. (But do a failsafe precheck first, to
860 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
861 * is already dangerously old.)
862 */
865
866#ifdef USE_INJECTION_POINTS
867
868 /*
869 * Used by tests to pause before parallel vacuum is launched, allowing
870 * test code to modify configuration that the leader then propagates to
871 * workers.
872 */
874 INJECTION_POINT("autovacuum-start-parallel-vacuum", NULL);
875#endif
876
877 /*
878 * Call lazy_scan_heap to perform all required heap pruning, index
879 * vacuuming, and heap vacuuming (plus related processing)
880 */
882
883 /*
884 * Save dead items max_bytes and update the memory usage statistics before
885 * cleanup, they are freed in parallel vacuum cases during
886 * dead_items_cleanup().
887 */
888 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
889 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
890
891 /*
892 * Free resources managed by dead_items_alloc. This ends parallel mode in
893 * passing when necessary.
894 */
897
898 /*
899 * Update pg_class entries for each of rel's indexes where appropriate.
900 *
901 * Unlike the later update to rel's pg_class entry, this is not critical.
902 * Maintains relpages/reltuples statistics used by the planner only.
903 */
904 if (vacrel->do_index_cleanup)
906
907 /* Done with rel's indexes */
908 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
909
910 /* Optionally truncate rel */
913
914 /* Pop the error context stack */
915 error_context_stack = errcallback.previous;
916
917 /* Report that we are now doing final cleanup */
920
921 /*
922 * Prepare to update rel's pg_class entry.
923 *
924 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
925 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
926 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
927 */
928 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
929 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
930 vacrel->cutoffs.relfrozenxid,
931 vacrel->NewRelfrozenXid));
932 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
933 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
934 vacrel->cutoffs.relminmxid,
935 vacrel->NewRelminMxid));
936 if (vacrel->skippedallvis)
937 {
938 /*
939 * Must keep original relfrozenxid in a non-aggressive VACUUM that
940 * chose to skip an all-visible page range. The state that tracks new
941 * values will have missed unfrozen XIDs from the pages we skipped.
942 */
943 Assert(!vacrel->aggressive);
944 vacrel->NewRelfrozenXid = InvalidTransactionId;
945 vacrel->NewRelminMxid = InvalidMultiXactId;
946 }
947
948 /*
949 * For safety, clamp relallvisible to be not more than what we're setting
950 * pg_class.relpages to
951 */
952 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
956
957 /*
958 * An all-frozen block _must_ be all-visible. As such, clamp the count of
959 * all-frozen blocks to the count of all-visible blocks. This matches the
960 * clamping of relallvisible above.
961 */
964
965 /*
966 * Now actually update rel's pg_class entry.
967 *
968 * In principle new_live_tuples could be -1 indicating that we (still)
969 * don't know the tuple count. In practice that can't happen, since we
970 * scan every page that isn't skipped using the visibility map.
971 */
972 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
974 vacrel->nindexes > 0,
975 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
977
978 /*
979 * Report results to the cumulative stats system, too.
980 *
981 * Deliberately avoid telling the stats system about LP_DEAD items that
982 * remain in the table due to VACUUM bypassing index and heap vacuuming.
983 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
984 * It seems like a good idea to err on the side of not vacuuming again too
985 * soon in cases where the failsafe prevented significant amounts of heap
986 * vacuuming.
987 */
989 Max(vacrel->new_live_tuples, 0),
990 vacrel->recently_dead_tuples +
991 vacrel->missed_dead_tuples,
992 starttime);
994
995 if (instrument)
996 {
998
999 if (verbose || params->log_vacuum_min_duration == 0 ||
1001 params->log_vacuum_min_duration))
1002 {
1003 long secs_dur;
1004 int usecs_dur;
1005 WalUsage walusage;
1006 BufferUsage bufferusage;
1008 char *msgfmt;
1009 int32 diff;
1010 double read_rate = 0,
1011 write_rate = 0;
1015
1017 memset(&walusage, 0, sizeof(WalUsage));
1019 memset(&bufferusage, 0, sizeof(BufferUsage));
1021
1022 total_blks_hit = bufferusage.shared_blks_hit +
1023 bufferusage.local_blks_hit;
1024 total_blks_read = bufferusage.shared_blks_read +
1025 bufferusage.local_blks_read;
1027 bufferusage.local_blks_dirtied;
1028
1030 if (verbose)
1031 {
1032 /*
1033 * Aggressiveness already reported earlier, in dedicated
1034 * VACUUM VERBOSE ereport
1035 */
1036 Assert(!params->is_wraparound);
1037 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1038 }
1039 else if (params->is_wraparound)
1040 {
1041 /*
1042 * While it's possible for a VACUUM to be both is_wraparound
1043 * and !aggressive, that's just a corner-case -- is_wraparound
1044 * implies aggressive. Produce distinct output for the corner
1045 * case all the same, just in case.
1046 */
1047 if (vacrel->aggressive)
1048 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1049 else
1050 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1051 }
1052 else
1053 {
1054 if (vacrel->aggressive)
1055 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1056 else
1057 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1058 }
1060 vacrel->dbname,
1061 vacrel->relnamespace,
1062 vacrel->relname,
1063 vacrel->num_index_scans);
1064 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1065 vacrel->removed_pages,
1067 vacrel->scanned_pages,
1068 orig_rel_pages == 0 ? 100.0 :
1069 100.0 * vacrel->scanned_pages /
1071 vacrel->eager_scanned_pages);
1073 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1074 vacrel->tuples_deleted,
1075 (int64) vacrel->new_rel_tuples,
1076 vacrel->recently_dead_tuples);
1077 if (vacrel->missed_dead_tuples > 0)
1079 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1080 vacrel->missed_dead_tuples,
1081 vacrel->missed_dead_pages);
1083 vacrel->cutoffs.OldestXmin);
1085 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1086 vacrel->cutoffs.OldestXmin, diff);
1088 {
1089 diff = (int32) (vacrel->NewRelfrozenXid -
1090 vacrel->cutoffs.relfrozenxid);
1092 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1093 vacrel->NewRelfrozenXid, diff);
1094 }
1095 if (minmulti_updated)
1096 {
1097 diff = (int32) (vacrel->NewRelminMxid -
1098 vacrel->cutoffs.relminmxid);
1100 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1101 vacrel->NewRelminMxid, diff);
1102 }
1103 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1104 vacrel->new_frozen_tuple_pages,
1105 orig_rel_pages == 0 ? 100.0 :
1106 100.0 * vacrel->new_frozen_tuple_pages /
1108 vacrel->tuples_frozen);
1109
1111 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1112 vacrel->new_all_visible_pages,
1113 vacrel->new_all_visible_all_frozen_pages +
1114 vacrel->new_all_frozen_pages,
1115 vacrel->new_all_frozen_pages);
1116 if (vacrel->do_index_vacuuming)
1117 {
1118 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1119 appendStringInfoString(&buf, _("index scan not needed: "));
1120 else
1121 appendStringInfoString(&buf, _("index scan needed: "));
1122
1123 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1124 }
1125 else
1126 {
1128 appendStringInfoString(&buf, _("index scan bypassed: "));
1129 else
1130 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1131
1132 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1133 }
1135 vacrel->lpdead_item_pages,
1136 orig_rel_pages == 0 ? 100.0 :
1137 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1138 vacrel->lpdead_items);
1139
1140 if (vacrel->worker_usage.vacuum.nplanned > 0)
1142 _("parallel workers: index vacuum: %d planned, %d launched in total\n"),
1143 vacrel->worker_usage.vacuum.nplanned,
1144 vacrel->worker_usage.vacuum.nlaunched);
1145
1146 if (vacrel->worker_usage.cleanup.nplanned > 0)
1148 _("parallel workers: index cleanup: %d planned, %d launched\n"),
1149 vacrel->worker_usage.cleanup.nplanned,
1150 vacrel->worker_usage.cleanup.nlaunched);
1151
1152 for (int i = 0; i < vacrel->nindexes; i++)
1153 {
1154 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1155
1156 if (!istat)
1157 continue;
1158
1160 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1161 indnames[i],
1162 istat->num_pages,
1163 istat->pages_newly_deleted,
1164 istat->pages_deleted,
1165 istat->pages_free);
1166 }
1168 {
1169 /*
1170 * We bypass the changecount mechanism because this value is
1171 * only updated by the calling process. We also rely on the
1172 * above call to pgstat_progress_end_command() to not clear
1173 * the st_progress_param array.
1174 */
1175 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1177 }
1178 if (track_io_timing)
1179 {
1180 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1181 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1182
1183 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1184 read_ms, write_ms);
1185 }
1186 if (secs_dur > 0 || usecs_dur > 0)
1187 {
1189 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1191 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1192 }
1193 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1196 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1201 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1202 walusage.wal_records,
1203 walusage.wal_fpi,
1204 walusage.wal_bytes,
1205 walusage.wal_fpi_bytes,
1206 walusage.wal_buffers_full);
1207
1208 /*
1209 * Report the dead items memory usage.
1210 *
1211 * The num_dead_items_resets counter increases when we reset the
1212 * collected dead items, so the counter is non-zero if at least
 1213 * one dead item is collected, even if index vacuuming is
1214 * disabled.
1215 */
1217 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1218 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1219 vacrel->num_dead_items_resets),
1220 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1221 vacrel->num_dead_items_resets,
1222 (double) dead_items_max_bytes / (1024 * 1024));
1223 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1224
1225 ereport(verbose ? INFO : LOG,
1226 (errmsg_internal("%s", buf.data)));
1227 pfree(buf.data);
1228 }
1229 }
1230
1231 /* Cleanup index statistics and index names */
1232 for (int i = 0; i < vacrel->nindexes; i++)
1233 {
1234 if (vacrel->indstats[i])
1235 pfree(vacrel->indstats[i]);
1236
1237 if (instrument)
1238 pfree(indnames[i]);
1239 }
1240}

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg, errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), INJECTION_POINT, InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, ParallelVacuumIsActive, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, 
should_attempt_truncation(), PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().

◆ lazy_check_wraparound_failsafe()

static bool lazy_check_wraparound_failsafe ( LVRelState vacrel)
static

Definition at line 2890 of file vacuumlazy.c.

2891{
2892 /* Don't warn more than once per VACUUM */
2894 return true;
2895
2897 {
2898 const int progress_index[] = {
2902 };
2904
2905 VacuumFailsafeActive = true;
2906
2907 /*
2908 * Abandon use of a buffer access strategy to allow use of all of
2909 * shared buffers. We assume the caller who allocated the memory for
2910 * the BufferAccessStrategy will free it.
2911 */
2912 vacrel->bstrategy = NULL;
2913
2914 /* Disable index vacuuming, index cleanup, and heap rel truncation */
2915 vacrel->do_index_vacuuming = false;
2916 vacrel->do_index_cleanup = false;
2917 vacrel->do_rel_truncate = false;
2918
2919 /* Reset the progress counters and set the failsafe mode */
2921
2923 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2924 vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2925 vacrel->num_index_scans),
2926 errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2927 errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2928 "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2929
2930 /* Stop applying cost limits from this point on */
2931 VacuumCostActive = false;
2933
2934 return true;
2935 }
2936
2937 return false;
2938}

References ereport, errdetail(), errhint(), errmsg, fb(), pgstat_progress_update_multi_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_FAILSAFE, unlikely, vacuum_xid_failsafe_check(), VacuumCostActive, VacuumCostBalance, VacuumFailsafeActive, and WARNING.

Referenced by heap_vacuum_rel(), lazy_scan_heap(), and lazy_vacuum_all_indexes().

◆ lazy_cleanup_all_indexes()

static void lazy_cleanup_all_indexes ( LVRelState vacrel)
static

Definition at line 2944 of file vacuumlazy.c.

2945{
2946 double reltuples = vacrel->new_rel_tuples;
2947 bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2948 const int progress_start_index[] = {
2951 };
2952 const int progress_end_index[] = {
2955 };
2957 int64 progress_end_val[2] = {0, 0};
2958
2959 Assert(vacrel->do_index_cleanup);
2960 Assert(vacrel->nindexes > 0);
2961
2962 /*
2963 * Report that we are now cleaning up indexes and the number of indexes to
2964 * cleanup.
2965 */
2967 progress_start_val[1] = vacrel->nindexes;
2969
2971 {
2972 for (int idx = 0; idx < vacrel->nindexes; idx++)
2973 {
2974 Relation indrel = vacrel->indrels[idx];
2975 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2976
2977 vacrel->indstats[idx] =
2978 lazy_cleanup_one_index(indrel, istat, reltuples,
2979 estimated_count, vacrel);
2980
2981 /* Report the number of indexes cleaned up */
2983 idx + 1);
2984 }
2985 }
2986 else
2987 {
2988 /* Outsource everything to parallel variant */
2990 vacrel->num_index_scans,
2991 estimated_count,
2992 &(vacrel->worker_usage.cleanup));
2993 }
2994
2995 /* Reset the progress counters */
2997}

References Assert, fb(), idx(), lazy_cleanup_one_index(), parallel_vacuum_cleanup_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_PHASE, and PROGRESS_VACUUM_PHASE_INDEX_CLEANUP.

Referenced by lazy_scan_heap().

◆ lazy_cleanup_one_index()

static IndexBulkDeleteResult * lazy_cleanup_one_index ( Relation  indrel,
IndexBulkDeleteResult istat,
double  reltuples,
bool  estimated_count,
LVRelState vacrel 
)
static

Definition at line 3062 of file vacuumlazy.c.

3065{
3068
3069 ivinfo.index = indrel;
3070 ivinfo.heaprel = vacrel->rel;
3071 ivinfo.analyze_only = false;
3072 ivinfo.report_progress = false;
3073 ivinfo.estimated_count = estimated_count;
3074 ivinfo.message_level = DEBUG2;
3075
3076 ivinfo.num_heap_tuples = reltuples;
3077 ivinfo.strategy = vacrel->bstrategy;
3078
3079 /*
3080 * Update error traceback information.
3081 *
3082 * The index name is saved during this phase and restored immediately
3083 * after this phase. See vacuum_error_callback.
3084 */
3085 Assert(vacrel->indname == NULL);
3090
3091 istat = vac_cleanup_one_index(&ivinfo, istat);
3092
3093 /* Revert to the previous phase information for error traceback */
3095 pfree(vacrel->indname);
3096 vacrel->indname = NULL;
3097
3098 return istat;
3099}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_cleanup_one_index(), and VACUUM_ERRCB_PHASE_INDEX_CLEANUP.

Referenced by lazy_cleanup_all_indexes().

◆ lazy_scan_heap()

static void lazy_scan_heap ( LVRelState vacrel)
static

Definition at line 1279 of file vacuumlazy.c.

1280{
1281 ReadStream *stream;
1282 BlockNumber rel_pages = vacrel->rel_pages,
1283 blkno = 0,
1286 vacrel->eager_scan_remaining_successes; /* for logging */
1287 Buffer vmbuffer = InvalidBuffer;
1288 const int initprog_index[] = {
1292 };
1294
1295 /* Report that we're scanning the heap, advertising total # of blocks */
1297 initprog_val[1] = rel_pages;
1298 initprog_val[2] = vacrel->dead_items_info->max_bytes;
1300
1301 /* Initialize for the first heap_vac_scan_next_block() call */
1302 vacrel->current_block = InvalidBlockNumber;
1303 vacrel->next_unskippable_block = InvalidBlockNumber;
1304 vacrel->next_unskippable_eager_scanned = false;
1305 vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1306
1307 /*
1308 * Set up the read stream for vacuum's first pass through the heap.
1309 *
1310 * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1311 * explicit work in heap_vac_scan_next_block.
1312 */
1314 vacrel->bstrategy,
1315 vacrel->rel,
1318 vacrel,
1319 sizeof(bool));
1320
1321 while (true)
1322 {
1323 Buffer buf;
1324 Page page;
1325 bool was_eager_scanned = false;
1326 int ndeleted = 0;
1327 bool has_lpdead_items;
1328 void *per_buffer_data = NULL;
1329 bool vm_page_frozen = false;
1330 bool got_cleanup_lock = false;
1331
1332 vacuum_delay_point(false);
1333
1334 /*
1335 * Regularly check if wraparound failsafe should trigger.
1336 *
1337 * There is a similar check inside lazy_vacuum_all_indexes(), but
1338 * relfrozenxid might start to look dangerously old before we reach
1339 * that point. This check also provides failsafe coverage for the
1340 * one-pass strategy, and the two-pass strategy with the index_cleanup
1341 * param set to 'off'.
1342 */
1343 if (vacrel->scanned_pages > 0 &&
1344 vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1346
1347 /*
1348 * Consider if we definitely have enough space to process TIDs on page
1349 * already. If we are close to overrunning the available space for
1350 * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1351 * this page. However, let's force at least one page-worth of tuples
 1352 * to be stored so as to ensure we do at least some work when the memory
1353 * configured is so low that we run out before storing anything.
1354 */
1355 if (vacrel->dead_items_info->num_items > 0 &&
1356 TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1357 {
1358 /*
1359 * Before beginning index vacuuming, we release any pin we may
1360 * hold on the visibility map page. This isn't necessary for
1361 * correctness, but we do it anyway to avoid holding the pin
1362 * across a lengthy, unrelated operation.
1363 */
1364 if (BufferIsValid(vmbuffer))
1365 {
1366 ReleaseBuffer(vmbuffer);
1367 vmbuffer = InvalidBuffer;
1368 }
1369
1370 /* Perform a round of index and heap vacuuming */
1371 vacrel->consider_bypass_optimization = false;
1373
1374 /*
1375 * Vacuum the Free Space Map to make newly-freed space visible on
1376 * upper-level FSM pages. Note that blkno is the previously
1377 * processed block.
1378 */
1380 blkno + 1);
1382
1383 /* Report that we are once again scanning the heap */
1386 }
1387
1388 buf = read_stream_next_buffer(stream, &per_buffer_data);
1389
1390 /* The relation is exhausted. */
1391 if (!BufferIsValid(buf))
1392 break;
1393
1394 was_eager_scanned = *((bool *) per_buffer_data);
1396 page = BufferGetPage(buf);
1397 blkno = BufferGetBlockNumber(buf);
1398
1399 vacrel->scanned_pages++;
1401 vacrel->eager_scanned_pages++;
1402
1403 /* Report as block scanned, update error traceback information */
1406 blkno, InvalidOffsetNumber);
1407
1408 /*
1409 * Pin the visibility map page in case we need to mark the page
1410 * all-visible. In most cases this will be very cheap, because we'll
1411 * already have the correct page pinned anyway.
1412 */
1413 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1414
1415 /*
1416 * We need a buffer cleanup lock to prune HOT chains and defragment
1417 * the page in lazy_scan_prune. But when it's not possible to acquire
1418 * a cleanup lock right away, we may be able to settle for reduced
1419 * processing using lazy_scan_noprune.
1420 */
1422
1423 if (!got_cleanup_lock)
1425
1426 /* Check for new or empty pages before lazy_scan_[no]prune call */
1428 vmbuffer))
1429 {
1430 /* Processed as new/empty page (lock and pin released) */
1431 continue;
1432 }
1433
1434 /*
1435 * If we didn't get the cleanup lock, we can still collect LP_DEAD
1436 * items in the dead_items area for later vacuuming, count live and
1437 * recently dead tuples for vacuum logging, and determine if this
1438 * block could later be truncated. If we encounter any xid/mxids that
1439 * require advancing the relfrozenxid/relminxid, we'll have to wait
1440 * for a cleanup lock and call lazy_scan_prune().
1441 */
1442 if (!got_cleanup_lock &&
1443 !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1444 {
1445 /*
1446 * lazy_scan_noprune could not do all required processing. Wait
1447 * for a cleanup lock, and call lazy_scan_prune in the usual way.
1448 */
1449 Assert(vacrel->aggressive);
1452 got_cleanup_lock = true;
1453 }
1454
1455 /*
1456 * If we have a cleanup lock, we must now prune, freeze, and count
1457 * tuples. We may have acquired the cleanup lock originally, or we may
1458 * have gone back and acquired it after lazy_scan_noprune() returned
1459 * false. Either way, the page hasn't been processed yet.
1460 *
1461 * Like lazy_scan_noprune(), lazy_scan_prune() will count
1462 * recently_dead_tuples and live tuples for vacuum logging, determine
1463 * if the block can later be truncated, and accumulate the details of
1464 * remaining LP_DEAD line pointers on the page into dead_items. These
1465 * dead items include those pruned by lazy_scan_prune() as well as
1466 * line pointers previously marked LP_DEAD.
1467 */
1468 if (got_cleanup_lock)
1469 ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1470 vmbuffer,
1472
1473 /*
1474 * Count an eagerly scanned page as a failure or a success.
1475 *
1476 * Only lazy_scan_prune() freezes pages, so if we didn't get the
1477 * cleanup lock, we won't have frozen the page. However, we only count
1478 * pages that were too new to require freezing as eager freeze
1479 * failures.
1480 *
1481 * We could gather more information from lazy_scan_noprune() about
1482 * whether or not there were tuples with XIDs or MXIDs older than the
1483 * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1484 * exclude pages skipped due to cleanup lock contention from eager
1485 * freeze algorithm caps.
1486 */
1488 {
1489 /* Aggressive vacuums do not eager scan. */
1490 Assert(!vacrel->aggressive);
1491
1492 if (vm_page_frozen)
1493 {
1494 if (vacrel->eager_scan_remaining_successes > 0)
1495 vacrel->eager_scan_remaining_successes--;
1496
1497 if (vacrel->eager_scan_remaining_successes == 0)
1498 {
1499 /*
1500 * Report only once that we disabled eager scanning. We
1501 * may eagerly read ahead blocks in excess of the success
1502 * or failure caps before attempting to freeze them, so we
1503 * could reach here even after disabling additional eager
1504 * scanning.
1505 */
1506 if (vacrel->eager_scan_max_fails_per_region > 0)
1507 ereport(vacrel->verbose ? INFO : DEBUG2,
1508 (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1510 vacrel->dbname, vacrel->relnamespace,
1511 vacrel->relname)));
1512
1513 /*
1514 * If we hit our success cap, permanently disable eager
1515 * scanning by setting the other eager scan management
1516 * fields to their disabled values.
1517 */
1518 vacrel->eager_scan_remaining_fails = 0;
1519 vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1520 vacrel->eager_scan_max_fails_per_region = 0;
1521 }
1522 }
1523 else if (vacrel->eager_scan_remaining_fails > 0)
1524 vacrel->eager_scan_remaining_fails--;
1525 }
1526
1527 /*
1528 * Now drop the buffer lock and, potentially, update the FSM.
1529 *
1530 * Our goal is to update the freespace map the last time we touch the
1531 * page. If we'll process a block in the second pass, we may free up
1532 * additional space on the page, so it is better to update the FSM
1533 * after the second pass. If the relation has no indexes, or if index
1534 * vacuuming is disabled, there will be no second heap pass; if this
1535 * particular page has no dead items, the second heap pass will not
1536 * touch this page. So, in those cases, update the FSM now.
1537 *
1538 * Note: In corner cases, it's possible to miss updating the FSM
1539 * entirely. If index vacuuming is currently enabled, we'll skip the
1540 * FSM update now. But if failsafe mode is later activated, or there
1541 * are so few dead tuples that index vacuuming is bypassed, there will
1542 * also be no opportunity to update the FSM later, because we'll never
1543 * revisit this page. Since updating the FSM is desirable but not
1544 * absolutely required, that's OK.
1545 */
1546 if (vacrel->nindexes == 0
1547 || !vacrel->do_index_vacuuming
1548 || !has_lpdead_items)
1549 {
1550 Size freespace = PageGetHeapFreeSpace(page);
1551
1553 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1554
1555 /*
1556 * Periodically perform FSM vacuuming to make newly-freed space
1557 * visible on upper FSM pages. This is done after vacuuming if the
1558 * table has indexes. There will only be newly-freed space if we
1559 * held the cleanup lock and lazy_scan_prune() was called.
1560 */
1561 if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1563 {
1565 blkno);
1567 }
1568 }
1569 else
1571 }
1572
1573 vacrel->blkno = InvalidBlockNumber;
1574 if (BufferIsValid(vmbuffer))
1575 ReleaseBuffer(vmbuffer);
1576
1577 /*
1578 * Report that everything is now scanned. We never skip scanning the last
1579 * block in the relation, so we can pass rel_pages here.
1580 */
1582 rel_pages);
1583
1584 /* now we can compute the new value for pg_class.reltuples */
1585 vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1586 vacrel->scanned_pages,
1587 vacrel->live_tuples);
1588
1589 /*
1590 * Also compute the total number of surviving heap entries. In the
1591 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1592 */
1593 vacrel->new_rel_tuples =
1594 Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1595 vacrel->missed_dead_tuples;
1596
1597 read_stream_end(stream);
1598
1599 /*
1600 * Do index vacuuming (call each index's ambulkdelete routine), then do
1601 * related heap vacuuming
1602 */
1603 if (vacrel->dead_items_info->num_items > 0)
1605
1606 /*
1607 * Vacuum the remainder of the Free Space Map. We must do this whether or
1608 * not there were indexes, and whether or not we bypassed index vacuuming.
1609 * We can pass rel_pages here because we never skip scanning the last
1610 * block of the relation.
1611 */
1612 if (rel_pages > next_fsm_block_to_vacuum)
1614
1615 /* report all blocks vacuumed */
1617
1618 /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1619 if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1621}

References Assert, buf, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CheckBufferIsPinnedOnce(), ConditionalLockBufferForCleanup(), DEBUG2, ereport, errmsg, FAILSAFE_EVERY_PAGES, fb(), FreeSpaceMapVacuumRange(), heap_vac_scan_next_block(), INFO, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_check_wraparound_failsafe(), lazy_cleanup_all_indexes(), lazy_scan_new_or_empty(), lazy_scan_noprune(), lazy_scan_prune(), lazy_vacuum(), LockBuffer(), LockBufferForCleanup(), MAIN_FORKNUM, Max, PageGetHeapFreeSpace(), pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_SCANNED, PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_SCAN_HEAP, PROGRESS_VACUUM_TOTAL_HEAP_BLKS, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), RecordPageWithFreeSpace(), ReleaseBuffer(), TidStoreMemoryUsage(), UnlockReleaseBuffer(), update_vacuum_error_info(), vac_estimate_reltuples(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_FSM_EVERY_PAGES, and visibilitymap_pin().

Referenced by heap_vacuum_rel().

◆ lazy_scan_new_or_empty()

static bool lazy_scan_new_or_empty ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool  sharelock,
Buffer  vmbuffer 
)
static

Definition at line 1877 of file vacuumlazy.c.

1879{
1880 Size freespace;
1881
1882 if (PageIsNew(page))
1883 {
1884 /*
1885 * All-zeroes pages can be left over if either a backend extends the
1886 * relation by a single page, but crashes before the newly initialized
1887 * page has been written out, or when bulk-extending the relation
1888 * (which creates a number of empty pages at the tail end of the
1889 * relation), and then enters them into the FSM.
1890 *
1891 * Note we do not enter the page into the visibilitymap. That has the
1892 * downside that we repeatedly visit this page in subsequent vacuums,
1893 * but otherwise we'll never discover the space on a promoted standby.
1894 * The harm of repeated checking ought to normally not be too bad. The
1895 * space usually should be used at some point, otherwise there
1896 * wouldn't be any regular vacuums.
1897 *
1898 * Make sure these pages are in the FSM, to ensure they can be reused.
1899 * Do that by testing if there's any space recorded for the page. If
1900 * not, enter it. We do so after releasing the lock on the heap page,
1901 * the FSM is approximate, after all.
1902 */
1904
1905 if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1906 {
1907 freespace = BLCKSZ - SizeOfPageHeaderData;
1908
1909 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1910 }
1911
1912 return true;
1913 }
1914
1915 if (PageIsEmpty(page))
1916 {
1917 /*
1918 * It seems likely that caller will always be able to get a cleanup
1919 * lock on an empty page. But don't take any chances -- escalate to
1920 * an exclusive lock (still don't need a cleanup lock, though).
1921 */
1922 if (sharelock)
1923 {
1926
1927 if (!PageIsEmpty(page))
1928 {
1929 /* page isn't new or empty -- keep lock and pin for now */
1930 return false;
1931 }
1932 }
1933 else
1934 {
1935 /* Already have a full cleanup lock (which is more than enough) */
1936 }
1937
1938 /*
1939 * Unlike new pages, empty pages are always set all-visible and
1940 * all-frozen.
1941 */
1942 if (!PageIsAllVisible(page))
1943 {
1944 /* Lock vmbuffer before entering critical section */
1946
1948
1949 /* mark buffer dirty before writing a WAL record */
1951
1952 PageSetAllVisible(page);
1953 PageClearPrunable(page);
1954 visibilitymap_set(blkno,
1955 vmbuffer,
1958 vacrel->rel->rd_locator);
1959
1960 /*
1961 * Emit WAL for setting PD_ALL_VISIBLE on the heap page and
1962 * setting the VM.
1963 */
1964 if (RelationNeedsWAL(vacrel->rel))
1966 vmbuffer,
1969 InvalidTransactionId, /* conflict xid */
1970 false, /* cleanup lock */
1971 PRUNE_VACUUM_SCAN, /* reason */
1972 NULL, 0,
1973 NULL, 0,
1974 NULL, 0,
1975 NULL, 0);
1976
1978
1979 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
1980
1981 /* Count the newly all-frozen pages for logging */
1982 vacrel->new_all_visible_pages++;
1983 vacrel->new_all_visible_all_frozen_pages++;
1984 }
1985
1986 freespace = PageGetHeapFreeSpace(page);
1988 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1989 return true;
1990 }
1991
1992 /* page isn't new or empty -- keep lock and pin */
1993 return false;
1994}

References buf, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, END_CRIT_SECTION, fb(), GetRecordedFreeSpace(), InvalidTransactionId, LockBuffer(), log_heap_prune_and_freeze(), MarkBufferDirty(), PageClearPrunable, PageGetHeapFreeSpace(), PageIsAllVisible(), PageIsEmpty(), PageIsNew(), PageSetAllVisible(), PRUNE_VACUUM_SCAN, RecordPageWithFreeSpace(), RelationNeedsWAL, SizeOfPageHeaderData, START_CRIT_SECTION, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, and visibilitymap_set().

Referenced by lazy_scan_heap().

◆ lazy_scan_noprune()

static bool lazy_scan_noprune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
bool has_lpdead_items 
)
static

Definition at line 2158 of file vacuumlazy.c.

2163{
2164 OffsetNumber offnum,
2165 maxoff;
2166 int lpdead_items,
2167 live_tuples,
2168 recently_dead_tuples,
2169 missed_dead_tuples;
2170 bool hastup;
2172 TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2173 MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2175
2176 Assert(BufferGetBlockNumber(buf) == blkno);
2177
2178 hastup = false; /* for now */
2179
2180 lpdead_items = 0;
2181 live_tuples = 0;
2182 recently_dead_tuples = 0;
2183 missed_dead_tuples = 0;
2184
2185 maxoff = PageGetMaxOffsetNumber(page);
2186 for (offnum = FirstOffsetNumber;
2187 offnum <= maxoff;
2188 offnum = OffsetNumberNext(offnum))
2189 {
2190 ItemId itemid;
2191 HeapTupleData tuple;
2192
2193 vacrel->offnum = offnum;
2194 itemid = PageGetItemId(page, offnum);
2195
2196 if (!ItemIdIsUsed(itemid))
2197 continue;
2198
2199 if (ItemIdIsRedirected(itemid))
2200 {
2201 hastup = true;
2202 continue;
2203 }
2204
2205 if (ItemIdIsDead(itemid))
2206 {
2207 /*
2208 * Deliberately don't set hastup=true here. See same point in
2209 * lazy_scan_prune for an explanation.
2210 */
2211 deadoffsets[lpdead_items++] = offnum;
2212 continue;
2213 }
2214
2215 hastup = true; /* page prevents rel truncation */
2216 tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2218 &NoFreezePageRelfrozenXid,
2219 &NoFreezePageRelminMxid))
2220 {
2221 /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2222 if (vacrel->aggressive)
2223 {
2224 /*
2225 * Aggressive VACUUMs must always be able to advance rel's
2226 * relfrozenxid to a value >= FreezeLimit (and be able to
2227 * advance rel's relminmxid to a value >= MultiXactCutoff).
2228 * The ongoing aggressive VACUUM won't be able to do that
2229 * unless it can freeze an XID (or MXID) from this tuple now.
2230 *
2231 * The only safe option is to have caller perform processing
2232 * of this page using lazy_scan_prune. Caller might have to
2233 * wait a while for a cleanup lock, but it can't be helped.
2234 */
2235 vacrel->offnum = InvalidOffsetNumber;
2236 return false;
2237 }
2238
2239 /*
2240 * Non-aggressive VACUUMs are under no obligation to advance
2241 * relfrozenxid (even by one XID). We can be much laxer here.
2242 *
2243 * Currently we always just accept an older final relfrozenxid
2244 * and/or relminmxid value. We never make caller wait or work a
2245 * little harder, even when it likely makes sense to do so.
2246 */
2247 }
2248
2249 ItemPointerSet(&(tuple.t_self), blkno, offnum);
2250 tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2251 tuple.t_len = ItemIdGetLength(itemid);
2252 tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2253
2254 switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2255 buf))
2256 {
2258 case HEAPTUPLE_LIVE:
2259
2260 /*
2261 * Count both cases as live, just like lazy_scan_prune
2262 */
2263 live_tuples++;
2264
2265 break;
2266 case HEAPTUPLE_DEAD:
2267
2268 /*
2269 * There is some useful work for pruning to do, that won't be
2270 * done due to failure to get a cleanup lock.
2271 */
2272 missed_dead_tuples++;
2273 break;
2275
2276 /*
2277 * Count in recently_dead_tuples, just like lazy_scan_prune
2278 */
2279 recently_dead_tuples++;
2280 break;
2282
2283 /*
2284 * Do not count these rows as live, just like lazy_scan_prune
2285 */
2286 break;
2287 default:
2288 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2289 break;
2290 }
2291 }
2292
2293 vacrel->offnum = InvalidOffsetNumber;
2294
2295 /*
2296 * By here we know for sure that caller can put off freezing and pruning
2297 * this particular page until the next VACUUM. Remember its details now.
2298 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2299 */
2300 vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2301 vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2302
2303 /* Save any LP_DEAD items found on the page in dead_items */
2304 if (vacrel->nindexes == 0)
2305 {
2306 /* Using one-pass strategy (since table has no indexes) */
2307 if (lpdead_items > 0)
2308 {
2309 /*
2310 * Perfunctory handling for the corner case where a single pass
2311 * strategy VACUUM cannot get a cleanup lock, and it turns out
2312 * that there is one or more LP_DEAD items: just count the LP_DEAD
2313 * items as missed_dead_tuples instead. (This is a bit dishonest,
2314 * but it beats having to maintain specialized heap vacuuming code
2315 * forever, for vanishingly little benefit.)
2316 */
2317 hastup = true;
2318 missed_dead_tuples += lpdead_items;
2319 }
2320 }
2321 else if (lpdead_items > 0)
2322 {
2323 /*
2324 * Page has LP_DEAD items, and so any references/TIDs that remain in
2325 * indexes will be deleted during index vacuuming (and then marked
2326 * LP_UNUSED in the heap)
2327 */
2328 vacrel->lpdead_item_pages++;
2329
2330 dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2331
2332 vacrel->lpdead_items += lpdead_items;
2333 }
2334
2335 /*
2336 * Finally, add relevant page-local counts to whole-VACUUM counts
2337 */
2338 vacrel->live_tuples += live_tuples;
2339 vacrel->recently_dead_tuples += recently_dead_tuples;
2340 vacrel->missed_dead_tuples += missed_dead_tuples;
2341 if (missed_dead_tuples > 0)
2342 vacrel->missed_dead_pages++;
2343
2344 /* Can't truncate this page */
2345 if (hastup)
2346 vacrel->nonempty_pages = blkno + 1;
2347
2348 /* Did we find LP_DEAD items? */
2349 *has_lpdead_items = (lpdead_items > 0);
2350
2351 /* Caller won't need to call lazy_scan_prune with same page */
2352 return true;
2353}

References Assert, buf, BufferGetBlockNumber(), dead_items_add(), elog, ERROR, fb(), FirstOffsetNumber, heap_tuple_should_freeze(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuum(), InvalidOffsetNumber, ItemIdGetLength, ItemIdIsDead, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), MaxHeapTuplesPerPage, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationGetRelid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by lazy_scan_heap().

◆ lazy_scan_prune()

static int lazy_scan_prune ( LVRelState vacrel,
Buffer  buf,
BlockNumber  blkno,
Page  page,
Buffer  vmbuffer,
bool has_lpdead_items,
bool vm_page_frozen 
)
static

Definition at line 2021 of file vacuumlazy.c.

2028{
2029 Relation rel = vacrel->rel;
2031 PruneFreezeParams params = {
2032 .relation = rel,
2033 .buffer = buf,
2034 .vmbuffer = vmbuffer,
2035 .reason = PRUNE_VACUUM_SCAN,
2037 .vistest = vacrel->vistest,
2038 .cutoffs = &vacrel->cutoffs,
2039 };
2040
2041 Assert(BufferGetBlockNumber(buf) == blkno);
2042
2043 /*
2044 * Prune all HOT-update chains and potentially freeze tuples on this page.
2045 *
2046 * If the relation has no indexes, we can immediately mark would-be dead
2047 * items LP_UNUSED.
2048 *
2049 * The number of tuples removed from the page is returned in
2050 * presult.ndeleted. It should not be confused with presult.lpdead_items;
2051 * presult.lpdead_items's final value can be thought of as the number of
2052 * tuples that were deleted from indexes.
2053 *
2054 * We will update the VM after collecting LP_DEAD items and freezing
2055 * tuples. Pruning will have determined whether or not the page is
2056 * all-visible.
2057 */
2058 if (vacrel->nindexes == 0)
2060
2061 /*
2062 * Allow skipping full inspection of pages that the VM indicates are
2063 * already all-frozen (which may be scanned due to SKIP_PAGES_THRESHOLD).
2064 * However, if DISABLE_PAGE_SKIPPING was specified, we can't trust the VM,
2065 * so we must examine the page to make sure it is truly all-frozen and fix
2066 * it otherwise.
2067 */
2068 if (vacrel->skipwithvm)
2070
2072 &presult,
2073 &vacrel->offnum,
2074 &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2075
2076 Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2077 Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2078
2079 if (presult.nfrozen > 0)
2080 {
2081 /*
2082 * We don't increment the new_frozen_tuple_pages instrumentation
2083 * counter when nfrozen == 0, since it only counts pages with newly
2084 * frozen tuples (don't confuse that with pages newly set all-frozen
2085 * in VM).
2086 */
2087 vacrel->new_frozen_tuple_pages++;
2088 }
2089
2090 /*
2091 * Now save details of the LP_DEAD items from the page in vacrel
2092 */
2093 if (presult.lpdead_items > 0)
2094 {
2095 vacrel->lpdead_item_pages++;
2096
2097 /*
2098 * deadoffsets are collected incrementally in
2099 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2100 * with an indeterminate order, but dead_items_add requires them to be
2101 * sorted.
2102 */
2103 qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2105
2106 dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2107 }
2108
2109 /* Finally, add page-local counts to whole-VACUUM counts */
2110 if (presult.newly_all_visible)
2111 vacrel->new_all_visible_pages++;
2112 if (presult.newly_all_visible_frozen)
2113 vacrel->new_all_visible_all_frozen_pages++;
2114 if (presult.newly_all_frozen)
2115 vacrel->new_all_frozen_pages++;
2116
2117 /* Capture if the page was newly set frozen */
2118 *vm_page_frozen = presult.newly_all_visible_frozen ||
2119 presult.newly_all_frozen;
2120
2121 vacrel->tuples_deleted += presult.ndeleted;
2122 vacrel->tuples_frozen += presult.nfrozen;
2123 vacrel->lpdead_items += presult.lpdead_items;
2124 vacrel->live_tuples += presult.live_tuples;
2125 vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2126
2127 /* Can't truncate this page */
2128 if (presult.hastup)
2129 vacrel->nonempty_pages = blkno + 1;
2130
2131 /* Did we find LP_DEAD items? */
2132 *has_lpdead_items = (presult.lpdead_items > 0);
2133
2134 return presult.ndeleted;
2135}
Relation relation
Definition heapam.h:262

References Assert, buf, BufferGetBlockNumber(), cmpOffsetNumbers(), dead_items_add(), fb(), HEAP_PAGE_PRUNE_ALLOW_FAST_PATH, heap_page_prune_and_freeze(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, HEAP_PAGE_PRUNE_SET_VM, MultiXactIdIsValid, PruneFreezeParams::options, PRUNE_VACUUM_SCAN, qsort, PruneFreezeParams::relation, and TransactionIdIsValid.

Referenced by lazy_scan_heap().

◆ lazy_truncate_heap()

static void lazy_truncate_heap ( LVRelState vacrel)
static

Definition at line 3142 of file vacuumlazy.c.

3143{
3144 BlockNumber orig_rel_pages = vacrel->rel_pages;
3147 int lock_retry;
3148
3149 /* Report that we are now truncating */
3152
3153 /* Update error traceback information one last time */
3155 vacrel->nonempty_pages, InvalidOffsetNumber);
3156
3157 /*
3158 * Loop until no more truncating can be done.
3159 */
3160 do
3161 {
3162 /*
3163 * We need full exclusive lock on the relation in order to do
3164 * truncation. If we can't get it, give up rather than waiting --- we
3165 * don't want to block other backends, and we don't want to deadlock
3166 * (which is quite possible considering we already hold a lower-grade
3167 * lock).
3168 */
3169 lock_waiter_detected = false;
3170 lock_retry = 0;
3171 while (true)
3172 {
3174 break;
3175
3176 /*
3177 * Check for interrupts while trying to (re-)acquire the exclusive
3178 * lock.
3179 */
3181
3184 {
3185 /*
3186 * We failed to establish the lock in the specified number of
3187 * retries. This means we give up truncating.
3188 */
3189 ereport(vacrel->verbose ? INFO : DEBUG2,
3190 (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3191 vacrel->relname)));
3192 return;
3193 }
3194
3200 }
3201
3202 /*
3203 * Now that we have exclusive lock, look to see if the rel has grown
3204 * whilst we were vacuuming with non-exclusive lock. If so, give up;
3205 * the newly added pages presumably contain non-deletable tuples.
3206 */
3209 {
3210 /*
3211 * Note: we intentionally don't update vacrel->rel_pages with the
3212 * new rel size here. If we did, it would amount to assuming that
3213 * the new pages are empty, which is unlikely. Leaving the numbers
3214 * alone amounts to assuming that the new pages have the same
3215 * tuple density as existing ones, which is less unlikely.
3216 */
3218 return;
3219 }
3220
3221 /*
3222 * Scan backwards from the end to verify that the end pages actually
3223 * contain no tuples. This is *necessary*, not optional, because
3224 * other backends could have added tuples to these pages whilst we
3225 * were vacuuming.
3226 */
3228 vacrel->blkno = new_rel_pages;
3229
3231 {
3232 /* can't do anything after all */
3234 return;
3235 }
3236
3237 /*
3238 * Okay to truncate.
3239 */
3241
3242 /*
3243 * We can release the exclusive lock as soon as we have truncated.
3244 * Other backends can't safely access the relation until they have
3245 * processed the smgr invalidation that smgrtruncate sent out ... but
3246 * that should happen as part of standard invalidation processing once
3247 * they acquire lock on the relation.
3248 */
3250
3251 /*
3252 * Update statistics. Here, it *is* correct to adjust rel_pages
3253 * without also touching reltuples, since the tuple count wasn't
3254 * changed by the truncation.
3255 */
3256 vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3257 vacrel->rel_pages = new_rel_pages;
3258
3259 ereport(vacrel->verbose ? INFO : DEBUG2,
3260 (errmsg("table \"%s\": truncated %u to %u pages",
3261 vacrel->relname,
3264 } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3265}

References AccessExclusiveLock, CHECK_FOR_INTERRUPTS, ConditionalLockRelation(), count_nondeletable_pages(), DEBUG2, ereport, errmsg, fb(), INFO, InvalidOffsetNumber, MyLatch, pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE, RelationGetNumberOfBlocks, RelationTruncate(), ResetLatch(), UnlockRelation(), update_vacuum_error_info(), VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_TRUNCATE_LOCK_TIMEOUT, VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL, WaitLatch(), WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, and WL_TIMEOUT.

Referenced by heap_vacuum_rel().

◆ lazy_vacuum()

static void lazy_vacuum ( LVRelState vacrel)
static

Definition at line 2369 of file vacuumlazy.c.

2370{
2371 bool bypass;
2372
2373 /* Should not end up here with no indexes */
2374 Assert(vacrel->nindexes > 0);
2375 Assert(vacrel->lpdead_item_pages > 0);
2376
2377 if (!vacrel->do_index_vacuuming)
2378 {
2379 Assert(!vacrel->do_index_cleanup);
2380 dead_items_reset(vacrel);
2381 return;
2382 }
2383
2384 /*
2385 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2386 *
2387 * We currently only do this in cases where the number of LP_DEAD items
2388 * for the entire VACUUM operation is close to zero. This avoids sharp
2389 * discontinuities in the duration and overhead of successive VACUUM
2390 * operations that run against the same table with a fixed workload.
2391 * Ideally, successive VACUUM operations will behave as if there are
2392 * exactly zero LP_DEAD items in cases where there are close to zero.
2393 *
2394 * This is likely to be helpful with a table that is continually affected
2395 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2396 * have small aberrations that lead to just a few heap pages retaining
2397 * only one or two LP_DEAD items. This is pretty common; even when the
2398 * DBA goes out of their way to make UPDATEs use HOT, it is practically
2399 * impossible to predict whether HOT will be applied in 100% of cases.
2400 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2401 * HOT through careful tuning.
2402 */
2403 bypass = false;
2404 if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2405 {
2406 BlockNumber threshold;
2407
2408 Assert(vacrel->num_index_scans == 0);
2409 Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2410 Assert(vacrel->do_index_vacuuming);
2411 Assert(vacrel->do_index_cleanup);
2412
2413 /*
2414 * This crossover point at which we'll start to do index vacuuming is
2415 * expressed as a percentage of the total number of heap pages in the
2416 * table that are known to have at least one LP_DEAD item. This is
2417 * much more important than the total number of LP_DEAD items, since
2418 * it's a proxy for the number of heap pages whose visibility map bits
2419 * cannot be set on account of bypassing index and heap vacuuming.
2420 *
2421 * We apply one further precautionary test: the space currently used
2422 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2423 * not exceed 32MB. This limits the risk that we will bypass index
2424 * vacuuming again and again until eventually there is a VACUUM whose
2425 * dead_items space is not CPU cache resident.
2426 *
2427 * We don't take any special steps to remember the LP_DEAD items (such
2428 * as counting them in our final update to the stats system) when the
2429 * optimization is applied. Though the accounting used in analyze.c's
2430 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2431 * rows in its own stats report, that's okay. The discrepancy should
2432 * be negligible. If this optimization is ever expanded to cover more
2433 * cases then this may need to be reconsidered.
2434 */
2435 threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2436 bypass = (vacrel->lpdead_item_pages < threshold &&
2437 TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2438 }
2439
2440 if (bypass)
2441 {
2442 /*
2443 * There are almost zero TIDs. Behave as if there were precisely
2444 * zero: bypass index vacuuming, but do index cleanup.
2445 *
2446 * We expect that the ongoing VACUUM operation will finish very
2447 * quickly, so there is no point in considering speeding up as a
2448 * failsafe against wraparound failure. (Index cleanup is expected to
2449 * finish very quickly in cases where there were no ambulkdelete()
2450 * calls.)
2451 */
2452 vacrel->do_index_vacuuming = false;
2453 }
2454 else if (lazy_vacuum_all_indexes(vacrel))
2455 {
2456 /*
2457 * We successfully completed a round of index vacuuming. Do related
2458 * heap vacuuming now.
2459 */
2460 lazy_vacuum_heap_rel(vacrel);
2461 }
2462 else
2463 {
2464 /*
2465 * Failsafe case.
2466 *
2467 * We attempted index vacuuming, but didn't finish a full round/full
2468 * index scan. This happens when relfrozenxid or relminmxid is too
2469 * far in the past.
2470 *
2471 * From this point on the VACUUM operation will do no further index
2472 * vacuuming or heap vacuuming. This VACUUM operation won't end up
2473 * back here again.
2474 */
2475 Assert(VacuumFailsafeActive);
2476 }
2477
2478 /*
2479 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2480 * vacuum)
2481 */
2482 dead_items_reset(vacrel);
2483}

References Assert, BYPASS_THRESHOLD_PAGES, dead_items_reset(), fb(), lazy_vacuum_all_indexes(), lazy_vacuum_heap_rel(), TidStoreMemoryUsage(), and VacuumFailsafeActive.

Referenced by lazy_scan_heap().

◆ lazy_vacuum_all_indexes()

static bool lazy_vacuum_all_indexes ( LVRelState vacrel)
static

Definition at line 2494 of file vacuumlazy.c.

2495{
2496 bool allindexes = true;
2497 double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2498 const int progress_start_index[] = {
2501 };
2502 const int progress_end_index[] = {
2506 };
2509
2510 Assert(vacrel->nindexes > 0);
2511 Assert(vacrel->do_index_vacuuming);
2512 Assert(vacrel->do_index_cleanup);
2513
2514 /* Precheck for XID wraparound emergencies */
2516 {
2517 /* Wraparound emergency -- don't even start an index scan */
2518 return false;
2519 }
2520
2521 /*
2522 * Report that we are now vacuuming indexes and the number of indexes to
2523 * vacuum.
2524 */
2526 progress_start_val[1] = vacrel->nindexes;
2528
2530 {
2531 for (int idx = 0; idx < vacrel->nindexes; idx++)
2532 {
2533 Relation indrel = vacrel->indrels[idx];
2534 IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2535
2536 vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2538 vacrel);
2539
2540 /* Report the number of indexes vacuumed */
2542 idx + 1);
2543
2545 {
2546 /* Wraparound emergency -- end current index scan */
2547 allindexes = false;
2548 break;
2549 }
2550 }
2551 }
2552 else
2553 {
2554 /* Outsource everything to parallel variant */
2556 vacrel->num_index_scans,
2557 &(vacrel->worker_usage.vacuum));
2558
2559 /*
2560 * Do a postcheck to consider applying wraparound failsafe now. Note
2561 * that parallel VACUUM only gets the precheck and this postcheck.
2562 */
2564 allindexes = false;
2565 }
2566
2567 /*
2568 * We delete all LP_DEAD items from the first heap pass in all indexes on
2569 * each call here (except calls where we choose to do the failsafe). This
2570 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2571 * of the failsafe triggering, which prevents the next call from taking
2572 * place).
2573 */
2574 Assert(vacrel->num_index_scans > 0 ||
2575 vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2577
2578 /*
2579 * Increase and report the number of index scans. Also, we reset
2580 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2581 *
2582 * We deliberately include the case where we started a round of bulk
2583 * deletes that we weren't able to finish due to the failsafe triggering.
2584 */
2585 vacrel->num_index_scans++;
2586 progress_end_val[0] = 0;
2587 progress_end_val[1] = 0;
2588 progress_end_val[2] = vacrel->num_index_scans;
2590
2591 return allindexes;
2592}

References Assert, fb(), idx(), lazy_check_wraparound_failsafe(), lazy_vacuum_one_index(), parallel_vacuum_bulkdel_all_indexes(), ParallelVacuumIsActive, pgstat_progress_update_multi_param(), pgstat_progress_update_param(), PROGRESS_VACUUM_INDEXES_PROCESSED, PROGRESS_VACUUM_INDEXES_TOTAL, PROGRESS_VACUUM_NUM_INDEX_VACUUMS, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX, and VacuumFailsafeActive.

Referenced by lazy_vacuum().

◆ lazy_vacuum_heap_page()

static void lazy_vacuum_heap_page ( LVRelState vacrel,
BlockNumber  blkno,
Buffer  buffer,
OffsetNumber deadoffsets,
int  num_offsets,
Buffer  vmbuffer 
)
static

Definition at line 2758 of file vacuumlazy.c.

2761{
2762 Page page = BufferGetPage(buffer);
2764 int nunused = 0;
2765 TransactionId newest_live_xid;
2767 bool all_frozen;
2769 uint8 vmflags = 0;
2770
2771 Assert(vacrel->do_index_vacuuming);
2772
2774
2775 /* Update error traceback information */
2779
2780 /*
2781 * Before marking dead items unused, check whether the page will become
2782 * all-visible once that change is applied. This lets us reap the tuples
2783 * and mark the page all-visible within the same critical section,
2784 * enabling both changes to be emitted in a single WAL record. Since the
2785 * visibility checks may perform I/O and allocate memory, they must be
2786 * done outside the critical section.
2787 */
2788 if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2789 vacrel->vistest, true,
2790 deadoffsets, num_offsets,
2791 &all_frozen, &newest_live_xid,
2792 &vacrel->offnum))
2793 {
2795 if (all_frozen)
2796 {
2798 Assert(!TransactionIdIsValid(newest_live_xid));
2799 }
2800
2801 /*
2802 * Take the lock on the vmbuffer before entering a critical section.
2803 * The heap page lock must also be held while updating the VM to
2804 * ensure consistency.
2805 */
2807 }
2808
2810
2811 for (int i = 0; i < num_offsets; i++)
2812 {
2813 ItemId itemid;
2814 OffsetNumber toff = deadoffsets[i];
2815
2816 itemid = PageGetItemId(page, toff);
2817
2818 Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2819 ItemIdSetUnused(itemid);
2820 unused[nunused++] = toff;
2821 }
2822
2823 Assert(nunused > 0);
2824
2825 /* Attempt to truncate line pointer array now */
2827
2828 if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2829 {
2830 /*
2831 * The page is guaranteed to have had dead line pointers, so we always
2832 * set PD_ALL_VISIBLE.
2833 */
2834 PageSetAllVisible(page);
2835 PageClearPrunable(page);
2836 visibilitymap_set(blkno,
2837 vmbuffer, vmflags,
2838 vacrel->rel->rd_locator);
2839 conflict_xid = newest_live_xid;
2840 }
2841
2842 /*
2843 * Mark buffer dirty before we write WAL.
2844 */
2845 MarkBufferDirty(buffer);
2846
2847 /* XLOG stuff */
2848 if (RelationNeedsWAL(vacrel->rel))
2849 {
2850 log_heap_prune_and_freeze(vacrel->rel, buffer,
2851 vmflags != 0 ? vmbuffer : InvalidBuffer,
2852 vmflags,
2854 false, /* no cleanup lock required */
2856 NULL, 0, /* frozen */
2857 NULL, 0, /* redirected */
2858 NULL, 0, /* dead */
2859 unused, nunused);
2860 }
2861
2863
2865 {
2866 /* Count the newly set VM page for logging */
2867 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2868 vacrel->new_all_visible_pages++;
2869 if (all_frozen)
2870 vacrel->new_all_visible_all_frozen_pages++;
2871 }
2872
2873 /* Revert to the previous phase information for error traceback */
2875}

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), END_CRIT_SECTION, fb(), heap_page_would_be_all_visible(), i, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, ItemIdHasStorage, ItemIdIsDead, ItemIdSetUnused, LockBuffer(), log_heap_prune_and_freeze(), MarkBufferDirty(), MaxHeapTuplesPerPage, PageClearPrunable, PageGetItemId(), PageSetAllVisible(), PageTruncateLinePointerArray(), pgstat_progress_update_param(), PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, PRUNE_VACUUM_CLEANUP, RelationNeedsWAL, restore_vacuum_error_info(), START_CRIT_SECTION, TransactionIdIsValid, update_vacuum_error_info(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_set(), and VISIBILITYMAP_VALID_BITS.

Referenced by lazy_vacuum_heap_rel().

◆ lazy_vacuum_heap_rel()

static void lazy_vacuum_heap_rel ( LVRelState vacrel)
static

Definition at line 2640 of file vacuumlazy.c.

2641{
2642 ReadStream *stream;
2644 Buffer vmbuffer = InvalidBuffer;
2646 TidStoreIter *iter;
2647
2648 Assert(vacrel->do_index_vacuuming);
2649 Assert(vacrel->do_index_cleanup);
2650 Assert(vacrel->num_index_scans > 0);
2651
2652 /* Report that we are now vacuuming the heap */
2655
2656 /* Update error traceback information */
2660
2661 iter = TidStoreBeginIterate(vacrel->dead_items);
2662
2663 /*
2664 * Set up the read stream for vacuum's second pass through the heap.
2665 *
2666 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2667 * not need to wait for IO and does not perform locking. Once we support
2668 * parallelism it should still be fine, as presumably the holder of locks
2669 * would never be blocked by IO while holding the lock.
2670 */
2673 vacrel->bstrategy,
2674 vacrel->rel,
2677 iter,
2678 sizeof(TidStoreIterResult));
2679
2680 while (true)
2681 {
2682 BlockNumber blkno;
2683 Buffer buf;
2684 Page page;
2686 Size freespace;
2688 int num_offsets;
2689
2690 vacuum_delay_point(false);
2691
2692 buf = read_stream_next_buffer(stream, (void **) &iter_result);
2693
2694 /* The relation is exhausted */
2695 if (!BufferIsValid(buf))
2696 break;
2697
2698 vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2699
2702 Assert(num_offsets <= lengthof(offsets));
2703
2704 /*
2705 * Pin the visibility map page in case we need to mark the page
2706 * all-visible. In most cases this will be very cheap, because we'll
2707 * already have the correct page pinned anyway.
2708 */
2709 visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2710
2711 /* We need a non-cleanup exclusive lock to mark dead_items unused */
2713 lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2714 num_offsets, vmbuffer);
2715
2716 /* Now that we've vacuumed the page, record its available space */
2717 page = BufferGetPage(buf);
2718 freespace = PageGetHeapFreeSpace(page);
2719
2721 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2723 }
2724
2725 read_stream_end(stream);
2726 TidStoreEndIterate(iter);
2727
2728 vacrel->blkno = InvalidBlockNumber;
2729 if (BufferIsValid(vmbuffer))
2730 ReleaseBuffer(vmbuffer);
2731
2732 /*
2733 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2734 * the second heap pass. No more, no less.
2735 */
2736 Assert(vacrel->num_index_scans > 1 ||
2737 (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2738 vacuumed_pages == vacrel->lpdead_item_pages));
2739
2741 (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2742 vacrel->relname, vacrel->dead_items_info->num_items,
2743 vacuumed_pages)));
2744
2745 /* Revert to the previous phase information for error traceback */
2747}

References Assert, buf, BUFFER_LOCK_EXCLUSIVE, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), DEBUG2, ereport, errmsg, fb(), InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, lazy_vacuum_heap_page(), lengthof, LockBuffer(), MAIN_FORKNUM, MaxOffsetNumber, PageGetHeapFreeSpace(), pgstat_progress_update_param(), PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP, read_stream_begin_relation(), read_stream_end(), READ_STREAM_MAINTENANCE, read_stream_next_buffer(), READ_STREAM_USE_BATCHING, RecordPageWithFreeSpace(), ReleaseBuffer(), restore_vacuum_error_info(), TidStoreBeginIterate(), TidStoreEndIterate(), TidStoreGetBlockOffsets(), UnlockReleaseBuffer(), update_vacuum_error_info(), vacuum_delay_point(), VACUUM_ERRCB_PHASE_VACUUM_HEAP, vacuum_reap_lp_read_stream_next(), and visibilitymap_pin().

Referenced by lazy_vacuum().

◆ lazy_vacuum_one_index()

static IndexBulkDeleteResult * lazy_vacuum_one_index ( Relation  indrel,
IndexBulkDeleteResult istat,
double  reltuples,
LVRelState vacrel 
)
static

Definition at line 3013 of file vacuumlazy.c.

3015{
3018
3019 ivinfo.index = indrel;
3020 ivinfo.heaprel = vacrel->rel;
3021 ivinfo.analyze_only = false;
3022 ivinfo.report_progress = false;
3023 ivinfo.estimated_count = true;
3024 ivinfo.message_level = DEBUG2;
3025 ivinfo.num_heap_tuples = reltuples;
3026 ivinfo.strategy = vacrel->bstrategy;
3027
3028 /*
3029 * Update error traceback information.
3030 *
3031 * The index name is saved during this phase and restored immediately
3032 * after this phase. See vacuum_error_callback.
3033 */
3034 Assert(vacrel->indname == NULL);
3039
3040 /* Do bulk deletion */
3041 istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3042 vacrel->dead_items_info);
3043
3044 /* Revert to the previous phase information for error traceback */
3046 pfree(vacrel->indname);
3047 vacrel->indname = NULL;
3048
3049 return istat;
3050}

References Assert, DEBUG2, fb(), InvalidBlockNumber, InvalidOffsetNumber, pfree(), pstrdup(), RelationGetRelationName, restore_vacuum_error_info(), update_vacuum_error_info(), vac_bulkdel_one_index(), and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by lazy_vacuum_all_indexes().

◆ restore_vacuum_error_info()

static void restore_vacuum_error_info ( LVRelState vacrel,
const LVSavedErrInfo saved_vacrel 
)
static

Definition at line 3877 of file vacuumlazy.c.

3879{
3880 vacrel->blkno = saved_vacrel->blkno;
3881 vacrel->offnum = saved_vacrel->offnum;
3882 vacrel->phase = saved_vacrel->phase;
3883}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().

◆ should_attempt_truncation()

static bool should_attempt_truncation ( LVRelState vacrel)
static

Definition at line 3122 of file vacuumlazy.c.

3123{
3124 BlockNumber possibly_freeable;
3125
3126 if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3127 return false;
3128
3129 possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3130 if (possibly_freeable > 0 &&
3131 (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3132 possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3133 return true;
3134
3135 return false;
3136}

References fb(), REL_TRUNCATE_FRACTION, REL_TRUNCATE_MINIMUM, and VacuumFailsafeActive.

Referenced by heap_vacuum_rel().

◆ update_relstats_all_indexes()

static void update_relstats_all_indexes ( LVRelState vacrel)
static

Definition at line 3759 of file vacuumlazy.c.

3760{
3761 Relation *indrels = vacrel->indrels;
3762 int nindexes = vacrel->nindexes;
3763 IndexBulkDeleteResult **indstats = vacrel->indstats;
3764
3765 Assert(vacrel->do_index_cleanup);
3766
3767 for (int idx = 0; idx < nindexes; idx++)
3768 {
3769 Relation indrel = indrels[idx];
3770 IndexBulkDeleteResult *istat = indstats[idx];
3771
3772 if (istat == NULL || istat->estimated_count)
3773 continue;
3774
3775 /* Update index statistics */
3777 istat->num_pages,
3778 istat->num_index_tuples,
3779 0, 0,
3780 false,
3783 NULL, NULL, false);
3784 }
3785}

References Assert, IndexBulkDeleteResult::estimated_count, fb(), idx(), InvalidMultiXactId, InvalidTransactionId, IndexBulkDeleteResult::num_index_tuples, IndexBulkDeleteResult::num_pages, and vac_update_relstats().

Referenced by heap_vacuum_rel().

◆ update_vacuum_error_info()

static void update_vacuum_error_info ( LVRelState vacrel,
LVSavedErrInfo saved_vacrel,
int  phase,
BlockNumber  blkno,
OffsetNumber  offnum 
)
static

Definition at line 3858 of file vacuumlazy.c.

3860{
3861 if (saved_vacrel)
3862 {
3863 saved_vacrel->offnum = vacrel->offnum;
3864 saved_vacrel->blkno = vacrel->blkno;
3865 saved_vacrel->phase = vacrel->phase;
3866 }
3867
3868 vacrel->blkno = blkno;
3869 vacrel->offnum = offnum;
3870 vacrel->phase = phase;
3871}

References fb().

Referenced by lazy_cleanup_one_index(), lazy_scan_heap(), lazy_truncate_heap(), lazy_vacuum_heap_page(), lazy_vacuum_heap_rel(), and lazy_vacuum_one_index().

◆ vacuum_error_callback()

static void vacuum_error_callback ( void arg)
static

Definition at line 3794 of file vacuumlazy.c.

3795{
3797
3798 switch (errinfo->phase)
3799 {
3801 if (BlockNumberIsValid(errinfo->blkno))
3802 {
3803 if (OffsetNumberIsValid(errinfo->offnum))
3804 errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3805 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3806 else
3807 errcontext("while scanning block %u of relation \"%s.%s\"",
3808 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3809 }
3810 else
3811 errcontext("while scanning relation \"%s.%s\"",
3812 errinfo->relnamespace, errinfo->relname);
3813 break;
3814
3816 if (BlockNumberIsValid(errinfo->blkno))
3817 {
3818 if (OffsetNumberIsValid(errinfo->offnum))
3819 errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3820 errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3821 else
3822 errcontext("while vacuuming block %u of relation \"%s.%s\"",
3823 errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3824 }
3825 else
3826 errcontext("while vacuuming relation \"%s.%s\"",
3827 errinfo->relnamespace, errinfo->relname);
3828 break;
3829
3831 errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3832 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3833 break;
3834
3836 errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3837 errinfo->indname, errinfo->relnamespace, errinfo->relname);
3838 break;
3839
3841 if (BlockNumberIsValid(errinfo->blkno))
3842 errcontext("while truncating relation \"%s.%s\" to %u blocks",
3843 errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3844 break;
3845
3847 default:
3848 return; /* do nothing; the errinfo may not be
3849 * initialized */
3850 }
3851}

References arg, BlockNumberIsValid(), errcontext, fb(), OffsetNumberIsValid, VACUUM_ERRCB_PHASE_INDEX_CLEANUP, VACUUM_ERRCB_PHASE_SCAN_HEAP, VACUUM_ERRCB_PHASE_TRUNCATE, VACUUM_ERRCB_PHASE_UNKNOWN, VACUUM_ERRCB_PHASE_VACUUM_HEAP, and VACUUM_ERRCB_PHASE_VACUUM_INDEX.

Referenced by heap_vacuum_rel().

◆ vacuum_reap_lp_read_stream_next()

static BlockNumber vacuum_reap_lp_read_stream_next ( ReadStream stream,
void callback_private_data,
void per_buffer_data 
)
static

Definition at line 2602 of file vacuumlazy.c.

2605{
2606 TidStoreIter *iter = callback_private_data;
2608
2610 if (iter_result == NULL)
2611 return InvalidBlockNumber;
2612
2613 /*
2614 * Save the TidStoreIterResult for later, so we can extract the offsets.
2615 * It is safe to copy the result, according to TidStoreIterateNext().
2616 */
2617 memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2618
2619 return iter_result->blkno;
2620}

References fb(), InvalidBlockNumber, memcpy(), and TidStoreIterateNext().

Referenced by lazy_vacuum_heap_rel().