/*
 * pruneheap.c — PostgreSQL source code (git master).
 * (Doxygen page chrome — "Loading...", "Searching...", "No Matches" —
 * removed; each following line still carries the original file's line
 * number from the HTML extraction.)
 */
1/*-------------------------------------------------------------------------
2 *
3 * pruneheap.c
4 * heap page pruning and HOT-chain management code
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/heap/pruneheap.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/heapam.h"
18#include "access/heapam_xlog.h"
19#include "access/htup_details.h"
20#include "access/multixact.h"
21#include "access/transam.h"
23#include "access/xlog.h"
24#include "access/xloginsert.h"
25#include "commands/vacuum.h"
26#include "executor/instrument.h"
27#include "miscadmin.h"
28#include "pgstat.h"
29#include "storage/bufmgr.h"
30#include "utils/rel.h"
31#include "utils/snapmgr.h"
32
/*
 * NOTE(review): this is a doxygen extraction.  Gaps in the embedded line
 * numbers (e.g. 41->43, 45->49, 54->58) show that the actual field
 * declarations under several of the comments below were dropped; only the
 * comments survived.  Verify the full member list against upstream
 * pruneheap.c before relying on this struct definition.
 */
33/* Working data for heap_page_prune_and_freeze() and subroutines */
34typedef struct
35{
36 /*-------------------------------------------------------
37 * Arguments passed to heap_page_prune_and_freeze()
38 *-------------------------------------------------------
39 */
40
41 /* tuple visibility test, initialized for the relation */
43 /* whether or not dead items can be set LP_UNUSED during pruning */
45 /* whether to attempt freezing tuples */
49

50 /*
51 * Keep the buffer, block, and page handy so that helpers needing to
52 * access them don't need to make repeated calls to BufferGetBlockNumber()
53 * and BufferGetPage().
54 */
58

59 /*-------------------------------------------------------
60 * Fields describing what to do to the page
61 *-------------------------------------------------------
62 */
63 TransactionId new_prune_xid; /* new prune hint value */
65 int nredirected; /* numbers of entries in arrays below */
66 int ndead;
69 /* arrays that accumulate indexes of items to be changed */
74

75 /*
76 * set_all_visible and set_all_frozen indicate if the all-visible and
77 * all-frozen bits in the visibility map can be set for this page after
78 * pruning.
79 *
80 * NOTE: set_all_visible and set_all_frozen initially don't include
81 * LP_DEAD items. That's convenient for heap_page_prune_and_freeze() to
82 * use them to decide whether to opportunistically freeze the page or not.
83 * The set_all_visible and set_all_frozen values ultimately used to set
84 * the VM are adjusted to include LP_DEAD items after we determine whether
85 * or not to opportunistically freeze.
86 */
89

90 /*-------------------------------------------------------
91 * Working state for HOT chain processing
92 *-------------------------------------------------------
93 */
94

95 /*
96 * 'root_items' contains offsets of all LP_REDIRECT line pointers and
97 * normal non-HOT tuples. They can be stand-alone items or the first item
98 * in a HOT chain. 'heaponly_items' contains heap-only tuples which can
99 * only be removed as part of a HOT chain.
100 */
105

106 /*
107 * processed[offnum] is true if item at offnum has been processed.
108 *
109 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
110 * 1. Otherwise every access would need to subtract 1.
111 */
112 bool processed[MaxHeapTuplesPerPage + 1];
113
114 /*
115 * Tuple visibility is only computed once for each tuple, for correctness
116 * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
117 * details. This is of type int8[], instead of HTSV_Result[], so we can
118 * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
119 * items.
120 *
121 * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
122 * 1. Otherwise every access would need to subtract 1.
123 */
125

126 /*-------------------------------------------------------
127 * Working state for freezing
128 *-------------------------------------------------------
129 */
131

132 /*-------------------------------------------------------
133 * Working state for visibility map processing
134 *-------------------------------------------------------
135 */
136

137 /*
138 * Caller must provide a pinned vmbuffer corresponding to the heap block
139 * passed to heap_page_prune_and_freeze(). We will fix any corruption
140 * found in the VM and set the VM if the page is all-visible/all-frozen.
141 */
143

144 /*
145 * The state of the VM bits at the beginning of pruning and the state they
146 * will be in at the end.
147 */
150

151 /* The newest xmin of live tuples on the page */
153

154 /*-------------------------------------------------------
155 * Information about what was done
156 *
157 * These fields are not used by pruning itself for the most part, but are
158 * used to collect information about what was pruned and what state the
159 * page is in after pruning, for the benefit of the caller. They are
160 * copied to the caller's PruneFreezeResult at the end.
161 * -------------------------------------------------------
162 */
163
164 int ndeleted; /* Number of tuples deleted from the page */
165
166 /* Number of live and recently dead tuples, after pruning */
169

170 /* Whether or not the page makes rel truncation unsafe */
171 bool hastup;
172
173 /*
174 * LP_DEAD items on the page after pruning. Includes existing LP_DEAD
175 * items
176 */
177 int lpdead_items; /* number of items in the array */
178 OffsetNumber *deadoffsets; /* points directly to presult->deadoffsets */
179} PruneState;
180
/*
 * NOTE(review): the extraction dropped the enum's opening line (original
 * line 187, presumably "typedef enum ...") and the enumerator identifiers
 * at lines 190, 192 and 194-195; only the brace and the per-enumerator
 * comments survive.  Consult upstream for the actual enumerator names.
 */
181/*
182 * Type of visibility map corruption detected on a heap page and its
183 * associated VM page. Passed to heap_page_fix_vm_corruption() so the caller
184 * can specify what it found rather than having the function rederive the
185 * corruption from page state.
186 */
188{
189 /* VM bits are set but the heap page-level PD_ALL_VISIBLE flag is not */
191 /* LP_DEAD line pointers found on a page marked all-visible */
193 /* Tuple not visible to all transactions on a page marked all-visible */
196
197/* Local functions */
198static void prune_freeze_setup(PruneFreezeParams *params,
204 OffsetNumber offnum,
205 VMCorruptionType ctype);
211 HeapTuple tup);
212static inline HTSV_Result htsv_get_valid_status(int status);
213static void heap_prune_chain(OffsetNumber maxoff,
216 OffsetNumber offnum);
219 bool was_normal);
221 bool was_normal);
223 bool was_normal);
225
230
231static void page_verify_redirects(Page page);
232
236
237
/*
 * NOTE(review): extraction gaps here (e.g. missing original lines 259, 261,
 * 276-277, 301-302, 308, 338, 340, 358, 363) dropped several declarations
 * and calls: the local prune_xid/minfree/presult declarations, the
 * prune_xid precheck condition, the conditional-cleanup-lock attempt, the
 * heap_page_prune_and_freeze() call, the pgstat report call, and the
 * buffer-unlock call.  Verify against upstream before editing.
 */
238/*
239 * Optionally prune and repair fragmentation in the specified page.
240 *
241 * This is an opportunistic function. It will perform housekeeping
242 * only if the page heuristically looks like a candidate for pruning and we
243 * can acquire buffer cleanup lock without blocking.
244 *
245 * Note: this is called quite often. It's important that it fall out quickly
246 * if there's not any use in pruning.
247 *
248 * Caller must have pin on the buffer, and must *not* have a lock on it.
249 *
250 * This function may pin *vmbuffer. It's passed by reference so the caller can
251 * reuse the pin across calls, avoiding repeated pin/unpin cycles. If we find
252 * VM corruption during pruning, we will fix it. Caller is responsible for
253 * unpinning *vmbuffer.
254 */
255void
256heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
257{
258 Page page = BufferGetPage(buffer);
260 GlobalVisState *vistest;
262
263 /*
264 * We can't write WAL in recovery mode, so there's no point trying to
265 * clean the page. The primary will likely issue a cleaning WAL record
266 * soon anyway, so this is no particular loss.
267 */
268 if (RecoveryInProgress())
269 return;
270
271 /*
272 * First check whether there's any chance there's something to prune,
273 * determining the appropriate horizon is a waste if there's no prune_xid
274 * (i.e. no updates/deletes left potentially dead tuples around).
275 */
278 return;
279
280 /*
281 * Check whether prune_xid indicates that there may be dead rows that can
282 * be cleaned up.
283 */
284 vistest = GlobalVisTestFor(relation);
285
286 if (!GlobalVisTestIsRemovableXid(vistest, prune_xid, true))
287 return;
288
289 /*
290 * We prune when a previous UPDATE failed to find enough space on the page
291 * for a new tuple version, or when free space falls below the relation's
292 * fill-factor target (but not less than 10%).
293 *
294 * Checking free space here is questionable since we aren't holding any
295 * lock on the buffer; in the worst case we could get a bogus answer. It's
296 * unlikely to be *seriously* wrong, though, since reading either pd_lower
297 * or pd_upper is probably atomic. Avoiding taking a lock seems more
298 * important than sometimes getting a wrong answer in what is after all
299 * just a heuristic estimate.
300 */
303 minfree = Max(minfree, BLCKSZ / 10);
304
305 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
306 {
307 /* OK, try to get exclusive buffer lock */
309 return;
310
311 /*
312 * Now that we have buffer lock, get accurate information about the
313 * page's free space, and recheck the heuristic about whether to
314 * prune.
315 */
316 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
317 {
320 PruneFreezeParams params;
321
322 visibilitymap_pin(relation, BufferGetBlockNumber(buffer),
323 vmbuffer);
324
325 params.relation = relation;
326 params.buffer = buffer;
327 params.vmbuffer = *vmbuffer;
328 params.reason = PRUNE_ON_ACCESS;
329 params.vistest = vistest;
330 params.cutoffs = NULL;
331
332 /*
333 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
334 * regardless of whether or not the relation has indexes, since we
335 * cannot safely determine that during on-access pruning with the
336 * current implementation.
337 */
339
341 NULL, NULL);
342
343 /*
344 * Report the number of tuples reclaimed to pgstats. This is
345 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
346 *
347 * We derive the number of dead tuples like this to avoid totally
348 * forgetting about items that were set to LP_DEAD, since they
349 * still need to be cleaned up by VACUUM. We only want to count
350 * heap-only tuples that just became LP_UNUSED in our report,
351 * which don't.
352 *
353 * VACUUM doesn't have to compensate in the same way when it
354 * tracks ndeleted, since it will set the same LP_DEAD items to
355 * LP_UNUSED separately.
356 */
357 if (presult.ndeleted > presult.nnewlpdead)
359 presult.ndeleted - presult.nnewlpdead);
360 }
361
362 /* And release buffer lock */
364

365 /*
366 * We avoid reuse of any free space created on the page by unrelated
367 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
368 * free space should be reused by UPDATEs to *this* page.
369 */
370 }
371}
372
/*
 * NOTE(review): the extraction dropped this function's parameter list
 * (original lines 381-385), the right-hand side of the mark_unused_now
 * assignment (line 390), and two lines inside the attempt_freeze branches
 * (lines 431 and 439 — presumably Asserts on the caller-provided
 * new_relfrozen_xid/new_relmin_mxid pointers).  Verify against upstream.
 */
373/*
374 * Helper for heap_page_prune_and_freeze() to initialize the PruneState using
375 * the provided parameters.
376 *
377 * params, new_relfrozen_xid, new_relmin_mxid, and presult are input
378 * parameters and are not modified by this function. Only prstate is modified.
379 */
380static void
386{
387 /* Copy parameters to prstate */
388 prstate->vistest = params->vistest;
389 prstate->mark_unused_now =
391
392 /* cutoffs must be provided if we will attempt freezing */
393 Assert(!(params->options & HEAP_PAGE_PRUNE_FREEZE) || params->cutoffs);
394 prstate->attempt_freeze = (params->options & HEAP_PAGE_PRUNE_FREEZE) != 0;
395 prstate->cutoffs = params->cutoffs;
396 prstate->relation = params->relation;
397 prstate->block = BufferGetBlockNumber(params->buffer);
398 prstate->buffer = params->buffer;
399 prstate->page = BufferGetPage(params->buffer);
400
401 Assert(BufferIsValid(params->vmbuffer));
402 prstate->vmbuffer = params->vmbuffer;
403 prstate->new_vmbits = 0;
404 prstate->old_vmbits = visibilitymap_get_status(prstate->relation,
405 prstate->block,
406 &prstate->vmbuffer);
407
408 /*
409 * Our strategy is to scan the page and make lists of items to change,
410 * then apply the changes within a critical section. This keeps as much
411 * logic as possible out of the critical section, and also ensures that
412 * WAL replay will work the same as the normal case.
413 *
414 * First, initialize the new pd_prune_xid value to zero (indicating no
415 * prunable tuples). If we find any tuples which may soon become
416 * prunable, we will save the lowest relevant XID in new_prune_xid. Also
417 * initialize the rest of our working state.
418 */
419 prstate->new_prune_xid = InvalidTransactionId;
420 prstate->latest_xid_removed = InvalidTransactionId;
421 prstate->nredirected = prstate->ndead = prstate->nunused = 0;
422 prstate->nfrozen = 0;
423 prstate->nroot_items = 0;
424 prstate->nheaponly_items = 0;
425
426 /* initialize page freezing working state */
427 prstate->pagefrz.freeze_required = false;
428 prstate->pagefrz.FreezePageConflictXid = InvalidTransactionId;
429 if (prstate->attempt_freeze)
430 {
432 prstate->pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
433 prstate->pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
434 prstate->pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
435 prstate->pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
436 }
437 else
438 {
440 prstate->pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
441 prstate->pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
442 prstate->pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
443 prstate->pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
444 }
445
446 prstate->ndeleted = 0;
447 prstate->live_tuples = 0;
448 prstate->recently_dead_tuples = 0;
449 prstate->hastup = false;
450 prstate->lpdead_items = 0;
451
452 /*
453 * deadoffsets are filled in during pruning but are only used to populate
454 * PruneFreezeResult->deadoffsets. To avoid needing two copies of the
455 * array, just save a pointer to the result offsets array in the
456 * PruneState.
457 */
458 prstate->deadoffsets = presult->deadoffsets;
459
460 /*
461 * We track whether the page will be all-visible/all-frozen at the end of
462 * pruning and freezing. While examining tuple visibility, we'll set
463 * set_all_visible to false if there are tuples on the page not visible to
464 * all running and future transactions. set_all_visible is always
465 * maintained but only VACUUM will set the VM if the page ends up being
466 * all-visible.
467 *
468 * We also keep track of the newest live XID, which is used to calculate
469 * the snapshot conflict horizon for a WAL record setting the VM.
470 */
471 prstate->set_all_visible = true;
472 prstate->newest_live_xid = InvalidTransactionId;
473
474 /*
475 * Currently, only VACUUM performs freezing, but other callers may in the
476 * future. We must initialize set_all_frozen based on whether or not the
477 * caller passed HEAP_PAGE_PRUNE_FREEZE, because if they did not, we won't
478 * call heap_prepare_freeze_tuple() for each tuple, and set_all_frozen
479 * will never be cleared for tuples that need freezing. This would lead to
480 * incorrectly setting the visibility map all-frozen for this page.
481 *
482 * When freezing is not required (no XIDs/MXIDs older than the freeze
483 * cutoff), we may still choose to "opportunistically" freeze if doing so
484 * would make the page all-frozen.
485 *
486 * We will not be able to freeze the whole page at the end of vacuum if
487 * there are tuples present that are not visible to everyone or if there
488 * are dead tuples which will not be removable. However, dead tuples that
489 * will be removed by the end of vacuum should not prevent this
490 * opportunistic freezing.
491 *
492 * Therefore, we do not clear set_all_visible and set_all_frozen when we
493 * encounter LP_DEAD items. Instead, we correct them after deciding
494 * whether to freeze, but before updating the VM, to avoid setting the VM
495 * bits incorrectly.
496 */
497 prstate->set_all_frozen = prstate->attempt_freeze;
498}
499
/*
 * NOTE(review): the extraction dropped this helper's name/parameter line
 * (original line 510) and several statements: local declarations at lines
 * 514/516/518 (a HeapTupleData 'tup' and likely 'maxoff'), the
 * "record unchanged" calls at lines 559, 572, 593 and 680, the
 * HeapTupleSatisfiesVacuum assignment at line 593-ish, the
 * HeapTupleHeaderIsHotUpdated test at line 659 and the
 * HeapTupleHeaderAdvanceConflictHorizon call at line 661, and the final
 * *off_loc reset at line 696.  Verify against upstream before editing.
 */
500/*
501 * Helper for heap_page_prune_and_freeze(). Iterates over every tuple on the
502 * page, examines its visibility information, and determines the appropriate
503 * action for each tuple. All tuples are processed and classified during this
504 * phase, but no modifications are made to the page until the later execution
505 * stage.
506 *
507 * *off_loc is used for error callback and cleared before returning.
508 */
509static void
511{
512 Page page = prstate->page;
513 BlockNumber blockno = prstate->block;
515 OffsetNumber offnum;
517

519

520 /*
521 * Determine HTSV for all tuples, and queue them up for processing as HOT
522 * chain roots or as heap-only items.
523 *
524 * Determining HTSV only once for each tuple is required for correctness,
525 * to deal with cases where running HTSV twice could result in different
526 * results. For example, RECENTLY_DEAD can turn to DEAD if another
527 * checked item causes GlobalVisTestIsRemovableFullXid() to update the
528 * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
529 * transaction aborts.
530 *
531 * It's also good for performance. Most commonly tuples within a page are
532 * stored at decreasing offsets (while the items are stored at increasing
533 * offsets). When processing all tuples on a page this leads to reading
534 * memory at decreasing offsets within a page, with a variable stride.
535 * That's hard for CPU prefetchers to deal with. Processing the items in
536 * reverse order (and thus the tuples in increasing order) increases
537 * prefetching efficiency significantly / decreases the number of cache
538 * misses.
539 */
540 for (offnum = maxoff;
541 offnum >= FirstOffsetNumber;
542 offnum = OffsetNumberPrev(offnum))
543 {
544 ItemId itemid = PageGetItemId(page, offnum);
545 HeapTupleHeader htup;
546
547 /*
548 * Set the offset number so that we can display it along with any
549 * error that occurred while processing this tuple.
550 */
551 *off_loc = offnum;
552
553 prstate->processed[offnum] = false;
554 prstate->htsv[offnum] = -1;
555
556 /* Nothing to do if slot doesn't contain a tuple */
557 if (!ItemIdIsUsed(itemid))
558 {
560 continue;
561 }
562
563 if (ItemIdIsDead(itemid))
564 {
565 /*
566 * If the caller set mark_unused_now true, we can set dead line
567 * pointers LP_UNUSED now.
568 */
569 if (unlikely(prstate->mark_unused_now))
570 heap_prune_record_unused(prstate, offnum, false);
571 else
573 continue;
574 }
575
576 if (ItemIdIsRedirected(itemid))
577 {
578 /* This is the start of a HOT chain */
579 prstate->root_items[prstate->nroot_items++] = offnum;
580 continue;
581 }
582
583 Assert(ItemIdIsNormal(itemid));
584
585 /*
586 * Get the tuple's visibility status and queue it up for processing.
587 */
588 htup = (HeapTupleHeader) PageGetItem(page, itemid);
589 tup.t_data = htup;
590 tup.t_len = ItemIdGetLength(itemid);
591 ItemPointerSet(&tup.t_self, blockno, offnum);
592
594

595 if (!HeapTupleHeaderIsHeapOnly(htup))
596 prstate->root_items[prstate->nroot_items++] = offnum;
597 else
598 prstate->heaponly_items[prstate->nheaponly_items++] = offnum;
599 }
600
601 /*
602 * Process HOT chains.
603 *
604 * We added the items to the array starting from 'maxoff', so by
605 * processing the array in reverse order, we process the items in
606 * ascending offset number order. The order doesn't matter for
607 * correctness, but some quick micro-benchmarking suggests that this is
608 * faster. (Earlier PostgreSQL versions, which scanned all the items on
609 * the page instead of using the root_items array, also did it in
610 * ascending offset number order.)
611 */
612 for (int i = prstate->nroot_items - 1; i >= 0; i--)
613 {
614 offnum = prstate->root_items[i];
615
616 /* Ignore items already processed as part of an earlier chain */
617 if (prstate->processed[offnum])
618 continue;
619
620 /* see preceding loop */
621 *off_loc = offnum;
622
623 /* Process this item or chain of items */
624 heap_prune_chain(maxoff, offnum, prstate);
625 }
626
627 /*
628 * Process any heap-only tuples that were not already processed as part of
629 * a HOT chain.
630 */
631 for (int i = prstate->nheaponly_items - 1; i >= 0; i--)
632 {
633 offnum = prstate->heaponly_items[i];
634
635 if (prstate->processed[offnum])
636 continue;
637
638 /* see preceding loop */
639 *off_loc = offnum;
640
641 /*
642 * If the tuple is DEAD and doesn't chain to anything else, mark it
643 * unused. (If it does chain, we can only remove it as part of
644 * pruning its chain.)
645 *
646 * We need this primarily to handle aborted HOT updates, that is,
647 * XMIN_INVALID heap-only tuples. Those might not be linked to by any
648 * chain, since the parent tuple might be re-updated before any
649 * pruning occurs. So we have to be able to reap them separately from
650 * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
651 * return true for an XMIN_INVALID tuple, so this code will work even
652 * when there were sequential updates within the aborted transaction.)
653 */
654 if (prstate->htsv[offnum] == HEAPTUPLE_DEAD)
655 {
656 ItemId itemid = PageGetItemId(page, offnum);
657 HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
658
660 {
662 &prstate->latest_xid_removed);
663 heap_prune_record_unused(prstate, offnum, true);
664 }
665 else
666 {
667 /*
668 * This tuple should've been processed and removed as part of
669 * a HOT chain, so something's wrong. To preserve evidence,
670 * we don't dare to remove it. We cannot leave behind a DEAD
671 * tuple either, because that will cause VACUUM to error out.
672 * Throwing an error with a distinct error message seems like
673 * the least bad option.
674 */
675 elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
676 blockno, offnum);
677 }
678 }
679 else
681 }
682
683 /* We should now have processed every tuple exactly once */
684#ifdef USE_ASSERT_CHECKING
685 for (offnum = FirstOffsetNumber;
686 offnum <= maxoff;
687 offnum = OffsetNumberNext(offnum))
688 {
689 *off_loc = offnum;
690
691 Assert(prstate->processed[offnum]);
692 }
693#endif
694
695 /* Clear the offset information once we have processed the given page. */
697}
698
/*
 * NOTE(review): the extraction dropped this helper's name line (original
 * line 713), the did_tuple_hint_fpi parameter line (716), and the
 * conditions of the FPI heuristics at lines 762, 765-766 and 772
 * (presumably tests involving did_tuple_hint_fpi and XLogCheckBufferNeedsBackup()).
 * Verify against upstream before editing.
 */
699/*
700 * Decide whether to proceed with freezing according to the freeze plans
701 * prepared for the current heap buffer. If freezing is chosen, this function
702 * performs several pre-freeze checks.
703 *
704 * The values of do_prune, do_hint_prune, and did_tuple_hint_fpi must be
705 * determined before calling this function.
706 *
707 * prstate is both an input and output parameter.
708 *
709 * Returns true if we should apply the freeze plans and freeze tuples on the
710 * page, and false otherwise.
711 */
712static bool
714 bool do_prune,
715 bool do_hint_prune,
717{
718 bool do_freeze = false;
719
720 /*
721 * If the caller specified we should not attempt to freeze any tuples,
722 * validate that everything is in the right state and return.
723 */
724 if (!prstate->attempt_freeze)
725 {
726 Assert(!prstate->set_all_frozen && prstate->nfrozen == 0);
727 return false;
728 }
729
730 if (prstate->pagefrz.freeze_required)
731 {
732 /*
733 * heap_prepare_freeze_tuple indicated that at least one XID/MXID from
734 * before FreezeLimit/MultiXactCutoff is present. Must freeze to
735 * advance relfrozenxid/relminmxid.
736 */
737 do_freeze = true;
738 }
739 else
740 {
741 /*
742 * Opportunistically freeze the page if we are generating an FPI
743 * anyway and if doing so means that we can set the page all-frozen
744 * afterwards (might not happen until VACUUM's final heap pass).
745 *
746 * XXX: Previously, we knew if pruning emitted an FPI by checking
747 * pgWalUsage.wal_fpi before and after pruning. Once the freeze and
748 * prune records were combined, this heuristic couldn't be used
749 * anymore. The opportunistic freeze heuristic must be improved;
750 * however, for now, try to approximate the old logic.
751 */
752 if (prstate->set_all_frozen && prstate->nfrozen > 0)
753 {
754 Assert(prstate->set_all_visible);
755
756 /*
757 * Freezing would make the page all-frozen. Have already emitted
758 * an FPI or will do so anyway?
759 */
760 if (RelationNeedsWAL(prstate->relation))
761 {
763 do_freeze = true;
764 else if (do_prune)
765 {
767 do_freeze = true;
768 }
769 else if (do_hint_prune)
770 {
771 if (XLogHintBitIsNeeded() &&
773 do_freeze = true;
774 }
775 }
776 }
777 }
778
779 if (do_freeze)
780 {
781 /*
782 * Validate the tuples we will be freezing before entering the
783 * critical section.
784 */
785 heap_pre_freeze_checks(prstate->buffer, prstate->frozen, prstate->nfrozen);
786 Assert(TransactionIdPrecedes(prstate->pagefrz.FreezePageConflictXid,
787 prstate->cutoffs->OldestXmin));
788 }
789 else if (prstate->nfrozen > 0)
790 {
791 /*
792 * The page contained some tuples that were not already frozen, and we
793 * chose not to freeze them now. The page won't be all-frozen then.
794 */
795 Assert(!prstate->pagefrz.freeze_required);
796
797 prstate->set_all_frozen = false;
798 prstate->nfrozen = 0; /* avoid miscounts in instrumentation */
799 }
800 else
801 {
802 /*
803 * We have no freeze plans to execute. The page might already be
804 * all-frozen (perhaps only following pruning), though. Such pages
805 * can be marked all-frozen in the VM by our caller, even though none
806 * of its tuples were newly frozen here.
807 */
808 }
809
810 return do_freeze;
811}
812
/*
 * NOTE(review): the extraction dropped this function's name/parameter lines
 * (original lines 831-832), the Assert at 838, the three "case ...:" labels
 * and the opening ereport(WARNING, ...) lines of each case (842-844, 852,
 * 867-868, 876, 886-889), the PD_ALL_VISIBLE clear at 902-903, and the VM
 * clear flags argument at 911.  The surviving comments name the function as
 * heap_page_fix_vm_corruption().  Verify against upstream before editing.
 */
813/*
814 * Emit a warning about and fix visibility map corruption on the given page.
815 *
816 * The caller specifies the type of corruption it has already detected via
817 * corruption_type, so that we can emit the appropriate warning. All cases
818 * result in the VM bits being cleared; corruption types where PD_ALL_VISIBLE
819 * is incorrectly set also clear PD_ALL_VISIBLE.
820 *
821 * Must be called while holding an exclusive lock on the heap buffer. Dead
822 * items and not all-visible tuples must have been discovered under that same
823 * lock. Although we do not hold a lock on the VM buffer, it is pinned, and
824 * the heap buffer is exclusively locked, ensuring that no other backend can
825 * update the VM bits corresponding to this heap page.
826 *
827 * This function makes changes to the VM and, potentially, the heap page, but
828 * it does not need to be done in a critical section.
829 */
830static void
833{
834 const char *relname = RelationGetRelationName(prstate->relation);
835 bool do_clear_vm = false;
836 bool do_clear_heap = false;
837
839

840 switch (corruption_type)
841 {
845 errmsg("dead line pointer found on page marked all-visible"),
846 errcontext("relation \"%s\", page %u, tuple %u",
847 relname, prstate->block, offnum)));
848 do_clear_vm = true;
849 do_clear_heap = true;
850 break;
851
853

854 /*
855 * A HEAPTUPLE_LIVE tuple on an all-visible page can appear to not
856 * be visible to everyone when
857 * GetOldestNonRemovableTransactionId() returns a conservative
858 * value that's older than the real safe xmin. That is not
859 * corruption -- the PD_ALL_VISIBLE flag is still correct.
860 *
861 * However, dead tuple versions, in-progress inserts, and
862 * in-progress deletes should never appear on a page marked
863 * all-visible. That indicates real corruption. PD_ALL_VISIBLE
864 * should have been cleared by the DML operation that deleted or
865 * inserted the tuple.
866 */
869 errmsg("tuple not visible to all transactions found on page marked all-visible"),
870 errcontext("relation \"%s\", page %u, tuple %u",
871 relname, prstate->block, offnum)));
872 do_clear_vm = true;
873 do_clear_heap = true;
874 break;
875
877

878 /*
879 * As of PostgreSQL 9.2, the visibility map bit should never be
880 * set if the page-level bit is clear. However, for vacuum, it's
881 * possible that the bit got cleared after
882 * heap_vac_scan_next_block() was called, so we must recheck now
883 * that we have the buffer lock before concluding that the VM is
884 * corrupt.
885 */
890 errmsg("page is not marked all-visible but visibility map bit is set"),
891 errcontext("relation \"%s\", page %u",
892 relname, prstate->block)));
893 do_clear_vm = true;
894 break;
895 }
896
898

899 /* Avoid marking the buffer dirty if PD_ALL_VISIBLE is already clear */
900 if (do_clear_heap)
901 {
904 MarkBufferDirtyHint(prstate->buffer, true);
905 }
906
907 if (do_clear_vm)
908 {
909 visibilitymap_clear(prstate->relation, prstate->block,
910 prstate->vmbuffer,
912 prstate->old_vmbits = 0;
913 }
914}
915
/*
 * NOTE(review): the extraction dropped this helper's name/parameter line
 * (original line 926; it takes at least 'prstate' and a PruneReason
 * 'reason', both referenced below) and line 938, which — given the
 * following "|=" for the all-frozen bit — presumably set
 * prstate->new_vmbits to VISIBILITYMAP_ALL_VISIBLE.  Verify upstream.
 */
916/*
917 * Decide whether to set the visibility map bits (all-visible and all-frozen)
918 * for the current page using information from the PruneState and VM.
919 *
920 * This function does not actually set the VM bits or page-level visibility
921 * hint, PD_ALL_VISIBLE.
922 *
923 * Returns true if one or both VM bits should be set and false otherwise.
924 */
925static bool
927{
928 /*
929 * Though on-access pruning maintains prstate->set_all_visible, we don't
930 * set the VM on-access for now.
931 */
932 if (reason == PRUNE_ON_ACCESS)
933 return false;
934
935 if (!prstate->set_all_visible)
936 return false;
937
939

940 if (prstate->set_all_frozen)
941 prstate->new_vmbits |= VISIBILITYMAP_ALL_FROZEN;
942
943 if (prstate->new_vmbits == prstate->old_vmbits)
944 {
945 prstate->new_vmbits = 0;
946 return false;
947 }
948
949 return true;
950}
951
/*
 * NOTE(review): the extraction dropped this function's name/parameter line
 * (original line 973; the comment names it prune_freeze_fast_path() and it
 * references 'prstate' and 'presult'), the local declarations at 975 and
 * 999 (a 'maxoff'/'off' pair used by the loop below), and the
 * PageIsPrunable-style condition at 986 guarding the stale-hint clear.
 * Verify against upstream before editing.
 */
952/*
953 * If the page is already all-frozen, or already all-visible and freezing
954 * won't be attempted, there is no remaining work and we can use the fast path
955 * to avoid the expensive overhead of heap_page_prune_and_freeze().
956 *
957 * This can happen when the page has a stale prune hint, or if VACUUM is
958 * scanning an already all-frozen page due to SKIP_PAGES_THRESHOLD.
959 *
960 * The caller must already have examined the visibility map and saved the
961 * status of the page's VM bits in prstate->old_vmbits. Caller must hold a
962 * content lock on the heap page since it will examine line pointers.
963 *
964 * Before calling prune_freeze_fast_path(), the caller should first
965 * check for and fix any discrepancy between the page-level visibility hint
966 * and the visibility map. Otherwise, the fast path will always prevent us
967 * from getting them in sync. Note that if there are tuples on the page that
968 * are not visible to all but the VM is incorrectly marked
969 * all-visible/all-frozen, we will not get the chance to fix that corruption
970 * when using the fast path.
971 */
972static void
974{
976 Page page = prstate->page;
977
978 Assert((prstate->old_vmbits & VISIBILITYMAP_ALL_FROZEN) ||
979 ((prstate->old_vmbits & VISIBILITYMAP_ALL_VISIBLE) &&
980 !prstate->attempt_freeze));
981
982 /* We'll fill in presult for the caller */
983 memset(presult, 0, sizeof(PruneFreezeResult));
984
985 /* Clear any stale prune hint */
987 {
988 PageClearPrunable(page);
989 MarkBufferDirtyHint(prstate->buffer, true);
990 }
991
992 if (PageIsEmpty(page))
993 return;
994
995 /*
996 * Since the page is all-visible, a count of the normal ItemIds on the
997 * page should be sufficient for vacuum's live tuple count.
998 */
1000 off <= maxoff;
1001 off = OffsetNumberNext(off))
1002 {
1003 ItemId lp = PageGetItemId(page, off);
1004
1005 if (!ItemIdIsUsed(lp))
1006 continue;
1007
1008 presult->hastup = true;
1009
1010 if (ItemIdIsNormal(lp))
1011 prstate->live_tuples++;
1012 }
1013
1014 presult->live_tuples = prstate->live_tuples;
1015}
1016
1017/*
1018 * Prune and repair fragmentation and potentially freeze tuples on the
1019 * specified page. If the page's visibility status has changed, update it in
1020 * the VM.
1021 *
1022 * Caller must have pin and buffer cleanup lock on the page. Note that we
1023 * don't update the FSM information for page on caller's behalf. Caller might
1024 * also need to account for a reduction in the length of the line pointer
1025 * array following array truncation by us.
1026 *
1027 * params contains the input parameters used to control freezing and pruning
1028 * behavior. See the definition of PruneFreezeParams for more on what each
1029 * parameter does.
1030 *
1031 * If the HEAP_PAGE_PRUNE_FREEZE option is set in params, we will freeze
1032 * tuples if it's required in order to advance relfrozenxid / relminmxid, or
1033 * if it's considered advantageous for overall system performance to do so
1034 * now. The 'params.cutoffs', 'presult', 'new_relfrozen_xid' and
1035 * 'new_relmin_mxid' arguments are required when freezing.
1036 *
1037 * A vmbuffer corresponding to the heap page is also passed and if the page is
1038 * found to be all-visible/all-frozen, we will set it in the VM.
1039 *
1040 * presult contains output parameters needed by callers, such as the number of
1041 * tuples removed and the offsets of dead items on the page after pruning.
1042 * heap_page_prune_and_freeze() is responsible for initializing it. Required
1043 * by all callers.
1044 *
1045 * off_loc is the offset location required by the caller to use in error
1046 * callback.
1047 *
1048 * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if the
1049 * HEAP_PAGE_PRUNE_FREEZE option is set in params. On entry, they contain the
1050 * oldest XID and multi-XID seen on the relation so far. They will be updated
1051 * with the oldest values present on the page after pruning. After processing
1052 * the whole relation, VACUUM can use these values as the new
1053 * relfrozenxid/relminmxid for the relation.
1054 */
1055void
1061{
 /*
 * NOTE(review): lossy extraction -- the parameter list (1056-1060), the
 * PruneState/conflict_xid declarations, and several statement lines
 * (e.g. 1082-1084, 1108, 1128, 1146-1148, 1170, 1182, 1192, 1195, 1279,
 * 1282, 1295-1305) are missing from this view.  Comments describe only
 * the visible code.
 */
1063 bool do_freeze;
1064 bool do_prune;
1065 bool do_hint_prune;
1066 bool do_set_vm;
1067 bool did_tuple_hint_fpi;
1070
1071 /* Initialize prstate */
1072 prune_freeze_setup(params,
1074 presult, &prstate);
1075
1076 /*
1077 * If the VM is set but PD_ALL_VISIBLE is clear, fix that corruption
1078 * before pruning and freezing so that the page and VM start out in a
1079 * consistent state.
1080 */
1081 if ((prstate.old_vmbits & VISIBILITYMAP_VALID_BITS) &&
1085
1086 /*
1087 * If the page is already all-frozen, or already all-visible when freezing
1088 * is not being attempted, take the fast path, skipping pruning and
1089 * freezing code entirely. This must be done after fixing any discrepancy
1090 * between the page-level visibility hint and the VM, since that may have
1091 * cleared old_vmbits.
1092 */
1093 if ((params->options & HEAP_PAGE_PRUNE_ALLOW_FAST_PATH) != 0 &&
1094 ((prstate.old_vmbits & VISIBILITYMAP_ALL_FROZEN) ||
1095 ((prstate.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) &&
1096 !prstate.attempt_freeze)))
1097 {
1099 return;
1100 }
1101
1102 /*
1103 * Examine all line pointers and tuple visibility information to determine
1104 * which line pointers should change state and which tuples may be frozen.
1105 * Prepare queue of state changes to later be executed in a critical
1106 * section.
1107 */
1109
1110 /*
1111 * After processing all the live tuples on the page, if the newest xmin
1112 * amongst them may be considered running by any snapshot, the page cannot
1113 * be all-visible. This should be done before determining whether or not
1114 * to opportunistically freeze.
1115 */
1116 if (prstate.set_all_visible &&
1117 TransactionIdIsNormal(prstate.newest_live_xid) &&
1119 prstate.newest_live_xid,
1120 true))
1121 prstate.set_all_visible = prstate.set_all_frozen = false;
1122
1123 /*
1124 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
1125 * checking tuple visibility information in prune_freeze_plan() may have
1126 * caused an FPI to be emitted.
1127 */
1129
 /* Any queued item-state change means we must prune. */
1130 do_prune = prstate.nredirected > 0 ||
1131 prstate.ndead > 0 ||
1132 prstate.nunused > 0;
1133
1134 /*
1135 * Even if we don't prune anything, if we found a new value for the
1136 * pd_prune_xid field or the page was marked full, we will update the hint
1137 * bit.
1138 */
1139 do_hint_prune = PageGetPruneXid(prstate.page) != prstate.new_prune_xid ||
1140 PageIsFull(prstate.page);
1141
1142 /*
1143 * Decide if we want to go ahead with freezing according to the freeze
1144 * plans we prepared, or not.
1145 */
1147 do_prune,
1149 &prstate);
1150
1151 /*
1152 * While scanning the line pointers, we did not clear
1153 * set_all_visible/set_all_frozen when encountering LP_DEAD items because
1154 * we wanted the decision whether or not to freeze the page to be
1155 * unaffected by the short-term presence of LP_DEAD items. These LP_DEAD
1156 * items are effectively assumed to be LP_UNUSED items in the making. It
1157 * doesn't matter which vacuum heap pass (initial pass or final pass) ends
1158 * up setting the page all-frozen, as long as the ongoing VACUUM does it.
1159 *
1160 * Now that we finished determining whether or not to freeze the page,
1161 * update set_all_visible and set_all_frozen so that they reflect the true
1162 * state of the page for setting PD_ALL_VISIBLE and VM bits.
1163 */
1164 if (prstate.lpdead_items > 0)
1165 prstate.set_all_visible = prstate.set_all_frozen = false;
1166
1167 Assert(!prstate.set_all_frozen || prstate.set_all_visible);
1168 Assert(!prstate.set_all_visible || (prstate.lpdead_items == 0));
1169
1171
1172 /*
1173 * new_vmbits should be 0 regardless of whether or not the page is
1174 * all-visible if we do not intend to set the VM.
1175 */
1176 Assert(do_set_vm || prstate.new_vmbits == 0);
1177
1178 /*
1179 * The snapshot conflict horizon for the whole record is the most
1180 * conservative (newest) horizon required by any change in the record.
1181 */
1183 if (do_set_vm)
1184 conflict_xid = prstate.newest_live_xid;
1185 if (do_freeze && TransactionIdFollows(prstate.pagefrz.FreezePageConflictXid, conflict_xid))
1186 conflict_xid = prstate.pagefrz.FreezePageConflictXid;
1187 if (do_prune && TransactionIdFollows(prstate.latest_xid_removed, conflict_xid))
1188 conflict_xid = prstate.latest_xid_removed;
1189
1190 /* Lock vmbuffer before entering a critical section */
1191 if (do_set_vm)
1193
1194 /* Any error while applying the changes is critical */
1196
1197 if (do_hint_prune)
1198 {
1199 /*
1200 * Update the page's pd_prune_xid field to either zero, or the lowest
1201 * XID of any soon-prunable tuple.
1202 */
1203 ((PageHeader) prstate.page)->pd_prune_xid = prstate.new_prune_xid;
1204
1205 /*
1206 * Also clear the "page is full" flag, since there's no point in
1207 * repeating the prune/defrag process until something else happens to
1208 * the page.
1209 */
1210 PageClearFull(prstate.page);
1211
1212 /*
1213 * If that's all we had to do to the page, this is a non-WAL-logged
1214 * hint. If we are going to freeze or prune the page or set
1215 * PD_ALL_VISIBLE, we will mark the buffer dirty below.
1216 *
1217 * Setting PD_ALL_VISIBLE is fully WAL-logged because it is forbidden
1218 * for the VM to be set and PD_ALL_VISIBLE to be clear.
1219 */
1220 if (!do_freeze && !do_prune && !do_set_vm)
1221 MarkBufferDirtyHint(prstate.buffer, true);
1222 }
1223
1224 if (do_prune || do_freeze || do_set_vm)
1225 {
1226 /* Apply the planned item changes and repair page fragmentation. */
1227 if (do_prune)
1228 {
1229 heap_page_prune_execute(prstate.buffer, false,
1230 prstate.redirected, prstate.nredirected,
1231 prstate.nowdead, prstate.ndead,
1232 prstate.nowunused, prstate.nunused);
1233 }
1234
1235 if (do_freeze)
1236 heap_freeze_prepared_tuples(prstate.buffer, prstate.frozen, prstate.nfrozen);
1237
1238 /* Set the visibility map and page visibility hint */
1239 if (do_set_vm)
1240 {
1241 /*
1242 * While it is valid for PD_ALL_VISIBLE to be set when the
1243 * corresponding VM bit is clear, we strongly prefer to keep them
1244 * in sync.
1245 *
1246 * The heap buffer must be marked dirty before adding it to the
1247 * WAL chain when setting the VM. We don't worry about
1248 * unnecessarily dirtying the heap buffer if PD_ALL_VISIBLE is
1249 * already set, though. It is extremely rare to have a clean heap
1250 * buffer with PD_ALL_VISIBLE already set and the VM bits clear,
1251 * so there is no point in optimizing it.
1252 */
1255 visibilitymap_set(prstate.block, prstate.vmbuffer, prstate.new_vmbits,
1256 prstate.relation->rd_locator);
1257 }
1258
1259 MarkBufferDirty(prstate.buffer);
1260
1261 /*
1262 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
1263 */
1264 if (RelationNeedsWAL(prstate.relation))
1265 {
1266 log_heap_prune_and_freeze(prstate.relation, prstate.buffer,
1267 do_set_vm ? prstate.vmbuffer : InvalidBuffer,
1268 do_set_vm ? prstate.new_vmbits : 0,
1270 true, /* cleanup lock */
1271 params->reason,
1272 prstate.frozen, prstate.nfrozen,
1273 prstate.redirected, prstate.nredirected,
1274 prstate.nowdead, prstate.ndead,
1275 prstate.nowunused, prstate.nunused);
1276 }
1277 }
1278
1280
1281 if (do_set_vm)
1283
1284 /*
1285 * During its second pass over the heap, VACUUM calls
1286 * heap_page_would_be_all_visible() to determine whether a page is
1287 * all-visible and all-frozen. The logic here is similar. After completing
1288 * pruning and freezing, use an assertion to verify that our results
1289 * remain consistent with heap_page_would_be_all_visible(). It's also a
1290 * valuable cross-check of the page state after pruning and freezing.
1291 */
1292#ifdef USE_ASSERT_CHECKING
1293 if (prstate.set_all_visible)
1294 {
1296 bool debug_all_frozen;
1297
1298 Assert(prstate.lpdead_items == 0);
1299
1301 prstate.vistest,
1304
1306 debug_cutoff == prstate.newest_live_xid);
1307
1308 /*
1309 * It's possible the page is composed entirely of frozen tuples but is
1310 * not set all-frozen in the VM and did not pass
1311 * HEAP_PAGE_PRUNE_FREEZE. In this case, it's possible
1312 * heap_page_is_all_visible() finds the page completely frozen, even
1313 * though prstate.set_all_frozen is false.
1314 */
1315 Assert(!prstate.set_all_frozen || debug_all_frozen);
1316 }
1317#endif
1318
1319 /* Copy information back for caller */
1320 presult->ndeleted = prstate.ndeleted;
1321 presult->nnewlpdead = prstate.ndead;
1322 presult->nfrozen = prstate.nfrozen;
1323 presult->live_tuples = prstate.live_tuples;
1324 presult->recently_dead_tuples = prstate.recently_dead_tuples;
1325 presult->hastup = prstate.hastup;
1326
1327 presult->lpdead_items = prstate.lpdead_items;
1328 /* the presult->deadoffsets array was already filled in */
1329
 /*
 * Report which VM bits are newly set: "newly" means the corresponding
 * old_vmbits bit was clear before this call.
 */
1330 presult->newly_all_visible = false;
1331 presult->newly_all_frozen = false;
1332 presult->newly_all_visible_frozen = false;
1333 if (do_set_vm)
1334 {
1335 if ((prstate.old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1336 {
1337 presult->newly_all_visible = true;
1338 if (prstate.set_all_frozen)
1339 presult->newly_all_visible_frozen = true;
1340 }
1341 else if ((prstate.old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
1342 prstate.set_all_frozen)
1343 presult->newly_all_frozen = true;
1344 }
1345
 /*
 * Hand back the relfrozenxid/relminmxid trackers: the "Freeze" pair if we
 * actually froze tuples on this page, otherwise the "NoFreeze" pair.
 */
1346 if (prstate.attempt_freeze)
1347 {
1348 if (presult->nfrozen > 0)
1349 {
1350 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
1351 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
1352 }
1353 else
1354 {
1355 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1356 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1357 }
1358 }
1359}
1360
1361
1362/*
1363 * Perform visibility checks for heap pruning.
1364 */
1365static HTSV_Result
1367{
1368 HTSV_Result res;
1370
 /*
 * NOTE(review): lossy extraction -- the signature (1366), a declaration
 * (1369), and the visibility call that assigns 'res' and 'dead_after'
 * (1371) are missing from this view.
 */
1372
 /* Only RECENTLY_DEAD results get the additional removability checks. */
1373 if (res != HEAPTUPLE_RECENTLY_DEAD)
1374 return res;
1375
1376 /*
1377 * For VACUUM, we must be sure to prune tuples with xmax older than
1378 * OldestXmin -- a visibility cutoff determined at the beginning of
1379 * vacuuming the relation. OldestXmin is used for freezing determination
1380 * and we cannot freeze dead tuples' xmaxes.
1381 */
1382 if (prstate->cutoffs &&
1383 TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
1384 NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
1385 return HEAPTUPLE_DEAD;
1386
1387 /*
1388 * Determine whether or not the tuple is considered dead when compared
1389 * with the provided GlobalVisState. On-access pruning does not provide
1390 * VacuumCutoffs. And for vacuum, even if the tuple's xmax is not older
1391 * than OldestXmin, GlobalVisTestIsRemovableXid() could find the row dead
1392 * if the GlobalVisState has been updated since the beginning of vacuuming
1393 * the relation.
1394 */
1395 if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after, true))
1396 return HEAPTUPLE_DEAD;
1397
1398 return res;
1399}
1400
1401
1402/*
1403 * Pruning calculates tuple visibility once and saves the results in an array
1404 * of int8. See PruneState.htsv for details. This helper function is meant
1405 * to guard against examining visibility status array members which have not
1406 * yet been computed.
1407 */
1408static inline HTSV_Result
1410{
 /*
 * Guard against reading an uncomputed PruneState.htsv slot (stored as
 * int8; -1 presumably marks "not computed" -- the upper bound half of
 * this Assert, line 1412, is missing from this extraction).
 */
1411 Assert(status >= HEAPTUPLE_DEAD &&
1413 return (HTSV_Result) status;
1414}
1415
1416/*
1417 * Prune specified line pointer or a HOT chain originating at line pointer.
1418 *
1419 * Tuple visibility information is provided in prstate->htsv.
1420 *
1421 * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
1422 * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
1423 * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
1424 * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
1425 * DEAD, our visibility test is just too coarse to detect it.
1426 *
1427 * Pruning must never leave behind a DEAD tuple that still has tuple storage.
1428 * VACUUM isn't prepared to deal with that case.
1429 *
1430 * The root line pointer is redirected to the tuple immediately after the
1431 * latest DEAD tuple. If all tuples in the chain are DEAD, the root line
1432 * pointer is marked LP_DEAD. (This includes the case of a DEAD simple
1433 * tuple, which we treat as a chain of length 1.)
1434 *
1435 * We don't actually change the page here. We just add entries to the arrays in
1436 * prstate showing the changes to be made. Items to be redirected are added
1437 * to the redirected[] array (two entries per redirection); items to be set to
1438 * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
1439 * state are added to nowunused[]. We perform bookkeeping of live tuples,
1440 * visibility etc. based on what the page will look like after the changes
1441 * applied. All that bookkeeping is performed in the heap_prune_record_*()
1442 * subroutines. The division of labor is that heap_prune_chain() decides the
1443 * fate of each tuple, ie. whether it's going to be removed, redirected or
1444 * left unchanged, and the heap_prune_record_*() subroutines update PruneState
1445 * based on that outcome.
1446 */
1447static void
1450{
 /*
 * NOTE(review): lossy extraction -- the signature (1448-1449), several
 * declarations (1451, 1454), the chainitems array, and various statement
 * lines in the chain walk (e.g. 1464, 1497-1498, 1505, 1514, 1521-1522,
 * case labels, 1579, 1584, 1586) are missing from this view.
 */
1452 ItemId rootlp;
1453 OffsetNumber offnum;
1455 Page page = prstate->page;
1456
1457 /*
1458 * After traversing the HOT chain, ndeadchain is the index in chainitems
1459 * of the first live successor after the last dead item.
1460 */
1461 int ndeadchain = 0,
1462 nchain = 0;
1463
1465
1466 /* Start from the root tuple */
1467 offnum = rootoffnum;
1468
1469 /* while not end of the chain */
1470 for (;;)
1471 {
1472 HeapTupleHeader htup;
1473 ItemId lp;
1474
1475 /* Sanity check (pure paranoia) */
1476 if (offnum < FirstOffsetNumber)
1477 break;
1478
1479 /*
1480 * An offset past the end of page's line pointer array is possible
1481 * when the array was truncated (original item must have been unused)
1482 */
1483 if (offnum > maxoff)
1484 break;
1485
1486 /* If item is already processed, stop --- it must not be same chain */
1487 if (prstate->processed[offnum])
1488 break;
1489
1490 lp = PageGetItemId(page, offnum);
1491
1492 /*
1493 * Unused item obviously isn't part of the chain. Likewise, a dead
1494 * line pointer can't be part of the chain. Both of those cases were
1495 * already marked as processed.
1496 */
1499
1500 /*
1501 * If we are looking at the redirected root line pointer, jump to the
1502 * first normal tuple in the chain. If we find a redirect somewhere
1503 * else, stop --- it must not be same chain.
1504 */
1506 {
1507 if (nchain > 0)
1508 break; /* not at start of chain */
 /* record the redirect itself as chain member 0, then follow it */
1509 chainitems[nchain++] = offnum;
1510 offnum = ItemIdGetRedirect(rootlp);
1511 continue;
1512 }
1513
1515
1516 htup = (HeapTupleHeader) PageGetItem(page, lp);
1517
1518 /*
1519 * Check the tuple XMIN against prior XMAX, if any
1520 */
1523 break;
1524
1525 /*
1526 * OK, this tuple is indeed a member of the chain.
1527 */
1528 chainitems[nchain++] = offnum;
1529
1530 switch (htsv_get_valid_status(prstate->htsv[offnum]))
1531 {
1532 case HEAPTUPLE_DEAD:
1533
1534 /* Remember the last DEAD tuple seen */
1537 &prstate->latest_xid_removed);
1538 /* Advance to next chain member */
1539 break;
1540
1542
1543 /*
1544 * We don't need to advance the conflict horizon for
1545 * RECENTLY_DEAD tuples, even if we are removing them. This
1546 * is because we only remove RECENTLY_DEAD tuples if they
1547 * precede a DEAD tuple, and the DEAD tuple must have been
1548 * inserted by a newer transaction than the RECENTLY_DEAD
1549 * tuple by virtue of being later in the chain. We will have
1550 * advanced the conflict horizon for the DEAD tuple.
1551 */
1552
1553 /*
1554 * Advance past RECENTLY_DEAD tuples just in case there's a
1555 * DEAD one after them. We have to make sure that we don't
1556 * miss any DEAD tuples, since DEAD tuples that still have
1557 * tuple storage after pruning will confuse VACUUM.
1558 */
1559 break;
1560
1562 case HEAPTUPLE_LIVE:
 /* live or in-progress tuple ends the dead prefix of the chain */
1564 goto process_chain;
1565
1566 default:
1567 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1568 goto process_chain;
1569 }
1570
1571 /*
1572 * If the tuple is not HOT-updated, then we are at the end of this
1573 * HOT-update chain.
1574 */
1575 if (!HeapTupleHeaderIsHotUpdated(htup))
1576 goto process_chain;
1577
1578 /* HOT implies it can't have moved to different partition */
1580
1581 /*
1582 * Advance to next chain member.
1583 */
1585 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1587 }
1588
1589 if (ItemIdIsRedirected(rootlp) && nchain < 2)
1590 {
1591 /*
1592 * We found a redirect item that doesn't point to a valid follow-on
1593 * item. This can happen if the loop in heap_page_prune_and_freeze()
1594 * caused us to visit the dead successor of a redirect item before
1595 * visiting the redirect item. We can clean up by setting the
1596 * redirect item to LP_DEAD state or LP_UNUSED if the caller
1597 * indicated.
1598 */
1600 return;
1601 }
1602
1604
1606 {
1605 if (ndeadchain == 0)
1607 /*
1608 * No DEAD tuple was found, so the chain is entirely composed of
1609 * normal, unchanged tuples. Leave it alone.
1610 */
1611 int i = 0;
1612
1614 {
1616 i++;
1617 }
1618 for (; i < nchain; i++)
1620 }
1621 else if (ndeadchain == nchain)
1622 {
1623 /*
1624 * The entire chain is dead. Mark the root line pointer LP_DEAD, and
1625 * fully remove the other tuples in the chain.
1626 */
1628 for (int i = 1; i < nchain; i++)
1630 }
1631 else
1632 {
1633 /*
1634 * We found a DEAD tuple in the chain. Redirect the root line pointer
1635 * to the first non-DEAD tuple, and mark as unused each intermediate
1636 * item that we are able to remove from the chain.
1637 */
1640 for (int i = 1; i < ndeadchain; i++)
1642
1643 /* the rest of tuples in the chain are normal, unchanged tuples */
1644 for (int i = ndeadchain; i < nchain; i++)
1646 }
1647}
1648
1649/* Record lowest soon-prunable XID */
1650static void
1652 OffsetNumber offnum)
1653{
1654 /*
1655 * This should exactly match the PageSetPrunable macro. We can't store
1656 * directly into the page header yet, so we update working state.
1657 */
 /*
 * NOTE(review): the signature's first parameter line (1651) and the
 * lines assigning/asserting 'xid' (1658) and the corruption-fixup call
 * (1668-1669) are missing from this extraction.
 */
1659 if (!TransactionIdIsValid(prstate->new_prune_xid) ||
1660 TransactionIdPrecedes(xid, prstate->new_prune_xid))
1661 prstate->new_prune_xid = xid;
1662
1663 /*
1664 * It's incorrect for a page to be marked all-visible if it contains
1665 * prunable items.
1666 */
1667 if (PageIsAllVisible(prstate->page))
1670}
1671
1672/* Record line pointer to be redirected */
1673static void
1676 bool was_normal)
1677{
 /* Mark the 'from' offset processed; each item is planned exactly once. */
1678 Assert(!prstate->processed[offnum]);
1679 prstate->processed[offnum] = true;
1680
1681 /*
1682 * Do not mark the redirect target here. It needs to be counted
1683 * separately as an unchanged tuple.
1684 */
1685
1686 Assert(prstate->nredirected < MaxHeapTuplesPerPage);
 /* redirected[] holds (from, to) offset pairs, two entries per redirect */
1687 prstate->redirected[prstate->nredirected * 2] = offnum;
1688 prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
1689
1690 prstate->nredirected++;
1691
1692 /*
1693 * If the root entry had been a normal tuple, we are deleting it, so count
1694 * it in the result. But changing a redirect (even to DEAD state) doesn't
1695 * count.
1696 */
1697 if (was_normal)
1698 prstate->ndeleted++;
1699
1700 prstate->hastup = true;
1701}
1702
1703/* Record line pointer to be marked dead */
1704static void
1706 bool was_normal)
1707{
1708 Assert(!prstate->processed[offnum]);
1709 prstate->processed[offnum] = true;
1710
 /* Queue this offset to be set LP_DEAD in heap_page_prune_execute(). */
1712 prstate->nowdead[prstate->ndead] = offnum;
1713 prstate->ndead++;
1714
1715 /*
1716 * Deliberately delay unsetting set_all_visible and set_all_frozen until
1717 * later during pruning. Removable dead tuples shouldn't preclude freezing
1718 * the page.
1719 */
1720
1721 /* Record the dead offset for vacuum */
1722 prstate->deadoffsets[prstate->lpdead_items++] = offnum;
1723
1724 /*
1725 * If the root entry had been a normal tuple, we are deleting it, so count
1726 * it in the result. But changing a redirect (even to DEAD state) doesn't
1727 * count.
1728 */
1729 if (was_normal)
1730 prstate->ndeleted++;
1731}
1732
1733/*
1734 * Depending on whether or not the caller set mark_unused_now to true, record that a
1735 * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
1736 * which we will mark line pointers LP_UNUSED, but we will not mark line
1737 * pointers LP_DEAD if mark_unused_now is true.
1738 */
1739static void
1741 bool was_normal)
1742{
1743 /*
1744 * If the caller set mark_unused_now to true, we can remove dead tuples
1745 * during pruning instead of marking their line pointers dead. Set this
1746 * tuple's line pointer LP_UNUSED. We hint that this option is less
1747 * likely.
1748 */
 /*
 * NOTE(review): the calls made in the then/else branches (lines 1750 and
 * 1752) are missing from this extraction; presumably they dispatch to the
 * record-unused and record-dead helpers respectively -- confirm upstream.
 */
1749 if (unlikely(prstate->mark_unused_now))
1751 else
1753
1754 /*
1755 * It's incorrect for the page to be set all-visible if it contains dead
1756 * items. Fix that on the heap page and check the VM for corruption as
1757 * well. Do that here rather than in heap_prune_record_dead() so we also
1758 * cover tuples that are directly marked LP_UNUSED via mark_unused_now.
1759 */
1760 if (PageIsAllVisible(prstate->page))
1762}
1763
1764/* Record line pointer to be marked unused */
1765static void
1767{
1768 Assert(!prstate->processed[offnum]);
1769 prstate->processed[offnum] = true;
1770
 /* Queue this offset to be set LP_UNUSED in heap_page_prune_execute(). */
1772 prstate->nowunused[prstate->nunused] = offnum;
1773 prstate->nunused++;
1774
1775 /*
1776 * If the root entry had been a normal tuple, we are deleting it, so count
1777 * it in the result. But changing a redirect (even to DEAD state) doesn't
1778 * count.
1779 */
1780 if (was_normal)
1781 prstate->ndeleted++;
1782}
1783
1784/*
1785 * Record an unused line pointer that is left unchanged.
1786 */
1787static void
1789{
 /* Nothing to plan for an already-unused item; just mark it processed. */
1790 Assert(!prstate->processed[offnum]);
1791 prstate->processed[offnum] = true;
1792}
1793
1794/*
1795 * Record line pointer that is left unchanged. We consider freezing it, and
1796 * update bookkeeping of tuple counts and page visibility.
1797 */
1798static void
1800{
 /*
 * NOTE(review): lossy extraction -- the signature (1799) and several
 * lines (e.g. the xmin-committed hint test at 1850, case labels at 1873,
 * 1887, 1912, and some call lines) are missing from this view.
 */
1801 HeapTupleHeader htup;
1802 TransactionId xmin;
1803 Page page = prstate->page;
1804
1805 Assert(!prstate->processed[offnum]);
1806 prstate->processed[offnum] = true;
1807
1808 prstate->hastup = true; /* the page is not empty */
1809
1810 /*
1811 * The criteria for counting a tuple as live in this block need to match
1812 * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
1813 * ANALYZE may produce wildly different reltuples values, e.g. when there
1814 * are many recently-dead tuples.
1815 *
1816 * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
1817 * can't run inside a transaction block, which makes some cases impossible
1818 * (e.g. in-progress insert from the same transaction).
1819 *
1820 * HEAPTUPLE_DEAD are handled by the other heap_prune_record_*()
1821 * subroutines. They don't count dead items like acquire_sample_rows()
1822 * does, because we assume that all dead items will become LP_UNUSED
1823 * before VACUUM finishes. This difference is only superficial. VACUUM
1824 * effectively agrees with ANALYZE about DEAD items, in the end. VACUUM
1825 * won't remember LP_DEAD items, but only because they're not supposed to
1826 * be left behind when it is done. (Cases where we bypass index vacuuming
1827 * will violate this optimistic assumption, but the overall impact of that
1828 * should be negligible.)
1829 */
1830 htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));
1831
1832 switch (prstate->htsv[offnum])
1833 {
1834 case HEAPTUPLE_LIVE:
1835
1836 /*
1837 * Count it as live. Not only is this natural, but it's also what
1838 * acquire_sample_rows() does.
1839 */
1840 prstate->live_tuples++;
1841
1842 /*
1843 * Is the tuple definitely visible to all transactions?
1844 *
1845 * NB: Like with per-tuple hint bits, we can't set the
1846 * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
1847 * See SetHintBits for more info. Check that the tuple is hinted
1848 * xmin-committed because of that.
1849 */
1851 {
1852 prstate->set_all_visible = false;
1853 prstate->set_all_frozen = false;
1854 break;
1855 }
1856
1857 /*
1858 * The inserter definitely committed. But we don't know if it is
1859 * old enough that everyone sees it as committed. Later, after
1860 * processing all the tuples on the page, we'll check if there is
1861 * any snapshot that still considers the newest xid on the page to
1862 * be running. If so, we don't consider the page all-visible.
1863 */
1864 xmin = HeapTupleHeaderGetXmin(htup);
1865
1866 /* Track newest xmin on page. */
1867 if (TransactionIdFollows(xmin, prstate->newest_live_xid) &&
1869 prstate->newest_live_xid = xmin;
1870
1871 break;
1872
1874 prstate->recently_dead_tuples++;
1875 prstate->set_all_visible = false;
1876 prstate->set_all_frozen = false;
1877
1878 /*
1879 * This tuple will soon become DEAD. Update the hint field so
1880 * that the page is reconsidered for pruning in future.
1881 */
1884 offnum);
1885 break;
1886
1888
1889 /*
1890 * We do not count these rows as live, because we expect the
1891 * inserting transaction to update the counters at commit, and we
1892 * assume that will happen only after we report our results. This
1893 * assumption is a bit shaky, but it is what acquire_sample_rows()
1894 * does, so be consistent.
1895 */
1896 prstate->set_all_visible = false;
1897 prstate->set_all_frozen = false;
1898
1899 /* The page should not be marked all-visible */
1900 if (PageIsAllVisible(page))
1903
1904 /*
1905 * If we wanted to optimize for aborts, we might consider marking
1906 * the page prunable when we see INSERT_IN_PROGRESS. But we
1907 * don't. See related decisions about when to mark the page
1908 * prunable in heapam.c.
1909 */
1910 break;
1911
1913
1914 /*
1915 * This an expected case during concurrent vacuum. Count such
1916 * rows as live. As above, we assume the deleting transaction
1917 * will commit and update the counters after we report.
1918 */
1919 prstate->live_tuples++;
1920 prstate->set_all_visible = false;
1921 prstate->set_all_frozen = false;
1922
1923 /*
1924 * This tuple may soon become DEAD. Update the hint field so that
1925 * the page is reconsidered for pruning in future.
1926 */
1929 offnum);
1930 break;
1931
1932 default:
1933
1934 /*
1935 * DEAD tuples should've been passed to heap_prune_record_dead()
1936 * or heap_prune_record_unused() instead.
1937 */
1938 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
1939 prstate->htsv[offnum]);
1940 break;
1941 }
1942
1943 /* Consider freezing any normal tuples which will not be removed */
1944 if (prstate->attempt_freeze)
1945 {
1946 bool totally_frozen;
1947
1948 if ((heap_prepare_freeze_tuple(htup,
1949 prstate->cutoffs,
1950 &prstate->pagefrz,
1951 &prstate->frozen[prstate->nfrozen],
1952 &totally_frozen)))
1953 {
1954 /* Save prepared freeze plan for later */
1955 prstate->frozen[prstate->nfrozen++].offset = offnum;
1956 }
1957
1958 /*
1959 * If any tuple isn't either totally frozen already or eligible to
1960 * become totally frozen (according to its freeze plan), then the page
1961 * definitely cannot be set all-frozen in the visibility map later on.
1962 */
1963 if (!totally_frozen)
1964 prstate->set_all_frozen = false;
1965 }
1966}
1967
1968
1969/*
1970 * Record line pointer that was already LP_DEAD and is left unchanged.
1971 */
1972static void
1974{
1975 Assert(!prstate->processed[offnum]);
1976 prstate->processed[offnum] = true;
1977
1978 /*
1979 * Deliberately don't set hastup for LP_DEAD items. We make the soft
1980 * assumption that any LP_DEAD items encountered here will become
1981 * LP_UNUSED later on, before count_nondeletable_pages is reached. If we
1982 * don't make this assumption then rel truncation will only happen every
1983 * other VACUUM, at most. Besides, VACUUM must treat
1984 * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
1985 * handled (handled here, or handled later on).
1986 *
1987 * Similarly, don't unset set_all_visible and set_all_frozen until later,
1988 * at the end of heap_page_prune_and_freeze(). This will allow us to
1989 * attempt to freeze the page after pruning. As long as we unset it
1990 * before updating the visibility map, this will be correct.
1991 */
1992
1993 /* Record the dead offset for vacuum */
1994 prstate->deadoffsets[prstate->lpdead_items++] = offnum;
1995
1996 /*
1997 * It's incorrect for a page to be marked all-visible if it contains dead
1998 * items.
1999 */
 /* NOTE(review): the fixup call on line 2001 is missing from this view. */
2000 if (PageIsAllVisible(prstate->page))
2002}
2003
2004/*
2005 * Record LP_REDIRECT that is left unchanged.
2006 */
2007static void
2009{
2010 /*
2011 * A redirect line pointer doesn't count as a live tuple.
2012 *
2013 * If we leave a redirect line pointer in place, there will be another
2014 * tuple on the page that it points to. We will do the bookkeeping for
2015 * that separately. So we have nothing to do here, except remember that
2016 * we processed this item.
2017 */
2018 Assert(!prstate->processed[offnum]);
2019 prstate->processed[offnum] = true;
2020}
2021
2022/*
2023 * Perform the actual page changes needed by heap_page_prune_and_freeze().
2024 *
2025 * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
2026 * as unused, not redirecting or removing anything else. The
2027 * PageRepairFragmentation() call is skipped in that case.
2028 *
2029 * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
2030 * the buffer. If it is set, an ordinary exclusive lock suffices.
2031 */
2032void
2034 OffsetNumber *redirected, int nredirected,
2035 OffsetNumber *nowdead, int ndead,
2036 OffsetNumber *nowunused, int nunused)
2037{
2038 Page page = BufferGetPage(buffer);
2039 OffsetNumber *offnum;
2041
2042 /* Shouldn't be called unless there's something to do */
2043 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
2044
2045 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
2046 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
2047
2048 /* Update all redirected line pointers */
2049 offnum = redirected;
2050 for (int i = 0; i < nredirected; i++)
2051 {
2052 OffsetNumber fromoff = *offnum++;
2053 OffsetNumber tooff = *offnum++;
2056
2057#ifdef USE_ASSERT_CHECKING
2058
2059 /*
2060 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
2061 * must be the first item from a HOT chain. If the item has tuple
2062 * storage then it can't be a heap-only tuple. Otherwise we are just
2063 * maintaining an existing LP_REDIRECT from an existing HOT chain that
2064 * has been pruned at least once before now.
2065 */
2067 {
2069
2070 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
2072 }
2073 else
2074 {
2075 /* We shouldn't need to redundantly set the redirect */
2077 }
2078
2079 /*
2080 * The item that we're about to set as an LP_REDIRECT (the 'from'
2081 * item) will point to an existing item (the 'to' item) that is
2082 * already a heap-only tuple. There can be at most one LP_REDIRECT
2083 * item per HOT chain.
2084 *
2085 * We need to keep around an LP_REDIRECT item (after original
2086 * non-heap-only root tuple gets pruned away) so that it's always
2087 * possible for VACUUM to easily figure out what TID to delete from
2088 * indexes when an entire HOT chain becomes dead. A heap-only tuple
2089 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
2090 * tuple can.
2091 *
2092 * This check may miss problems, e.g. the target of a redirect could
2093 * be marked as unused subsequently. The page_verify_redirects() check
2094 * below will catch such problems.
2095 */
2096 tolp = PageGetItemId(page, tooff);
2098 htup = (HeapTupleHeader) PageGetItem(page, tolp);
2100#endif
2101
2103 }
2104
2105 /* Update all now-dead line pointers */
2106 offnum = nowdead;
2107 for (int i = 0; i < ndead; i++)
2108 {
2109 OffsetNumber off = *offnum++;
2110 ItemId lp = PageGetItemId(page, off);
2111
2112#ifdef USE_ASSERT_CHECKING
2113
2114 /*
2115 * An LP_DEAD line pointer must be left behind when the original item
2116 * (which is dead to everybody) could still be referenced by a TID in
2117 * an index. This should never be necessary with any individual
2118 * heap-only tuple item, though. (It's not clear how much of a problem
2119 * that would be, but there is no reason to allow it.)
2120 */
2121 if (ItemIdHasStorage(lp))
2122 {
2124 htup = (HeapTupleHeader) PageGetItem(page, lp);
2126 }
2127 else
2128 {
2129 /* Whole HOT chain becomes dead */
2131 }
2132#endif
2133
2135 }
2136
2137 /* Update all now-unused line pointers */
2138 offnum = nowunused;
2139 for (int i = 0; i < nunused; i++)
2140 {
2141 OffsetNumber off = *offnum++;
2142 ItemId lp = PageGetItemId(page, off);
2143
2144#ifdef USE_ASSERT_CHECKING
2145
2146 if (lp_truncate_only)
2147 {
2148 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
2150 }
2151 else
2152 {
2153 /*
2154 * When heap_page_prune_and_freeze() was called, mark_unused_now
2155 * may have been passed as true, which allows would-be LP_DEAD
2156 * items to be made LP_UNUSED instead. This is only possible if
2157 * the relation has no indexes. If there are any dead items, then
2158 * mark_unused_now was not true and every item being marked
2159 * LP_UNUSED must refer to a heap-only tuple.
2160 */
2161 if (ndead > 0)
2162 {
2164 htup = (HeapTupleHeader) PageGetItem(page, lp);
2166 }
2167 else
2169 }
2170
2171#endif
2172
2174 }
2175
2176 if (lp_truncate_only)
2178 else
2179 {
2180 /*
2181 * Finally, repair any fragmentation, and update the page's hint bit
2182 * about whether it has free pointers.
2183 */
2185
2186 /*
2187 * Now that the page has been modified, assert that redirect items
2188 * still point to valid targets.
2189 */
2191 }
2192}
2193
2194
2195/*
2196 * If built with assertions, verify that all LP_REDIRECT items point to a
2197 * valid item.
2198 *
2199 * One way that bugs related to HOT pruning show is redirect items pointing to
2200 * removed tuples. It's not trivial to reliably check that marking an item
2201 * unused will not orphan a redirect item during heap_prune_chain() /
2202 * heap_page_prune_execute(), so we additionally check the whole page after
2203 * pruning. Without this check such bugs would typically only cause asserts
2204 * later, potentially well after the corruption has been introduced.
2205 *
2206 * Also check comments in heap_page_prune_execute()'s redirection loop.
2207 */
2208static void
2210{
2211#ifdef USE_ASSERT_CHECKING
2212 OffsetNumber offnum;
2213 OffsetNumber maxoff;
2214
2215 maxoff = PageGetMaxOffsetNumber(page);
2216 for (offnum = FirstOffsetNumber;
2217 offnum <= maxoff;
2218 offnum = OffsetNumberNext(offnum))
2219 {
2220 ItemId itemid = PageGetItemId(page, offnum);
2223 HeapTupleHeader htup;
2224
2225 if (!ItemIdIsRedirected(itemid))
2226 continue;
2227
2228 targoff = ItemIdGetRedirect(itemid);
2230
2234 htup = (HeapTupleHeader) PageGetItem(page, targitem);
2236 }
2237#endif
2238}
2239
2240
2241/*
2242 * For all items in this page, find their respective root line pointers.
2243 * If item k is part of a HOT-chain with root at item j, then we set
2244 * root_offsets[k - 1] = j.
2245 *
2246 * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
2247 * Unused entries are filled with InvalidOffsetNumber (zero).
2248 *
2249 * The function must be called with at least share lock on the buffer, to
2250 * prevent concurrent prune operations.
2251 *
2252 * Note: The information collected here is valid only as long as the caller
2253 * holds a pin on the buffer. Once pin is released, a tuple might be pruned
2254 * and reused by a completely unrelated tuple.
2255 */
2256void
2258{
2259 OffsetNumber offnum,
2260 maxoff;
2261
2264
2265 maxoff = PageGetMaxOffsetNumber(page);
2266 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2267 {
2268 ItemId lp = PageGetItemId(page, offnum);
2269 HeapTupleHeader htup;
2272
2273 /* skip unused and dead items */
2274 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
2275 continue;
2276
2277 if (ItemIdIsNormal(lp))
2278 {
2279 htup = (HeapTupleHeader) PageGetItem(page, lp);
2280
2281 /*
2282 * Check if this tuple is part of a HOT-chain rooted at some other
2283 * tuple. If so, skip it for now; we'll process it when we find
2284 * its root.
2285 */
2286 if (HeapTupleHeaderIsHeapOnly(htup))
2287 continue;
2288
2289 /*
2290 * This is either a plain tuple or the root of a HOT-chain.
2291 * Remember it in the mapping.
2292 */
2293 root_offsets[offnum - 1] = offnum;
2294
2295 /* If it's not the start of a HOT-chain, we're done with it */
2296 if (!HeapTupleHeaderIsHotUpdated(htup))
2297 continue;
2298
2299 /* Set up to scan the HOT-chain */
2302 }
2303 else
2304 {
2305 /* Must be a redirect item. We do not set its root_offsets entry */
2307 /* Set up to scan the HOT-chain */
2310 }
2311
2312 /*
2313 * Now follow the HOT-chain and collect other tuples in the chain.
2314 *
2315 * Note: Even though this is a nested loop, the complexity of the
2316 * function is O(N) because a tuple in the page should be visited not
2317 * more than twice, once in the outer loop and once in HOT-chain
2318 * chases.
2319 */
2320 for (;;)
2321 {
2322 /* Sanity check (pure paranoia) */
2323 if (offnum < FirstOffsetNumber)
2324 break;
2325
2326 /*
2327 * An offset past the end of page's line pointer array is possible
2328 * when the array was truncated
2329 */
2330 if (offnum > maxoff)
2331 break;
2332
2333 lp = PageGetItemId(page, nextoffnum);
2334
2335 /* Check for broken chains */
2336 if (!ItemIdIsNormal(lp))
2337 break;
2338
2339 htup = (HeapTupleHeader) PageGetItem(page, lp);
2340
2343 break;
2344
2345 /* Remember the root line pointer for this item */
2346 root_offsets[nextoffnum - 1] = offnum;
2347
2348 /* Advance to next chain member, if any */
2349 if (!HeapTupleHeaderIsHotUpdated(htup))
2350 break;
2351
2352 /* HOT implies it can't have moved to different partition */
2354
2357 }
2358 }
2359}
2360
2361
2362/*
2363 * Compare fields that describe actions required to freeze tuple with caller's
2364 * open plan. If everything matches then the frz tuple plan is equivalent to
2365 * caller's plan.
2366 */
2367static inline bool
2369{
2370 if (plan->xmax == frz->xmax &&
2371 plan->t_infomask2 == frz->t_infomask2 &&
2372 plan->t_infomask == frz->t_infomask &&
2373 plan->frzflags == frz->frzflags)
2374 return true;
2375
2376 /* Caller must call heap_log_freeze_new_plan again for frz */
2377 return false;
2378}
2379
2380/*
2381 * Comparator used to deduplicate the freeze plans used in WAL records.
2382 */
2383static int
2384heap_log_freeze_cmp(const void *arg1, const void *arg2)
2385{
2386 const HeapTupleFreeze *frz1 = arg1;
2387 const HeapTupleFreeze *frz2 = arg2;
2388
2389 if (frz1->xmax < frz2->xmax)
2390 return -1;
2391 else if (frz1->xmax > frz2->xmax)
2392 return 1;
2393
2394 if (frz1->t_infomask2 < frz2->t_infomask2)
2395 return -1;
2396 else if (frz1->t_infomask2 > frz2->t_infomask2)
2397 return 1;
2398
2399 if (frz1->t_infomask < frz2->t_infomask)
2400 return -1;
2401 else if (frz1->t_infomask > frz2->t_infomask)
2402 return 1;
2403
2404 if (frz1->frzflags < frz2->frzflags)
2405 return -1;
2406 else if (frz1->frzflags > frz2->frzflags)
2407 return 1;
2408
2409 /*
2410 * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
2411 * (So the tuples will share a single canonical freeze plan.)
2412 *
2413 * We tiebreak on page offset number to keep each freeze plan's page
2414 * offset number array individually sorted. (Unnecessary, but be tidy.)
2415 */
2416 if (frz1->offset < frz2->offset)
2417 return -1;
2418 else if (frz1->offset > frz2->offset)
2419 return 1;
2420
2421 Assert(false);
2422 return 0;
2423}
2424
2425/*
2426 * Start new plan initialized using tuple-level actions. At least one tuple
2427 * will have steps required to freeze described by caller's plan during REDO.
2428 */
2429static inline void
2431{
2432 plan->xmax = frz->xmax;
2433 plan->t_infomask2 = frz->t_infomask2;
2434 plan->t_infomask = frz->t_infomask;
2435 plan->frzflags = frz->frzflags;
2436 plan->ntuples = 1; /* for now */
2437}
2438
2439/*
2440 * Deduplicate tuple-based freeze plans so that each distinct set of
2441 * processing steps is only stored once in the WAL record.
2442 * Called during original execution of freezing (for logged relations).
2443 *
2444 * Return value is number of plans set in *plans_out for caller. Also writes
2445 * an array of offset numbers into *offsets_out output argument for caller
2446 * (actually there is one array per freeze plan, but that's not of immediate
2447 * concern to our caller).
2448 */
2449static int
2453{
2454 int nplans = 0;
2455
2456 /* Sort tuple-based freeze plans in the order required to deduplicate */
2457 qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
2458
2459 for (int i = 0; i < ntuples; i++)
2460 {
2461 HeapTupleFreeze *frz = tuples + i;
2462
2463 if (i == 0)
2464 {
2465 /* New canonical freeze plan starting with first tup */
2467 nplans++;
2468 }
2469 else if (heap_log_freeze_eq(plans_out, frz))
2470 {
2471 /* tup matches open canonical plan -- include tup in it */
2472 Assert(offsets_out[i - 1] < frz->offset);
2473 plans_out->ntuples++;
2474 }
2475 else
2476 {
2477 /* Tup doesn't match current plan -- done with it now */
2478 plans_out++;
2479
2480 /* New canonical freeze plan starting with this tup */
2482 nplans++;
2483 }
2484
2485 /*
2486 * Save page offset number in dedicated buffer in passing.
2487 *
2488 * REDO routine relies on the record's offset numbers array grouping
2489 * offset numbers by freeze plan. The sort order within each grouping
2490 * is ascending offset number order, just to keep things tidy.
2491 */
2492 offsets_out[i] = frz->offset;
2493 }
2494
2495 Assert(nplans > 0 && nplans <= ntuples);
2496
2497 return nplans;
2498}
2499
2500/*
2501 * Write an XLOG_HEAP2_PRUNE* WAL record
2502 *
2503 * This is used for several different page maintenance operations:
2504 *
2505 * - Page pruning, in VACUUM's 1st pass or on access: Some items are
2506 * redirected, some marked dead, and some removed altogether.
2507 *
2508 * - Freezing: Items are marked as 'frozen'.
2509 *
2510 * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
2511 *
2512 * They have enough commonalities that we use a single WAL record for them
2513 * all.
2514 *
2515 * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
2516 * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
2517 * replaying 'unused' items depends on whether they were all previously marked
2518 * as dead.
2519 *
2520 * If the VM is being updated, vmflags will contain the bits to set. In this
2521 * case, vmbuffer should already have been updated and marked dirty and should
2522 * still be pinned and locked.
2523 *
2524 * Note: This function scribbles on the 'frozen' array.
2525 *
2526 * Note: This is called in a critical section, so careful what you do here.
2527 */
2528void
2530 Buffer vmbuffer, uint8 vmflags,
2532 bool cleanup_lock,
2533 PruneReason reason,
2534 HeapTupleFreeze *frozen, int nfrozen,
2535 OffsetNumber *redirected, int nredirected,
2536 OffsetNumber *dead, int ndead,
2537 OffsetNumber *unused, int nunused)
2538{
2541 uint8 info;
2543
2544 Page heap_page = BufferGetPage(buffer);
2545
2546 /* The following local variables hold data registered in the WAL record: */
2550 xlhp_prune_items dead_items;
2553 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2555 bool heap_fpi_allowed = true;
2556
2558
 /* XLHP_* flag bits are accumulated into xlrec.flags piecemeal below */
2559 xlrec.flags = 0;
2561
2562 /*
2563 * We can avoid an FPI of the heap page if the only modification we are
2564 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2565 * disabled.
2566 *
2567 * However, if the page has never been WAL-logged (LSN is invalid), we
2568 * must force an FPI regardless. This can happen when another backend
2569 * extends the heap, initializes the page, and then fails before WAL-
2570 * logging it. Since heap extension is not WAL-logged, recovery might try
2571 * to replay our record and find that the page isn't initialized, which
2572 * would cause a PANIC.
2573 */
 /*
  * NOTE(review): the if-branch this else-if pairs with is elided in this
  * rendering; per the comment above it should force an FPI when the page
  * LSN is invalid -- confirm against the upstream source.
  */
2576 else if (!do_prune && nfrozen == 0 && (!do_set_vm || !XLogHintBitIsNeeded()))
2577 {
2579 heap_fpi_allowed = false;
2580 }
2581
2582 /*
2583 * Prepare data for the buffer. The arrays are not actually in the
2584 * buffer, but we pretend that they are. When XLogInsert stores a full
2585 * page image, the arrays can be omitted.
2586 */
2589
2590 if (do_set_vm)
2591 XLogRegisterBuffer(1, vmbuffer, 0)
2592
2593 if (nfrozen > 0)
2594 {
2595 int nplans;
2596
2598
2599 /*
2600 * Prepare deduplicated representation for use in the WAL record. This
2601 * destructively sorts frozen tuples array in-place.
2602 */
2603 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2604
2605 freeze_plans.nplans = nplans;
2607 offsetof(xlhp_freeze_plans, plans));
2608 XLogRegisterBufData(0, plans,
2609 sizeof(xlhp_freeze_plan) * nplans);
2610 }
2611 if (nredirected > 0)
2612 {
2614
2615 redirect_items.ntargets = nredirected;
2618 XLogRegisterBufData(0, redirected,
2619 sizeof(OffsetNumber[2]) * nredirected);
2620 }
2621 if (ndead > 0)
2622 {
2623 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2624
2625 dead_items.ntargets = ndead;
2626 XLogRegisterBufData(0, &dead_items,
2628 XLogRegisterBufData(0, dead,
2629 sizeof(OffsetNumber) * ndead);
2630 }
2631 if (nunused > 0)
2632 {
2634
2635 unused_items.ntargets = nunused;
2638 XLogRegisterBufData(0, unused,
2639 sizeof(OffsetNumber) * nunused);
2640 }
 /* Frozen tuples' offsets are registered last, after the freeze plans */
2641 if (nfrozen > 0)
2643 sizeof(OffsetNumber) * nfrozen);
2644
2645 /*
2646 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2647 * flag above.
2648 */
2650 {
2651 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2653 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2654 }
2656 xlrec.flags |= XLHP_IS_CATALOG_REL;
2659 if (cleanup_lock)
2660 xlrec.flags |= XLHP_CLEANUP_LOCK;
2661 else
2662 {
 /* without a cleanup lock we may only be marking LP_DEAD items unused */
2663 Assert(nredirected == 0 && ndead == 0);
2664 /* also, any items in 'unused' must've been LP_DEAD previously */
2665 }
2669
 /* Choose the WAL record type based on why we pruned */
2670 switch (reason)
2671 {
2672 case PRUNE_ON_ACCESS:
2674 break;
2675 case PRUNE_VACUUM_SCAN:
2677 break;
2680 break;
2681 default:
2682 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2683 break;
2684 }
2685 recptr = XLogInsert(RM_HEAP2_ID, info);
2686
 /* VM page was already dirtied by the caller; just stamp its LSN */
2687 if (do_set_vm)
2688 {
2689 Assert(BufferIsDirty(vmbuffer));
2690 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2691 }
2692
2693 /*
2694 * If we explicitly skip an FPI, we must not stamp the heap page with this
2695 * record's LSN. Recovery skips records <= the stamped LSN, so this could
2696 * lead to skipping an earlier FPI needed to repair a torn page.
2697 */
2698 if (heap_fpi_allowed)
2699 {
2700 Assert(BufferIsDirty(buffer));
2702 }
2703}
uint32 BlockNumber
Definition block.h:31
int Buffer
Definition buf.h:23
#define InvalidBuffer
Definition buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:3003
bool BufferIsDirty(Buffer buffer)
Definition bufmgr.c:3030
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3063
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5688
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6710
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:332
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:421
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
void PageRepairFragmentation(Page page)
Definition bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
static bool PageIsEmpty(const PageData *page)
Definition bufpage.h:249
PageHeaderData * PageHeader
Definition bufpage.h:199
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:455
static void PageClearAllVisible(Page page)
Definition bufpage.h:465
static TransactionId PageGetPruneXid(const PageData *page)
Definition bufpage.h:471
static void PageClearFull(Page page)
Definition bufpage.h:449
static void PageSetAllVisible(Page page)
Definition bufpage.h:460
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:379
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:417
PageData * Page
Definition bufpage.h:81
#define PageClearPrunable(page)
Definition bufpage.h:486
static XLogRecPtr PageGetLSN(const PageData *page)
Definition bufpage.h:411
static bool PageIsFull(const PageData *page)
Definition bufpage.h:439
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:397
#define likely(x)
Definition c.h:431
uint8_t uint8
Definition c.h:616
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:243
#define Max(x, y)
Definition c.h:1087
#define Assert(condition)
Definition c.h:945
int64_t int64
Definition c.h:615
TransactionId MultiXactId
Definition c.h:748
int8_t int8
Definition c.h:612
#define unlikely(x)
Definition c.h:432
#define MemSet(start, val, len)
Definition c.h:1109
uint32 TransactionId
Definition c.h:738
size_t Size
Definition c.h:691
int errcode(int sqlerrcode)
Definition elog.c:874
#define errcontext
Definition elog.h:198
#define WARNING
Definition elog.h:36
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition heapam.c:8073
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition heapam.c:7479
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition heapam.c:7146
void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition heapam.c:7426
#define HEAP_PAGE_PRUNE_FREEZE
Definition heapam.h:44
#define HEAP_PAGE_PRUNE_ALLOW_FAST_PATH
Definition heapam.h:45
HTSV_Result
Definition heapam.h:138
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:141
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:142
@ HEAPTUPLE_LIVE
Definition heapam.h:140
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:143
@ HEAPTUPLE_DEAD
Definition heapam.h:139
PruneReason
Definition heapam.h:252
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:255
@ PRUNE_ON_ACCESS
Definition heapam.h:253
@ PRUNE_VACUUM_SCAN
Definition heapam.h:254
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition heapam.h:43
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)
#define XLHP_HAS_CONFLICT_HORIZON
#define XLHP_HAS_FREEZE_PLANS
#define XLHP_VM_ALL_VISIBLE
#define SizeOfHeapPrune
#define XLHP_HAS_NOW_UNUSED_ITEMS
#define XLHP_VM_ALL_FROZEN
#define XLHP_HAS_REDIRECTIONS
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
#define XLHP_HAS_DEAD_ITEMS
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
WalUsage pgWalUsage
Definition instrument.c:22
int i
Definition isn.c:77
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdSetRedirect(itemId, link)
Definition itemid.h:152
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
#define ItemIdGetRedirect(itemId)
Definition itemid.h:78
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdSetDead(itemId)
Definition itemid.h:164
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
#define InvalidMultiXactId
Definition multixact.h:25
static char * errmsg
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
uint16 OffsetNumber
Definition off.h:24
#define FirstOffsetNumber
Definition off.h:27
#define OffsetNumberPrev(offsetNumber)
Definition off.h:54
#define ERRCODE_DATA_CORRUPTED
NameData relname
Definition pg_class.h:40
const void * data
#define plan(x)
Definition pg_regress.c:161
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
#define qsort(a, b, c, d)
Definition port.h:495
static int fb(int x)
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4114
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid, bool allow_update)
Definition procarray.c:4277
bool GlobalVisTestXidConsideredRunning(GlobalVisState *state, TransactionId xid, bool allow_update)
Definition procarray.c:4315
static void prune_freeze_fast_path(PruneState *prstate, PruneFreezeResult *presult)
Definition pruneheap.c:973
static void prune_freeze_plan(PruneState *prstate, OffsetNumber *off_loc)
Definition pruneheap.c:510
void heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
Definition pruneheap.c:2257
static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition pruneheap.c:713
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:1056
static HTSV_Result htsv_get_valid_status(int status)
Definition pruneheap.c:1409
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition pruneheap.c:2450
static void page_verify_redirects(Page page)
Definition pruneheap.c:2209
static bool heap_page_will_set_vm(PruneState *prstate, PruneReason reason)
Definition pruneheap.c:926
static void heap_prune_chain(OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate)
Definition pruneheap.c:1448
VMCorruptionType
Definition pruneheap.c:188
@ VM_CORRUPT_MISSING_PAGE_HINT
Definition pruneheap.c:190
@ VM_CORRUPT_LPDEAD
Definition pruneheap.c:192
@ VM_CORRUPT_TUPLE_VISIBILITY
Definition pruneheap.c:194
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2529
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition pruneheap.c:1766
static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum, bool was_normal)
Definition pruneheap.c:1674
static int heap_log_freeze_cmp(const void *arg1, const void *arg2)
Definition pruneheap.c:2384
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition pruneheap.c:1705
static bool heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
Definition pruneheap.c:2368
void heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
Definition pruneheap.c:256
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid, OffsetNumber offnum)
Definition pruneheap.c:1651
static void heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
Definition pruneheap.c:2430
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, PruneFreezeResult *presult, PruneState *prstate)
Definition pruneheap.c:381
static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition pruneheap.c:1740
static void heap_prune_record_unchanged_lp_unused(PruneState *prstate, OffsetNumber offnum)
Definition pruneheap.c:1788
static void heap_prune_record_unchanged_lp_dead(PruneState *prstate, OffsetNumber offnum)
Definition pruneheap.c:1973
static void heap_page_fix_vm_corruption(PruneState *prstate, OffsetNumber offnum, VMCorruptionType ctype)
Definition pruneheap.c:831
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition pruneheap.c:2033
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup)
Definition pruneheap.c:1366
static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
Definition pruneheap.c:2008
static void heap_prune_record_unchanged_lp_normal(PruneState *prstate, OffsetNumber offnum)
Definition pruneheap.c:1799
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition rel.h:389
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition rel.h:693
#define RelationNeedsWAL(relation)
Definition rel.h:637
#define HEAP_DEFAULT_FILLFACTOR
Definition rel.h:360
Oid t_tableOid
Definition htup.h:66
ItemPointerData t_ctid
PruneReason reason
Definition heapam.h:277
GlobalVisState * vistest
Definition heapam.h:293
struct VacuumCutoffs * cutoffs
Definition heapam.h:302
Relation relation
Definition heapam.h:263
Buffer vmbuffer
Definition heapam.h:271
bool set_all_frozen
Definition pruneheap.c:88
Buffer vmbuffer
Definition pruneheap.c:142
HeapPageFreeze pagefrz
Definition pruneheap.c:130
TransactionId new_prune_xid
Definition pruneheap.c:63
bool attempt_freeze
Definition pruneheap.c:46
bool hastup
Definition pruneheap.c:171
int recently_dead_tuples
Definition pruneheap.c:168
uint8 old_vmbits
Definition pruneheap.c:148
uint8 new_vmbits
Definition pruneheap.c:149
int nroot_items
Definition pruneheap.c:101
int nheaponly_items
Definition pruneheap.c:103
bool set_all_visible
Definition pruneheap.c:87
bool mark_unused_now
Definition pruneheap.c:44
int live_tuples
Definition pruneheap.c:167
BlockNumber block
Definition pruneheap.c:55
Page page
Definition pruneheap.c:57
GlobalVisState * vistest
Definition pruneheap.c:42
struct VacuumCutoffs * cutoffs
Definition pruneheap.c:47
TransactionId newest_live_xid
Definition pruneheap.c:152
int lpdead_items
Definition pruneheap.c:177
Relation relation
Definition pruneheap.c:48
Buffer buffer
Definition pruneheap.c:56
int nfrozen
Definition pruneheap.c:68
int nredirected
Definition pruneheap.c:65
TransactionId latest_xid_removed
Definition pruneheap.c:64
int nunused
Definition pruneheap.c:67
OffsetNumber * deadoffsets
Definition pruneheap.c:178
int64 wal_fpi
Definition instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
#define NormalTransactionIdPrecedes(id1, id2)
Definition transam.h:147
#define TransactionIdIsValid(xid)
Definition transam.h:41
#define TransactionIdIsNormal(xid)
Definition transam.h:42
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
void visibilitymap_set(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
uint8 visibilitymap_get_status(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
#define VISIBILITYMAP_ALL_FROZEN
#define VISIBILITYMAP_ALL_VISIBLE
bool RecoveryInProgress(void)
Definition xlog.c:6444
#define XLogHintBitIsNeeded()
Definition xlog.h:122
#define XLogRecPtrIsValid(r)
Definition xlogdefs.h:29
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:479
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:410
bool XLogCheckBufferNeedsBackup(Buffer buffer)
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:369
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:246
void XLogBeginInsert(void)
Definition xloginsert.c:153
#define REGBUF_STANDARD
Definition xloginsert.h:35
#define REGBUF_FORCE_IMAGE
Definition xloginsert.h:32
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33