PostgreSQL Source Code  git master
pruneheap.c
1 /*-------------------------------------------------------------------------
2  *
3  * pruneheap.c
4  * heap page pruning and HOT-chain management code
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/pruneheap.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/heapam.h"
18 #include "access/heapam_xlog.h"
19 #include "access/htup_details.h"
20 #include "access/transam.h"
21 #include "access/xlog.h"
22 #include "access/xloginsert.h"
23 #include "miscadmin.h"
24 #include "pgstat.h"
25 #include "storage/bufmgr.h"
26 #include "utils/rel.h"
27 #include "utils/snapmgr.h"
28 
29 /* Working data for heap_page_prune and subroutines */
30 typedef struct
31 {
32  /* tuple visibility test, initialized for the relation */
33  GlobalVisState *vistest;
34  /* whether or not dead items can be set LP_UNUSED during pruning */
35  bool mark_unused_now;
36 
37  TransactionId new_prune_xid; /* new prune hint value for page */
38  TransactionId snapshotConflictHorizon; /* latest xid removed */
39  int nredirected; /* numbers of entries in arrays below */
40  int ndead;
41  int nunused;
42  /* arrays that accumulate indexes of items to be changed */
43  OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
44  OffsetNumber nowdead[MaxHeapTuplesPerPage];
45  OffsetNumber nowunused[MaxHeapTuplesPerPage];
46 
47  /*
48  * marked[i] is true if item i is entered in one of the above arrays.
49  *
50  * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
51  * 1. Otherwise every access would need to subtract 1.
52  */
53  bool marked[MaxHeapTuplesPerPage + 1];
54 } PruneState;
55 
56 /* Local functions */
57 static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
58  HeapTuple tup,
59  Buffer buffer);
60 static int heap_prune_chain(Buffer buffer,
61  OffsetNumber rootoffnum,
62  int8 *htsv,
63  PruneState *prstate);
64 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
65 static void heap_prune_record_redirect(PruneState *prstate,
66  OffsetNumber offnum, OffsetNumber rdoffnum);
67 static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
68 static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
69 static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
70 static void page_verify_redirects(Page page);
71 
72 
73 /*
74  * Optionally prune and repair fragmentation in the specified page.
75  *
76  * This is an opportunistic function. It will perform housekeeping
77  * only if the page heuristically looks like a candidate for pruning and we
78  * can acquire buffer cleanup lock without blocking.
79  *
80  * Note: this is called quite often. It's important that it fall out quickly
81  * if there's not any use in pruning.
82  *
83  * Caller must have pin on the buffer, and must *not* have a lock on it.
84  */
85 void
86 heap_page_prune_opt(Relation relation, Buffer buffer)
87 {
88  Page page = BufferGetPage(buffer);
89  TransactionId prune_xid;
90  GlobalVisState *vistest;
91  Size minfree;
92 
93  /*
94  * We can't write WAL in recovery mode, so there's no point trying to
95  * clean the page. The primary will likely issue a cleaning WAL record
96  * soon anyway, so this is no particular loss.
97  */
98  if (RecoveryInProgress())
99  return;
100 
101  /*
102  * First check whether there's any chance there's something to prune,
103  * determining the appropriate horizon is a waste if there's no prune_xid
104  * (i.e. no updates/deletes left potentially dead tuples around).
105  */
106  prune_xid = ((PageHeader) page)->pd_prune_xid;
107  if (!TransactionIdIsValid(prune_xid))
108  return;
109 
110  /*
111  * Check whether prune_xid indicates that there may be dead rows that can
112  * be cleaned up.
113  */
114  vistest = GlobalVisTestFor(relation);
115 
116  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
117  return;
118 
119  /*
120  * We prune when a previous UPDATE failed to find enough space on the page
121  * for a new tuple version, or when free space falls below the relation's
122  * fill-factor target (but not less than 10%).
123  *
124  * Checking free space here is questionable since we aren't holding any
125  * lock on the buffer; in the worst case we could get a bogus answer. It's
126  * unlikely to be *seriously* wrong, though, since reading either pd_lower
127  * or pd_upper is probably atomic. Avoiding taking a lock seems more
128  * important than sometimes getting a wrong answer in what is after all
129  * just a heuristic estimate.
130  */
131  minfree = RelationGetTargetPageFreeSpace(relation,
132  HEAP_DEFAULT_FILLFACTOR);
133  minfree = Max(minfree, BLCKSZ / 10);
134 
135  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
136  {
137  /* OK, try to get exclusive buffer lock */
138  if (!ConditionalLockBufferForCleanup(buffer))
139  return;
140 
141  /*
142  * Now that we have buffer lock, get accurate information about the
143  * page's free space, and recheck the heuristic about whether to
144  * prune.
145  */
146  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
147  {
148  PruneResult presult;
149 
150  /*
151  * For now, pass mark_unused_now as false regardless of whether or
152  * not the relation has indexes, since we cannot safely determine
153  * that during on-access pruning with the current implementation.
154  */
155  heap_page_prune(relation, buffer, vistest, false,
156  &presult, PRUNE_ON_ACCESS, NULL);
157 
158  /*
159  * Report the number of tuples reclaimed to pgstats. This is
160  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
161  *
162  * We derive the number of dead tuples like this to avoid totally
163  * forgetting about items that were set to LP_DEAD, since they
164  * still need to be cleaned up by VACUUM. We only want to count
165  * heap-only tuples that just became LP_UNUSED in our report,
166  * which don't.
167  *
168  * VACUUM doesn't have to compensate in the same way when it
169  * tracks ndeleted, since it will set the same LP_DEAD items to
170  * LP_UNUSED separately.
171  */
172  if (presult.ndeleted > presult.nnewlpdead)
173  pgstat_update_heap_dead_tuples(relation,
174  presult.ndeleted - presult.nnewlpdead);
175  }
176 
177  /* And release buffer lock */
178  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
179 
180  /*
181  * We avoid reuse of any free space created on the page by unrelated
182  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
183  * free space should be reused by UPDATEs to *this* page.
184  */
185  }
186 }
187 
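/*
 * Illustrative usage sketch (not verbatim from any caller; variable names
 * are placeholders): an on-access caller such as a table scan reads and pins
 * the page, calls heap_page_prune_opt() while holding only the pin, and only
 * then locks the buffer to read its tuples.
 *
 *    buffer = ReadBufferExtended(relation, MAIN_FORKNUM, blkno,
 *                                RBM_NORMAL, strategy);
 *    heap_page_prune_opt(relation, buffer);
 *    LockBuffer(buffer, BUFFER_LOCK_SHARE);
 */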
188 
189 /*
190  * Prune and repair fragmentation in the specified page.
191  *
192  * Caller must have pin and buffer cleanup lock on the page. Note that we
193  * don't update the FSM information for page on caller's behalf. Caller might
194  * also need to account for a reduction in the length of the line pointer
195  * array following array truncation by us.
196  *
197  * vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
198  * (see heap_prune_satisfies_vacuum).
199  *
200  * mark_unused_now indicates whether or not dead items can be set LP_UNUSED
201  * during pruning.
202  *
203  * presult contains output parameters needed by callers such as the number of
204  * tuples removed and the number of line pointers newly marked LP_DEAD.
205  * heap_page_prune() is responsible for initializing it.
206  *
207  * reason indicates why the pruning is performed. It is included in the WAL
208  * record for debugging and analysis purposes, but otherwise has no effect.
209  *
210  * off_loc is the offset location required by the caller to use in error
211  * callback.
212  */
213 void
214 heap_page_prune(Relation relation, Buffer buffer,
215  GlobalVisState *vistest,
216  bool mark_unused_now,
217  PruneResult *presult,
218  PruneReason reason,
219  OffsetNumber *off_loc)
220 {
221  Page page = BufferGetPage(buffer);
222  BlockNumber blockno = BufferGetBlockNumber(buffer);
223  OffsetNumber offnum,
224  maxoff;
225  PruneState prstate;
226  HeapTupleData tup;
227 
228  /*
229  * Our strategy is to scan the page and make lists of items to change,
230  * then apply the changes within a critical section. This keeps as much
231  * logic as possible out of the critical section, and also ensures that
232  * WAL replay will work the same as the normal case.
233  *
234  * First, initialize the new pd_prune_xid value to zero (indicating no
235  * prunable tuples). If we find any tuples which may soon become
236  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
237  * initialize the rest of our working state.
238  */
239  prstate.new_prune_xid = InvalidTransactionId;
240  prstate.vistest = vistest;
241  prstate.mark_unused_now = mark_unused_now;
242  prstate.snapshotConflictHorizon = InvalidTransactionId;
243  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
244  memset(prstate.marked, 0, sizeof(prstate.marked));
245 
246  /*
247  * presult->htsv is not initialized here because all ntuple spots in the
248  * array will be set either to a valid HTSV_Result value or -1.
249  */
250  presult->ndeleted = 0;
251  presult->nnewlpdead = 0;
252 
253  maxoff = PageGetMaxOffsetNumber(page);
254  tup.t_tableOid = RelationGetRelid(relation);
255 
256  /*
257  * Determine HTSV for all tuples.
258  *
259  * This is required for correctness to deal with cases where running HTSV
260  * twice could result in different results (e.g. RECENTLY_DEAD can turn to
261  * DEAD if another checked item causes GlobalVisTestIsRemovableFullXid()
262  * to update the horizon, INSERT_IN_PROGRESS can change to DEAD if the
263  * inserting transaction aborts, ...). That in turn could cause
264  * heap_prune_chain() to behave incorrectly if a tuple is reached twice,
265  * once directly via a heap_prune_chain() and once following a HOT chain.
266  *
267  * It's also good for performance. Most commonly tuples within a page are
268  * stored at decreasing offsets (while the items are stored at increasing
269  * offsets). When processing all tuples on a page this leads to reading
270  * memory at decreasing offsets within a page, with a variable stride.
271  * That's hard for CPU prefetchers to deal with. Processing the items in
272  * reverse order (and thus the tuples in increasing order) increases
273  * prefetching efficiency significantly / decreases the number of cache
274  * misses.
275  */
276  for (offnum = maxoff;
277  offnum >= FirstOffsetNumber;
278  offnum = OffsetNumberPrev(offnum))
279  {
280  ItemId itemid = PageGetItemId(page, offnum);
281  HeapTupleHeader htup;
282 
283  /* Nothing to do if slot doesn't contain a tuple */
284  if (!ItemIdIsNormal(itemid))
285  {
286  presult->htsv[offnum] = -1;
287  continue;
288  }
289 
290  htup = (HeapTupleHeader) PageGetItem(page, itemid);
291  tup.t_data = htup;
292  tup.t_len = ItemIdGetLength(itemid);
293  ItemPointerSet(&(tup.t_self), blockno, offnum);
294 
295  /*
296  * Set the offset number so that we can display it along with any
297  * error that occurred while processing this tuple.
298  */
299  if (off_loc)
300  *off_loc = offnum;
301 
302  presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
303  buffer);
304  }
305 
306  /* Scan the page */
307  for (offnum = FirstOffsetNumber;
308  offnum <= maxoff;
309  offnum = OffsetNumberNext(offnum))
310  {
311  ItemId itemid;
312 
313  /* Ignore items already processed as part of an earlier chain */
314  if (prstate.marked[offnum])
315  continue;
316 
317  /* see preceding loop */
318  if (off_loc)
319  *off_loc = offnum;
320 
321  /* Nothing to do if slot is empty */
322  itemid = PageGetItemId(page, offnum);
323  if (!ItemIdIsUsed(itemid))
324  continue;
325 
326  /* Process this item or chain of items */
327  presult->ndeleted += heap_prune_chain(buffer, offnum,
328  presult->htsv, &prstate);
329  }
330 
331  /* Clear the offset information once we have processed the given page. */
332  if (off_loc)
333  *off_loc = InvalidOffsetNumber;
334 
335  /* Any error while applying the changes is critical */
336  START_CRIT_SECTION();
337 
338  /* Have we found any prunable items? */
339  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
340  {
341  /*
342  * Apply the planned item changes, then repair page fragmentation, and
343  * update the page's hint bit about whether it has free line pointers.
344  */
345  heap_page_prune_execute(buffer, false,
346  prstate.redirected, prstate.nredirected,
347  prstate.nowdead, prstate.ndead,
348  prstate.nowunused, prstate.nunused);
349 
350  /*
351  * Update the page's pd_prune_xid field to either zero, or the lowest
352  * XID of any soon-prunable tuple.
353  */
354  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
355 
356  /*
357  * Also clear the "page is full" flag, since there's no point in
358  * repeating the prune/defrag process until something else happens to
359  * the page.
360  */
361  PageClearFull(page);
362 
363  MarkBufferDirty(buffer);
364 
365  /*
366  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
367  */
368  if (RelationNeedsWAL(relation))
369  {
370  log_heap_prune_and_freeze(relation, buffer,
371  prstate.snapshotConflictHorizon,
372  true, reason,
373  NULL, 0,
374  prstate.redirected, prstate.nredirected,
375  prstate.nowdead, prstate.ndead,
376  prstate.nowunused, prstate.nunused);
377  }
378  }
379  else
380  {
381  /*
382  * If we didn't prune anything, but have found a new value for the
383  * pd_prune_xid field, update it and mark the buffer dirty. This is
384  * treated as a non-WAL-logged hint.
385  *
386  * Also clear the "page is full" flag if it is set, since there's no
387  * point in repeating the prune/defrag process until something else
388  * happens to the page.
389  */
390  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
391  PageIsFull(page))
392  {
393  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
394  PageClearFull(page);
395  MarkBufferDirtyHint(buffer, true);
396  }
397  }
398 
399  END_CRIT_SECTION();
400 
401  /* Record number of newly-set-LP_DEAD items for caller */
402  presult->nnewlpdead = prstate.ndead;
403 }
404 
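/*
 * Illustrative usage sketch (simplified, not verbatim from vacuumlazy.c;
 * names are placeholders): VACUUM's first heap pass holds a cleanup lock on
 * the buffer and calls heap_page_prune() with mark_unused_now set when the
 * table has no indexes, so LP_DEAD items can be reaped immediately.
 *
 *    PruneResult presult;
 *
 *    heap_page_prune(rel, buf, vistest, nindexes == 0,
 *                    &presult, PRUNE_VACUUM_SCAN, &off_loc);
 *
 * presult.ndeleted and presult.nnewlpdead then feed the caller's per-page
 * accounting.
 */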
405 
406 /*
407  * Perform visibility checks for heap pruning.
408  */
409 static HTSV_Result
410 heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
411 {
412  HTSV_Result res;
413  TransactionId dead_after;
414 
415  res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
416 
417  if (res != HEAPTUPLE_RECENTLY_DEAD)
418  return res;
419 
420  if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
421  res = HEAPTUPLE_DEAD;
422 
423  return res;
424 }
425 
426 
427 /*
428  * Prune specified line pointer or a HOT chain originating at line pointer.
429  *
430  * Tuple visibility information is provided in htsv.
431  *
432  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
433  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
434  * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
435  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
436  * DEAD, our visibility test is just too coarse to detect it.
437  *
438  * Pruning must never leave behind a DEAD tuple that still has tuple storage.
439  * VACUUM isn't prepared to deal with that case.
440  *
441  * The root line pointer is redirected to the tuple immediately after the
442  * latest DEAD tuple. If all tuples in the chain are DEAD, the root line
443  * pointer is marked LP_DEAD. (This includes the case of a DEAD simple
444  * tuple, which we treat as a chain of length 1.)
445  *
446  * We don't actually change the page here. We just add entries to the arrays in
447  * prstate showing the changes to be made. Items to be redirected are added
448  * to the redirected[] array (two entries per redirection); items to be set to
449  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
450  * state are added to nowunused[].
451  *
452  * Returns the number of tuples (to be) deleted from the page.
453  */
454 static int
455 heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
456  int8 *htsv, PruneState *prstate)
457 {
458  int ndeleted = 0;
459  Page dp = (Page) BufferGetPage(buffer);
460  TransactionId priorXmax = InvalidTransactionId;
461  ItemId rootlp;
462  HeapTupleHeader htup;
463  OffsetNumber latestdead = InvalidOffsetNumber,
464  maxoff = PageGetMaxOffsetNumber(dp),
465  offnum;
466  OffsetNumber chainitems[MaxHeapTuplesPerPage];
467  int nchain = 0,
468  i;
469 
470  rootlp = PageGetItemId(dp, rootoffnum);
471 
472  /*
473  * If it's a heap-only tuple, then it is not the start of a HOT chain.
474  */
475  if (ItemIdIsNormal(rootlp))
476  {
477  Assert(htsv[rootoffnum] != -1);
478  htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
479 
480  if (HeapTupleHeaderIsHeapOnly(htup))
481  {
482  /*
483  * If the tuple is DEAD and doesn't chain to anything else, mark
484  * it unused immediately. (If it does chain, we can only remove
485  * it as part of pruning its chain.)
486  *
487  * We need this primarily to handle aborted HOT updates, that is,
488  * XMIN_INVALID heap-only tuples. Those might not be linked to by
489  * any chain, since the parent tuple might be re-updated before
490  * any pruning occurs. So we have to be able to reap them
491  * separately from chain-pruning. (Note that
492  * HeapTupleHeaderIsHotUpdated will never return true for an
493  * XMIN_INVALID tuple, so this code will work even when there were
494  * sequential updates within the aborted transaction.)
495  *
496  * Note that we might first arrive at a dead heap-only tuple
497  * either here or while following a chain below. Whichever path
498  * gets there first will mark the tuple unused.
499  */
500  if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
501  !HeapTupleHeaderIsHotUpdated(htup))
502  {
503  heap_prune_record_unused(prstate, rootoffnum);
504  HeapTupleHeaderAdvanceConflictHorizon(htup,
505  &prstate->snapshotConflictHorizon);
506  ndeleted++;
507  }
508 
509  /* Nothing more to do */
510  return ndeleted;
511  }
512  }
513 
514  /* Start from the root tuple */
515  offnum = rootoffnum;
516 
517  /* while not end of the chain */
518  for (;;)
519  {
520  ItemId lp;
521  bool tupdead,
522  recent_dead;
523 
524  /* Sanity check (pure paranoia) */
525  if (offnum < FirstOffsetNumber)
526  break;
527 
528  /*
529  * An offset past the end of page's line pointer array is possible
530  * when the array was truncated (original item must have been unused)
531  */
532  if (offnum > maxoff)
533  break;
534 
535  /* If item is already processed, stop --- it must not be same chain */
536  if (prstate->marked[offnum])
537  break;
538 
539  lp = PageGetItemId(dp, offnum);
540 
541  /* Unused item obviously isn't part of the chain */
542  if (!ItemIdIsUsed(lp))
543  break;
544 
545  /*
546  * If we are looking at the redirected root line pointer, jump to the
547  * first normal tuple in the chain. If we find a redirect somewhere
548  * else, stop --- it must not be same chain.
549  */
550  if (ItemIdIsRedirected(lp))
551  {
552  if (nchain > 0)
553  break; /* not at start of chain */
554  chainitems[nchain++] = offnum;
555  offnum = ItemIdGetRedirect(rootlp);
556  continue;
557  }
558 
559  /*
560  * Likewise, a dead line pointer can't be part of the chain. (We
561  * already eliminated the case of dead root tuple outside this
562  * function.)
563  */
564  if (ItemIdIsDead(lp))
565  {
566  /*
567  * If the caller set mark_unused_now true, we can set dead line
568  * pointers LP_UNUSED now. We don't increment ndeleted here since
569  * the LP was already marked dead.
570  */
571  if (unlikely(prstate->mark_unused_now))
572  heap_prune_record_unused(prstate, offnum);
573 
574  break;
575  }
576 
577  Assert(ItemIdIsNormal(lp));
578  htup = (HeapTupleHeader) PageGetItem(dp, lp);
579 
580  /*
581  * Check the tuple XMIN against prior XMAX, if any
582  */
583  if (TransactionIdIsValid(priorXmax) &&
584  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
585  break;
586 
587  /*
588  * OK, this tuple is indeed a member of the chain.
589  */
590  chainitems[nchain++] = offnum;
591 
592  /*
593  * Check tuple's visibility status.
594  */
595  tupdead = recent_dead = false;
596 
597  switch (htsv_get_valid_status(htsv[offnum]))
598  {
599  case HEAPTUPLE_DEAD:
600  tupdead = true;
601  break;
602 
603  case HEAPTUPLE_RECENTLY_DEAD:
604  recent_dead = true;
605 
606  /*
607  * This tuple may soon become DEAD. Update the hint field so
608  * that the page is reconsidered for pruning in future.
609  */
610  heap_prune_record_prunable(prstate,
611  HeapTupleHeaderGetUpdateXid(htup));
612  break;
613 
614  case HEAPTUPLE_DELETE_IN_PROGRESS:
615 
616  /*
617  * This tuple may soon become DEAD. Update the hint field so
618  * that the page is reconsidered for pruning in future.
619  */
620  heap_prune_record_prunable(prstate,
621  HeapTupleHeaderGetUpdateXid(htup));
622  break;
623 
624  case HEAPTUPLE_LIVE:
625  case HEAPTUPLE_INSERT_IN_PROGRESS:
626 
627  /*
628  * If we wanted to optimize for aborts, we might consider
629  * marking the page prunable when we see INSERT_IN_PROGRESS.
630  * But we don't. See related decisions about when to mark the
631  * page prunable in heapam.c.
632  */
633  break;
634 
635  default:
636  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
637  break;
638  }
639 
640  /*
641  * Remember the last DEAD tuple seen. We will advance past
642  * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
643  * but we can't advance past anything else. We have to make sure that
644  * we don't miss any DEAD tuples, since DEAD tuples that still have
645  * tuple storage after pruning will confuse VACUUM.
646  */
647  if (tupdead)
648  {
649  latestdead = offnum;
650  HeapTupleHeaderAdvanceConflictHorizon(htup,
651  &prstate->snapshotConflictHorizon);
652  }
653  else if (!recent_dead)
654  break;
655 
656  /*
657  * If the tuple is not HOT-updated, then we are at the end of this
658  * HOT-update chain.
659  */
660  if (!HeapTupleHeaderIsHotUpdated(htup))
661  break;
662 
663  /* HOT implies it can't have moved to different partition */
664  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
665 
666  /*
667  * Advance to next chain member.
668  */
669  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
670  BufferGetBlockNumber(buffer));
671  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
672  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
673  }
674 
675  /*
676  * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
677  * the DEAD tuples at the start of the chain are removed and the root line
678  * pointer is appropriately redirected.
679  */
680  if (OffsetNumberIsValid(latestdead))
681  {
682  /*
683  * Mark as unused each intermediate item that we are able to remove
684  * from the chain.
685  *
686  * When the previous item is the last dead tuple seen, we are at the
687  * right candidate for redirection.
688  */
689  for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
690  {
691  heap_prune_record_unused(prstate, chainitems[i]);
692  ndeleted++;
693  }
694 
695  /*
696  * If the root entry had been a normal tuple, we are deleting it, so
697  * count it in the result. But changing a redirect (even to DEAD
698  * state) doesn't count.
699  */
700  if (ItemIdIsNormal(rootlp))
701  ndeleted++;
702 
703  /*
704  * If the DEAD tuple is at the end of the chain, the entire chain is
705  * dead and the root line pointer can be marked dead. Otherwise just
706  * redirect the root to the correct chain member.
707  */
708  if (i >= nchain)
709  heap_prune_record_dead_or_unused(prstate, rootoffnum);
710  else
711  heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
712  }
713  else if (nchain < 2 && ItemIdIsRedirected(rootlp))
714  {
715  /*
716  * We found a redirect item that doesn't point to a valid follow-on
717  * item. This can happen if the loop in heap_page_prune caused us to
718  * visit the dead successor of a redirect item before visiting the
719  * redirect item. We can clean up by setting the redirect item to
720  * DEAD state or LP_UNUSED if the caller indicated.
721  */
722  heap_prune_record_dead_or_unused(prstate, rootoffnum);
723  }
724 
725  return ndeleted;
726 }
727 
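/*
 * Worked example of the chain logic above (offsets are illustrative):
 * suppose a HOT chain rooted at line pointer 1 looks like
 *
 *    1 (normal, DEAD, hot-updated) -> 2 (DEAD) -> 3 (LIVE)
 *
 * The scan collects chainitems = {1, 2, 3} with latestdead = 2. Item 2 is
 * recorded as LP_UNUSED, the root at offset 1 is recorded as a redirect to
 * offset 3, and the function returns ndeleted = 2 (the two DEAD tuple
 * versions), leaving the live tuple reachable through the redirect.
 */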
728 /* Record lowest soon-prunable XID */
729 static void
730 heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
731 {
732  /*
733  * This should exactly match the PageSetPrunable macro. We can't store
734  * directly into the page header yet, so we update working state.
735  */
736  Assert(TransactionIdIsNormal(xid));
737  if (!TransactionIdIsValid(prstate->new_prune_xid) ||
738  TransactionIdPrecedes(xid, prstate->new_prune_xid))
739  prstate->new_prune_xid = xid;
740 }
741 
742 /* Record line pointer to be redirected */
743 static void
744 heap_prune_record_redirect(PruneState *prstate,
745  OffsetNumber offnum, OffsetNumber rdoffnum)
746 {
747  Assert(prstate->nredirected < MaxHeapTuplesPerPage);
748  prstate->redirected[prstate->nredirected * 2] = offnum;
749  prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
750  prstate->nredirected++;
751  Assert(!prstate->marked[offnum]);
752  prstate->marked[offnum] = true;
753  Assert(!prstate->marked[rdoffnum]);
754  prstate->marked[rdoffnum] = true;
755 }
756 
757 /* Record line pointer to be marked dead */
758 static void
759 heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
760 {
761  Assert(prstate->ndead < MaxHeapTuplesPerPage);
762  prstate->nowdead[prstate->ndead] = offnum;
763  prstate->ndead++;
764  Assert(!prstate->marked[offnum]);
765  prstate->marked[offnum] = true;
766 }
767 
768 /*
769  * Depending on whether or not the caller set mark_unused_now to true, record that a
770  * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
771  * which we will mark line pointers LP_UNUSED, but we will not mark line
772  * pointers LP_DEAD if mark_unused_now is true.
773  */
774 static void
775 heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
776 {
777  /*
778  * If the caller set mark_unused_now to true, we can remove dead tuples
779  * during pruning instead of marking their line pointers dead. Set this
780  * tuple's line pointer LP_UNUSED. We hint that this option is less
781  * likely.
782  */
783  if (unlikely(prstate->mark_unused_now))
784  heap_prune_record_unused(prstate, offnum);
785  else
786  heap_prune_record_dead(prstate, offnum);
787 }
788 
789 /* Record line pointer to be marked unused */
790 static void
791 heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
792 {
793  Assert(prstate->nunused < MaxHeapTuplesPerPage);
794  prstate->nowunused[prstate->nunused] = offnum;
795  prstate->nunused++;
796  Assert(!prstate->marked[offnum]);
797  prstate->marked[offnum] = true;
798 }
799 
800 
801 /*
802  * Perform the actual page changes needed by heap_page_prune.
803  *
804  * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
805  * as unused, not redirecting or removing anything else. The
806  * PageRepairFragmentation() call is skipped in that case.
807  *
808  * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
809  * the buffer. If it is set, an ordinary exclusive lock suffices.
810  */
811 void
812 heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
813  OffsetNumber *redirected, int nredirected,
814  OffsetNumber *nowdead, int ndead,
815  OffsetNumber *nowunused, int nunused)
816 {
817  Page page = (Page) BufferGetPage(buffer);
818  OffsetNumber *offnum;
819  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
820 
821  /* Shouldn't be called unless there's something to do */
822  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
823 
824  /* If 'lp_truncate_only', we can only remove already-dead line pointers */
825  Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
826 
827  /* Update all redirected line pointers */
828  offnum = redirected;
829  for (int i = 0; i < nredirected; i++)
830  {
831  OffsetNumber fromoff = *offnum++;
832  OffsetNumber tooff = *offnum++;
833  ItemId fromlp = PageGetItemId(page, fromoff);
834  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
835 
836 #ifdef USE_ASSERT_CHECKING
837 
838  /*
839  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
840  * must be the first item from a HOT chain. If the item has tuple
841  * storage then it can't be a heap-only tuple. Otherwise we are just
842  * maintaining an existing LP_REDIRECT from an existing HOT chain that
843  * has been pruned at least once before now.
844  */
845  if (!ItemIdIsRedirected(fromlp))
846  {
847  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
848 
849  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
850  Assert(!HeapTupleHeaderIsHeapOnly(htup));
851  }
852  else
853  {
854  /* We shouldn't need to redundantly set the redirect */
855  Assert(ItemIdGetRedirect(fromlp) != tooff);
856  }
857 
858  /*
859  * The item that we're about to set as an LP_REDIRECT (the 'from'
860  * item) will point to an existing item (the 'to' item) that is
861  * already a heap-only tuple. There can be at most one LP_REDIRECT
862  * item per HOT chain.
863  *
864  * We need to keep around an LP_REDIRECT item (after original
865  * non-heap-only root tuple gets pruned away) so that it's always
866  * possible for VACUUM to easily figure out what TID to delete from
867  * indexes when an entire HOT chain becomes dead. A heap-only tuple
868  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
869  * tuple can.
870  *
871  * This check may miss problems, e.g. the target of a redirect could
872  * be marked as unused subsequently. The page_verify_redirects() check
873  * below will catch such problems.
874  */
875  tolp = PageGetItemId(page, tooff);
876  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
877  htup = (HeapTupleHeader) PageGetItem(page, tolp);
878  Assert(HeapTupleHeaderIsHeapOnly(htup));
879 #endif
880 
881  ItemIdSetRedirect(fromlp, tooff);
882  }
883 
884  /* Update all now-dead line pointers */
885  offnum = nowdead;
886  for (int i = 0; i < ndead; i++)
887  {
888  OffsetNumber off = *offnum++;
889  ItemId lp = PageGetItemId(page, off);
890 
891 #ifdef USE_ASSERT_CHECKING
892 
893  /*
894  * An LP_DEAD line pointer must be left behind when the original item
895  * (which is dead to everybody) could still be referenced by a TID in
896  * an index. This should never be necessary with any individual
897  * heap-only tuple item, though. (It's not clear how much of a problem
898  * that would be, but there is no reason to allow it.)
899  */
900  if (ItemIdHasStorage(lp))
901  {
902  Assert(ItemIdIsNormal(lp));
903  htup = (HeapTupleHeader) PageGetItem(page, lp);
904  Assert(!HeapTupleHeaderIsHeapOnly(htup));
905  }
906  else
907  {
908  /* Whole HOT chain becomes dead */
909  Assert(ItemIdIsRedirected(lp));
910  }
911 #endif
912 
913  ItemIdSetDead(lp);
914  }
915 
916  /* Update all now-unused line pointers */
917  offnum = nowunused;
918  for (int i = 0; i < nunused; i++)
919  {
920  OffsetNumber off = *offnum++;
921  ItemId lp = PageGetItemId(page, off);
922 
923 #ifdef USE_ASSERT_CHECKING
924 
925  if (lp_truncate_only)
926  {
927  /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
928  Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
929  }
930  else
931  {
932  /*
933  * When heap_page_prune() was called, mark_unused_now may have
934  * been passed as true, which allows would-be LP_DEAD items to be
935  * made LP_UNUSED instead. This is only possible if the relation
936  * has no indexes. If there are any dead items, then
937  * mark_unused_now was not true and every item being marked
938  * LP_UNUSED must refer to a heap-only tuple.
939  */
940  if (ndead > 0)
941  {
942  Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
943  htup = (HeapTupleHeader) PageGetItem(page, lp);
944  Assert(HeapTupleHeaderIsHeapOnly(htup));
945  }
946  else
947  Assert(ItemIdIsUsed(lp));
948  }
949 
950 #endif
951 
952  ItemIdSetUnused(lp);
953  }
954 
955  if (lp_truncate_only)
956  PageTruncateLinePointerArray(page);
957  else
958  {
959  /*
960  * Finally, repair any fragmentation, and update the page's hint bit
961  * about whether it has free pointers.
962  */
963  PageRepairFragmentation(page);
964 
965  /*
966  * Now that the page has been modified, assert that redirect items
967  * still point to valid targets.
968  */
969  page_verify_redirects(page);
970  }
971 }
972 
973 
974 /*
975  * If built with assertions, verify that all LP_REDIRECT items point to a
976  * valid item.
977  *
978  * One way that bugs related to HOT pruning show is redirect items pointing to
979  * removed tuples. It's not trivial to reliably check that marking an item
980  * unused will not orphan a redirect item during heap_prune_chain() /
981  * heap_page_prune_execute(), so we additionally check the whole page after
982  * pruning. Without this check such bugs would typically only cause asserts
983  * later, potentially well after the corruption has been introduced.
984  *
985  * Also check comments in heap_page_prune_execute()'s redirection loop.
986  */
987 static void
988 page_verify_redirects(Page page)
989 {
990 #ifdef USE_ASSERT_CHECKING
991  OffsetNumber offnum;
992  OffsetNumber maxoff;
993 
994  maxoff = PageGetMaxOffsetNumber(page);
995  for (offnum = FirstOffsetNumber;
996  offnum <= maxoff;
997  offnum = OffsetNumberNext(offnum))
998  {
999  ItemId itemid = PageGetItemId(page, offnum);
1000  OffsetNumber targoff;
1001  ItemId targitem;
1002  HeapTupleHeader htup;
1003 
1004  if (!ItemIdIsRedirected(itemid))
1005  continue;
1006 
1007  targoff = ItemIdGetRedirect(itemid);
1008  targitem = PageGetItemId(page, targoff);
1009 
1010  Assert(ItemIdIsUsed(targitem));
1011  Assert(ItemIdIsNormal(targitem));
1012  Assert(ItemIdHasStorage(targitem));
1013  htup = (HeapTupleHeader) PageGetItem(page, targitem);
1014  Assert(HeapTupleHeaderIsHeapOnly(htup));
1015  }
1016 #endif
1017 }
1018 
1019 
1020 /*
1021  * For all items in this page, find their respective root line pointers.
1022  * If item k is part of a HOT-chain with root at item j, then we set
1023  * root_offsets[k - 1] = j.
1024  *
1025  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
1026  * Unused entries are filled with InvalidOffsetNumber (zero).
1027  *
1028  * The function must be called with at least share lock on the buffer, to
1029  * prevent concurrent prune operations.
1030  *
1031  * Note: The information collected here is valid only as long as the caller
1032  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
1033  * and reused by a completely unrelated tuple.
1034  */
1035 void
1036 heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
1037 {
1038  OffsetNumber offnum,
1039  maxoff;
1040 
1041  MemSet(root_offsets, InvalidOffsetNumber,
1042  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1043 
1044  maxoff = PageGetMaxOffsetNumber(page);
1045  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1046  {
1047  ItemId lp = PageGetItemId(page, offnum);
1048  HeapTupleHeader htup;
1049  OffsetNumber nextoffnum;
1050  TransactionId priorXmax;
1051 
1052  /* skip unused and dead items */
1053  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1054  continue;
1055 
1056  if (ItemIdIsNormal(lp))
1057  {
1058  htup = (HeapTupleHeader) PageGetItem(page, lp);
1059 
1060  /*
1061  * Check if this tuple is part of a HOT-chain rooted at some other
1062  * tuple. If so, skip it for now; we'll process it when we find
1063  * its root.
1064  */
1065  if (HeapTupleHeaderIsHeapOnly(htup))
1066  continue;
1067 
1068  /*
1069  * This is either a plain tuple or the root of a HOT-chain.
1070  * Remember it in the mapping.
1071  */
1072  root_offsets[offnum - 1] = offnum;
1073 
1074  /* If it's not the start of a HOT-chain, we're done with it */
1075  if (!HeapTupleHeaderIsHotUpdated(htup))
1076  continue;
1077 
1078  /* Set up to scan the HOT-chain */
1079  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1080  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1081  }
1082  else
1083  {
1084  /* Must be a redirect item. We do not set its root_offsets entry */
1085  Assert(ItemIdIsRedirected(lp));
1086  /* Set up to scan the HOT-chain */
1087  nextoffnum = ItemIdGetRedirect(lp);
1088  priorXmax = InvalidTransactionId;
1089  }
1090 
1091  /*
1092  * Now follow the HOT-chain and collect other tuples in the chain.
1093  *
1094  * Note: Even though this is a nested loop, the complexity of the
1095  * function is O(N) because a tuple in the page should be visited not
1096  * more than twice, once in the outer loop and once in HOT-chain
1097  * chases.
1098  */
1099  for (;;)
1100  {
1101  /* Sanity check (pure paranoia) */
1102  if (offnum < FirstOffsetNumber)
1103  break;
1104 
1105  /*
1106  * An offset past the end of page's line pointer array is possible
1107  * when the array was truncated
1108  */
1109  if (offnum > maxoff)
1110  break;
1111 
1112  lp = PageGetItemId(page, nextoffnum);
1113 
1114  /* Check for broken chains */
1115  if (!ItemIdIsNormal(lp))
1116  break;
1117 
1118  htup = (HeapTupleHeader) PageGetItem(page, lp);
1119 
1120  if (TransactionIdIsValid(priorXmax) &&
1121  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1122  break;
1123 
1124  /* Remember the root line pointer for this item */
1125  root_offsets[nextoffnum - 1] = offnum;
1126 
1127  /* Advance to next chain member, if any */
1128  if (!HeapTupleHeaderIsHotUpdated(htup))
1129  break;
1130 
1131  /* HOT implies it can't have moved to different partition */
1132  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1133 
1134  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1135  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1136  }
1137  }
1138 }
1139 
1140 
1141 /*
1142  * Compare fields that describe actions required to freeze tuple with caller's
1143  * open plan. If everything matches then the frz tuple plan is equivalent to
1144  * caller's plan.
1145  */
1146 static inline bool
1147 heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
1148 {
1149  if (plan->xmax == frz->xmax &&
1150  plan->t_infomask2 == frz->t_infomask2 &&
1151  plan->t_infomask == frz->t_infomask &&
1152  plan->frzflags == frz->frzflags)
1153  return true;
1154 
1155  /* Caller must call heap_log_freeze_new_plan again for frz */
1156  return false;
1157 }
1158 
1159 /*
1160  * Comparator used to deduplicate XLOG_HEAP2_FREEZE_PAGE freeze plans
1161  */
1162 static int
1163 heap_log_freeze_cmp(const void *arg1, const void *arg2)
1164 {
1165  HeapTupleFreeze *frz1 = (HeapTupleFreeze *) arg1;
1166  HeapTupleFreeze *frz2 = (HeapTupleFreeze *) arg2;
1167 
1168  if (frz1->xmax < frz2->xmax)
1169  return -1;
1170  else if (frz1->xmax > frz2->xmax)
1171  return 1;
1172 
1173  if (frz1->t_infomask2 < frz2->t_infomask2)
1174  return -1;
1175  else if (frz1->t_infomask2 > frz2->t_infomask2)
1176  return 1;
1177 
1178  if (frz1->t_infomask < frz2->t_infomask)
1179  return -1;
1180  else if (frz1->t_infomask > frz2->t_infomask)
1181  return 1;
1182 
1183  if (frz1->frzflags < frz2->frzflags)
1184  return -1;
1185  else if (frz1->frzflags > frz2->frzflags)
1186  return 1;
1187 
1188  /*
1189  * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
1190  * (So the tuples will share a single canonical freeze plan.)
1191  *
1192  * We tiebreak on page offset number to keep each freeze plan's page
1193  * offset number array individually sorted. (Unnecessary, but be tidy.)
1194  */
1195  if (frz1->offset < frz2->offset)
1196  return -1;
1197  else if (frz1->offset > frz2->offset)
1198  return 1;
1199 
1200  Assert(false);
1201  return 0;
1202 }
1203 
1204 /*
1205  * Start new plan initialized using tuple-level actions. At least one tuple
1206  * will have steps required to freeze described by caller's plan during REDO.
1207  */
1208 static inline void
1209 heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
1210 {
1211  plan->xmax = frz->xmax;
1212  plan->t_infomask2 = frz->t_infomask2;
1213  plan->t_infomask = frz->t_infomask;
1214  plan->frzflags = frz->frzflags;
1215  plan->ntuples = 1; /* for now */
1216 }
1217 
1218 /*
1219  * Deduplicate tuple-based freeze plans so that each distinct set of
1220  * processing steps is only stored once in XLOG_HEAP2_FREEZE_PAGE records.
1221  * Called during original execution of freezing (for logged relations).
1222  *
1223  * Return value is number of plans set in *plans_out for caller. Also writes
1224  * an array of offset numbers into *offsets_out output argument for caller
1225  * (actually there is one array per freeze plan, but that's not of immediate
1226  * concern to our caller).
1227  */
1228 static int
1229 heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
1230  xlhp_freeze_plan *plans_out,
1231  OffsetNumber *offsets_out)
1232 {
1233  int nplans = 0;
1234 
1235  /* Sort tuple-based freeze plans in the order required to deduplicate */
1236  qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
1237 
1238  for (int i = 0; i < ntuples; i++)
1239  {
1240  HeapTupleFreeze *frz = tuples + i;
1241 
1242  if (i == 0)
1243  {
1244  /* New canonical freeze plan starting with first tup */
1245  heap_log_freeze_new_plan(plans_out, frz);
1246  nplans++;
1247  }
1248  else if (heap_log_freeze_eq(plans_out, frz))
1249  {
1250  /* tup matches open canonical plan -- include tup in it */
1251  Assert(offsets_out[i - 1] < frz->offset);
1252  plans_out->ntuples++;
1253  }
1254  else
1255  {
1256  /* Tup doesn't match current plan -- done with it now */
1257  plans_out++;
1258 
1259  /* New canonical freeze plan starting with this tup */
1260  heap_log_freeze_new_plan(plans_out, frz);
1261  nplans++;
1262  }
1263 
1264  /*
1265  * Save page offset number in dedicated buffer in passing.
1266  *
1267  * REDO routine relies on the record's offset numbers array grouping
1268  * offset numbers by freeze plan. The sort order within each grouping
1269  * is ascending offset number order, just to keep things tidy.
1270  */
1271  offsets_out[i] = frz->offset;
1272  }
1273 
1274  Assert(nplans > 0 && nplans <= ntuples);
1275 
1276  return nplans;
1277 }
1278 
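/*
 * Illustrative example of the deduplication above (offsets are made up): if
 * four tuples at offsets 2, 5, 3, and 9 are frozen, and the tuples at
 * offsets 2, 3, and 5 share the same (xmax, t_infomask, t_infomask2,
 * frzflags) while the one at offset 9 differs, the qsort groups the matching
 * tuples together and two plans are emitted, e.g. plans_out[0].ntuples = 3
 * and plans_out[1].ntuples = 1, with offsets_out holding the offsets grouped
 * by plan and ascending within each group (2, 3, 5, then 9). Which plan
 * sorts first depends on the field values.
 */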
1279 /*
1280  * Write an XLOG_HEAP2_PRUNE_FREEZE WAL record
1281  *
1282  * This is used for several different page maintenance operations:
1283  *
1284  * - Page pruning, in VACUUM's 1st pass or on access: Some items are
1285  * redirected, some marked dead, and some removed altogether.
1286  *
1287  * - Freezing: Items are marked as 'frozen'.
1288  *
1289  * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
1290  *
1291  * They have enough commonalities that we use a single WAL record for them
1292  * all.
1293  *
1294  * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
1295  * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
1296  * replaying 'unused' items depends on whether they were all previously marked
1297  * as dead.
1298  *
1299  * Note: This function scribbles on the 'frozen' array.
1300  *
1301  * Note: This is called in a critical section, so careful what you do here.
1302  */
1303 void
1304 log_heap_prune_and_freeze(Relation relation, Buffer buffer,
1305  TransactionId conflict_xid,
1306  bool cleanup_lock,
1307  PruneReason reason,
1308  HeapTupleFreeze *frozen, int nfrozen,
1309  OffsetNumber *redirected, int nredirected,
1310  OffsetNumber *dead, int ndead,
1311  OffsetNumber *unused, int nunused)
1312 {
1313  xl_heap_prune xlrec;
1314  XLogRecPtr recptr;
1315  uint8 info;
1316 
1317  /* The following local variables hold data registered in the WAL record: */
1318  xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
1319  xlhp_freeze_plans freeze_plans;
1320  xlhp_prune_items redirect_items;
1321  xlhp_prune_items dead_items;
1322  xlhp_prune_items unused_items;
1323  OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
1324 
1325  xlrec.flags = 0;
1326 
1327  /*
1328  * Prepare data for the buffer. The arrays are not actually in the
1329  * buffer, but we pretend that they are. When XLogInsert stores a full
1330  * page image, the arrays can be omitted.
1331  */
1332  XLogBeginInsert();
1333  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
1334  if (nfrozen > 0)
1335  {
1336  int nplans;
1337 
1338  xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
1339 
1340  /*
1341  * Prepare deduplicated representation for use in the WAL record. This
1342  * destructively sorts frozen tuples array in-place.
1343  */
1344  nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
1345 
1346  freeze_plans.nplans = nplans;
1347  XLogRegisterBufData(0, (char *) &freeze_plans,
1348  offsetof(xlhp_freeze_plans, plans));
1349  XLogRegisterBufData(0, (char *) plans,
1350  sizeof(xlhp_freeze_plan) * nplans);
1351  }
1352  if (nredirected > 0)
1353  {
1354  xlrec.flags |= XLHP_HAS_REDIRECTIONS;
1355 
1356  redirect_items.ntargets = nredirected;
1357  XLogRegisterBufData(0, (char *) &redirect_items,
1358  offsetof(xlhp_prune_items, data));
1359  XLogRegisterBufData(0, (char *) redirected,
1360  sizeof(OffsetNumber[2]) * nredirected);
1361  }
1362  if (ndead > 0)
1363  {
1364  xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
1365 
1366  dead_items.ntargets = ndead;
1367  XLogRegisterBufData(0, (char *) &dead_items,
1368  offsetof(xlhp_prune_items, data));
1369  XLogRegisterBufData(0, (char *) dead,
1370  sizeof(OffsetNumber) * ndead);
1371  }
1372  if (nunused > 0)
1373  {
1374  xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
1375 
1376  unused_items.ntargets = nunused;
1377  XLogRegisterBufData(0, (char *) &unused_items,
1378  offsetof(xlhp_prune_items, data));
1379  XLogRegisterBufData(0, (char *) unused,
1380  sizeof(OffsetNumber) * nunused);
1381  }
1382  if (nfrozen > 0)
1383  XLogRegisterBufData(0, (char *) frz_offsets,
1384  sizeof(OffsetNumber) * nfrozen);
1385 
1386  /*
1387  * Prepare the main xl_heap_prune record. We already set the XLPH_HAS_*
1388  * flag above.
1389  */
1390  if (RelationIsAccessibleInLogicalDecoding(relation))
1391  xlrec.flags |= XLHP_IS_CATALOG_REL;
1392  if (TransactionIdIsValid(conflict_xid))
1393  xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
1394  if (cleanup_lock)
1395  xlrec.flags |= XLHP_CLEANUP_LOCK;
1396  else
1397  {
1398  Assert(nredirected == 0 && ndead == 0);
1399  /* also, any items in 'unused' must've been LP_DEAD previously */
1400  }
1401  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
1402  if (TransactionIdIsValid(conflict_xid))
1403  XLogRegisterData((char *) &conflict_xid, sizeof(TransactionId));
1404 
1405  switch (reason)
1406  {
1407  case PRUNE_ON_ACCESS:
1408  info = XLOG_HEAP2_PRUNE_ON_ACCESS;
1409  break;
1410  case PRUNE_VACUUM_SCAN:
1411  info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
1412  break;
1413  case PRUNE_VACUUM_CLEANUP:
1414  info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
1415  break;
1416  default:
1417  elog(ERROR, "unrecognized prune reason: %d", (int) reason);
1418  break;
1419  }
1420  recptr = XLogInsert(RM_HEAP2_ID, info);
1421 
1422  PageSetLSN(BufferGetPage(buffer), recptr);
1423 }