PostgreSQL Source Code  git master
pruneheap.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"
#include "utils/rel.h"
Include dependency graph for pruneheap.c:

Go to the source code of this file.

Data Structures

struct  PruneState
 

Functions

static HTSV_Result heap_prune_satisfies_vacuum (PruneState *prstate, HeapTuple tup, Buffer buffer)
 
static int heap_prune_chain (Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 
static void heap_prune_record_prunable (PruneState *prstate, TransactionId xid)
 
static void heap_prune_record_redirect (PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
 
static void heap_prune_record_dead (PruneState *prstate, OffsetNumber offnum)
 
static void heap_prune_record_unused (PruneState *prstate, OffsetNumber offnum)
 
static void page_verify_redirects (Page page)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune (Relation relation, Buffer buffer, GlobalVisState *vistest, PruneResult *presult, OffsetNumber *off_loc)
 
void heap_page_prune_execute (Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 

Function Documentation

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber *root_offsets 
)

Definition at line 996 of file pruneheap.c.

997 {
998  OffsetNumber offnum,
999  maxoff;
1000 
1001  MemSet(root_offsets, InvalidOffsetNumber,
1003 
1004  maxoff = PageGetMaxOffsetNumber(page);
1005  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1006  {
1007  ItemId lp = PageGetItemId(page, offnum);
1008  HeapTupleHeader htup;
1009  OffsetNumber nextoffnum;
1010  TransactionId priorXmax;
1011 
1012  /* skip unused and dead items */
1013  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1014  continue;
1015 
1016  if (ItemIdIsNormal(lp))
1017  {
1018  htup = (HeapTupleHeader) PageGetItem(page, lp);
1019 
1020  /*
1021  * Check if this tuple is part of a HOT-chain rooted at some other
1022  * tuple. If so, skip it for now; we'll process it when we find
1023  * its root.
1024  */
1025  if (HeapTupleHeaderIsHeapOnly(htup))
1026  continue;
1027 
1028  /*
1029  * This is either a plain tuple or the root of a HOT-chain.
1030  * Remember it in the mapping.
1031  */
1032  root_offsets[offnum - 1] = offnum;
1033 
1034  /* If it's not the start of a HOT-chain, we're done with it */
1035  if (!HeapTupleHeaderIsHotUpdated(htup))
1036  continue;
1037 
1038  /* Set up to scan the HOT-chain */
1039  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1040  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1041  }
1042  else
1043  {
1044  /* Must be a redirect item. We do not set its root_offsets entry */
1046  /* Set up to scan the HOT-chain */
1047  nextoffnum = ItemIdGetRedirect(lp);
1048  priorXmax = InvalidTransactionId;
1049  }
1050 
1051  /*
1052  * Now follow the HOT-chain and collect other tuples in the chain.
1053  *
1054  * Note: Even though this is a nested loop, the complexity of the
1055  * function is O(N) because a tuple in the page should be visited not
1056  * more than twice, once in the outer loop and once in HOT-chain
1057  * chases.
1058  */
1059  for (;;)
1060  {
1061  /* Sanity check (pure paranoia) */
1062  if (offnum < FirstOffsetNumber)
1063  break;
1064 
1065  /*
1066  * An offset past the end of page's line pointer array is possible
1067  * when the array was truncated
1068  */
1069  if (offnum > maxoff)
1070  break;
1071 
1072  lp = PageGetItemId(page, nextoffnum);
1073 
1074  /* Check for broken chains */
1075  if (!ItemIdIsNormal(lp))
1076  break;
1077 
1078  htup = (HeapTupleHeader) PageGetItem(page, lp);
1079 
1080  if (TransactionIdIsValid(priorXmax) &&
1081  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1082  break;
1083 
1084  /* Remember the root line pointer for this item */
1085  root_offsets[nextoffnum - 1] = offnum;
1086 
1087  /* Advance to next chain member, if any */
1088  if (!HeapTupleHeaderIsHotUpdated(htup))
1089  break;
1090 
1091  /* HOT implies it can't have moved to different partition */
1093 
1094  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1095  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1096  }
1097  }
1098 }
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
#define MemSet(start, val, len)
Definition: c.h:1009
uint32 TransactionId
Definition: c.h:641
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:499
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
Assert(fmt[strlen(fmt) - 1] !='\n')
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
ItemPointerData t_ctid
Definition: htup_details.h:161
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define TransactionIdIsValid(xid)
Definition: transam.h:41

References Assert(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().

◆ heap_page_prune()

void heap_page_prune ( Relation  relation,
Buffer  buffer,
GlobalVisState *vistest,
PruneResult *presult,
OffsetNumber *off_loc 
)

Definition at line 213 of file pruneheap.c.

217 {
218  Page page = BufferGetPage(buffer);
219  BlockNumber blockno = BufferGetBlockNumber(buffer);
220  OffsetNumber offnum,
221  maxoff;
222  PruneState prstate;
223  HeapTupleData tup;
224 
225  /*
226  * Our strategy is to scan the page and make lists of items to change,
227  * then apply the changes within a critical section. This keeps as much
228  * logic as possible out of the critical section, and also ensures that
229  * WAL replay will work the same as the normal case.
230  *
231  * First, initialize the new pd_prune_xid value to zero (indicating no
232  * prunable tuples). If we find any tuples which may soon become
233  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
234  * initialize the rest of our working state.
235  */
237  prstate.rel = relation;
238  prstate.vistest = vistest;
240  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
241  memset(prstate.marked, 0, sizeof(prstate.marked));
242 
243  presult->ndeleted = 0;
244  presult->nnewlpdead = 0;
245 
246  maxoff = PageGetMaxOffsetNumber(page);
247  tup.t_tableOid = RelationGetRelid(prstate.rel);
248 
249  /*
250  * Determine HTSV for all tuples.
251  *
252  * This is required for correctness to deal with cases where running HTSV
253  * twice could result in different results (e.g. RECENTLY_DEAD can turn to
254  * DEAD if another checked item causes GlobalVisTestIsRemovableFullXid()
255  * to update the horizon, INSERT_IN_PROGRESS can change to DEAD if the
256  * inserting transaction aborts, ...). That in turn could cause
257  * heap_prune_chain() to behave incorrectly if a tuple is reached twice,
258  * once directly via a heap_prune_chain() and once following a HOT chain.
259  *
260  * It's also good for performance. Most commonly tuples within a page are
261  * stored at decreasing offsets (while the items are stored at increasing
262  * offsets). When processing all tuples on a page this leads to reading
263  * memory at decreasing offsets within a page, with a variable stride.
264  * That's hard for CPU prefetchers to deal with. Processing the items in
265  * reverse order (and thus the tuples in increasing order) increases
266  * prefetching efficiency significantly / decreases the number of cache
267  * misses.
268  */
269  for (offnum = maxoff;
270  offnum >= FirstOffsetNumber;
271  offnum = OffsetNumberPrev(offnum))
272  {
273  ItemId itemid = PageGetItemId(page, offnum);
274  HeapTupleHeader htup;
275 
276  /* Nothing to do if slot doesn't contain a tuple */
277  if (!ItemIdIsNormal(itemid))
278  {
279  prstate.htsv[offnum] = -1;
280  continue;
281  }
282 
283  htup = (HeapTupleHeader) PageGetItem(page, itemid);
284  tup.t_data = htup;
285  tup.t_len = ItemIdGetLength(itemid);
286  ItemPointerSet(&(tup.t_self), blockno, offnum);
287 
288  /*
289  * Set the offset number so that we can display it along with any
290  * error that occurred while processing this tuple.
291  */
292  if (off_loc)
293  *off_loc = offnum;
294 
295  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
296  buffer);
297  }
298 
299  /* Scan the page */
300  for (offnum = FirstOffsetNumber;
301  offnum <= maxoff;
302  offnum = OffsetNumberNext(offnum))
303  {
304  ItemId itemid;
305 
306  /* Ignore items already processed as part of an earlier chain */
307  if (prstate.marked[offnum])
308  continue;
309 
310  /* see preceding loop */
311  if (off_loc)
312  *off_loc = offnum;
313 
314  /* Nothing to do if slot is empty or already dead */
315  itemid = PageGetItemId(page, offnum);
316  if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
317  continue;
318 
319  /* Process this item or chain of items */
320  presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
321  }
322 
323  /* Clear the offset information once we have processed the given page. */
324  if (off_loc)
325  *off_loc = InvalidOffsetNumber;
326 
327  /* Any error while applying the changes is critical */
329 
330  /* Have we found any prunable items? */
331  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
332  {
333  /*
334  * Apply the planned item changes, then repair page fragmentation, and
335  * update the page's hint bit about whether it has free line pointers.
336  */
338  prstate.redirected, prstate.nredirected,
339  prstate.nowdead, prstate.ndead,
340  prstate.nowunused, prstate.nunused);
341 
342  /*
343  * Update the page's pd_prune_xid field to either zero, or the lowest
344  * XID of any soon-prunable tuple.
345  */
346  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
347 
348  /*
349  * Also clear the "page is full" flag, since there's no point in
350  * repeating the prune/defrag process until something else happens to
351  * the page.
352  */
353  PageClearFull(page);
354 
355  MarkBufferDirty(buffer);
356 
357  /*
358  * Emit a WAL XLOG_HEAP2_PRUNE record showing what we did
359  */
360  if (RelationNeedsWAL(relation))
361  {
362  xl_heap_prune xlrec;
363  XLogRecPtr recptr;
364 
367  xlrec.nredirected = prstate.nredirected;
368  xlrec.ndead = prstate.ndead;
369 
370  XLogBeginInsert();
371  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
372 
374 
375  /*
376  * The OffsetNumber arrays are not actually in the buffer, but we
377  * pretend that they are. When XLogInsert stores the whole
378  * buffer, the offset arrays need not be stored too.
379  */
380  if (prstate.nredirected > 0)
381  XLogRegisterBufData(0, (char *) prstate.redirected,
382  prstate.nredirected *
383  sizeof(OffsetNumber) * 2);
384 
385  if (prstate.ndead > 0)
386  XLogRegisterBufData(0, (char *) prstate.nowdead,
387  prstate.ndead * sizeof(OffsetNumber));
388 
389  if (prstate.nunused > 0)
390  XLogRegisterBufData(0, (char *) prstate.nowunused,
391  prstate.nunused * sizeof(OffsetNumber));
392 
393  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_PRUNE);
394 
395  PageSetLSN(BufferGetPage(buffer), recptr);
396  }
397  }
398  else
399  {
400  /*
401  * If we didn't prune anything, but have found a new value for the
402  * pd_prune_xid field, update it and mark the buffer dirty. This is
403  * treated as a non-WAL-logged hint.
404  *
405  * Also clear the "page is full" flag if it is set, since there's no
406  * point in repeating the prune/defrag process until something else
407  * happens to the page.
408  */
409  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
410  PageIsFull(page))
411  {
412  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
413  PageClearFull(page);
414  MarkBufferDirtyHint(buffer, true);
415  }
416  }
417 
419 
420  /* Record number of newly-set-LP_DEAD items for caller */
421  presult->nnewlpdead = prstate.ndead;
422 }
uint32 BlockNumber
Definition: block.h:31
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3290
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2111
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4544
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
PageHeaderData * PageHeader
Definition: bufpage.h:170
Pointer Page
Definition: bufpage.h:78
static void PageClearFull(Page page)
Definition: bufpage.h:420
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
static bool PageIsFull(Page page)
Definition: bufpage.h:410
#define XLOG_HEAP2_PRUNE
Definition: heapam_xlog.h:54
#define SizeOfHeapPrune
Definition: heapam_xlog.h:253
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:476
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:429
void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:797
#define RelationGetRelid(relation)
Definition: rel.h:504
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:685
#define RelationNeedsWAL(relation)
Definition: rel.h:629
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
int nnewlpdead
Definition: heapam.h:200
int ndeleted
Definition: heapam.h:199
int ndead
Definition: pruneheap.c:42
TransactionId new_prune_xid
Definition: pruneheap.c:39
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:46
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:55
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:47
GlobalVisState * vistest
Definition: pruneheap.c:37
Relation rel
Definition: pruneheap.c:34
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:45
int nredirected
Definition: pruneheap.c:41
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:65
int nunused
Definition: pruneheap.c:43
TransactionId snapshotConflictHorizon
Definition: pruneheap.c:40
TransactionId snapshotConflictHorizon
Definition: heapam_xlog.h:245
uint16 nredirected
Definition: heapam_xlog.h:246
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:461
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:392
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34

References BufferGetBlockNumber(), BufferGetPage(), END_CRIT_SECTION, FirstOffsetNumber, heap_page_prune_execute(), heap_prune_chain(), heap_prune_satisfies_vacuum(), PruneState::htsv, InvalidOffsetNumber, InvalidTransactionId, xl_heap_prune::isCatalogRel, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsUsed, ItemPointerSet(), MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::marked, PruneState::ndead, xl_heap_prune::ndead, PruneResult::ndeleted, PruneState::new_prune_xid, PruneResult::nnewlpdead, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, xl_heap_prune::nredirected, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, PageClearFull(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), PageSetLSN(), PruneState::redirected, REGBUF_STANDARD, PruneState::rel, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, SizeOfHeapPrune, PruneState::snapshotConflictHorizon, xl_heap_prune::snapshotConflictHorizon, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, PruneState::vistest, XLOG_HEAP2_PRUNE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_opt(), and lazy_scan_prune().

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
OffsetNumber *redirected,
int  nredirected,
OffsetNumber *nowdead,
int  ndead,
OffsetNumber *nowunused,
int  nunused 
)

Definition at line 797 of file pruneheap.c.

801 {
802  Page page = (Page) BufferGetPage(buffer);
803  OffsetNumber *offnum;
805 
806  /* Shouldn't be called unless there's something to do */
807  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
808 
809  /* Update all redirected line pointers */
810  offnum = redirected;
811  for (int i = 0; i < nredirected; i++)
812  {
813  OffsetNumber fromoff = *offnum++;
814  OffsetNumber tooff = *offnum++;
815  ItemId fromlp = PageGetItemId(page, fromoff);
817 
818 #ifdef USE_ASSERT_CHECKING
819 
820  /*
821  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
822  * must be the first item from a HOT chain. If the item has tuple
823  * storage then it can't be a heap-only tuple. Otherwise we are just
824  * maintaining an existing LP_REDIRECT from an existing HOT chain that
825  * has been pruned at least once before now.
826  */
827  if (!ItemIdIsRedirected(fromlp))
828  {
829  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
830 
831  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
833  }
834  else
835  {
836  /* We shouldn't need to redundantly set the redirect */
837  Assert(ItemIdGetRedirect(fromlp) != tooff);
838  }
839 
840  /*
841  * The item that we're about to set as an LP_REDIRECT (the 'from'
842  * item) will point to an existing item (the 'to' item) that is
843  * already a heap-only tuple. There can be at most one LP_REDIRECT
844  * item per HOT chain.
845  *
846  * We need to keep around an LP_REDIRECT item (after original
847  * non-heap-only root tuple gets pruned away) so that it's always
848  * possible for VACUUM to easily figure out what TID to delete from
849  * indexes when an entire HOT chain becomes dead. A heap-only tuple
850  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
851  * tuple can.
852  *
853  * This check may miss problems, e.g. the target of a redirect could
854  * be marked as unused subsequently. The page_verify_redirects() check
855  * below will catch such problems.
856  */
857  tolp = PageGetItemId(page, tooff);
858  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
859  htup = (HeapTupleHeader) PageGetItem(page, tolp);
861 #endif
862 
863  ItemIdSetRedirect(fromlp, tooff);
864  }
865 
866  /* Update all now-dead line pointers */
867  offnum = nowdead;
868  for (int i = 0; i < ndead; i++)
869  {
870  OffsetNumber off = *offnum++;
871  ItemId lp = PageGetItemId(page, off);
872 
873 #ifdef USE_ASSERT_CHECKING
874 
875  /*
876  * An LP_DEAD line pointer must be left behind when the original item
877  * (which is dead to everybody) could still be referenced by a TID in
878  * an index. This should never be necessary with any individual
879  * heap-only tuple item, though. (It's not clear how much of a problem
880  * that would be, but there is no reason to allow it.)
881  */
882  if (ItemIdHasStorage(lp))
883  {
884  Assert(ItemIdIsNormal(lp));
885  htup = (HeapTupleHeader) PageGetItem(page, lp);
887  }
888  else
889  {
890  /* Whole HOT chain becomes dead */
892  }
893 #endif
894 
895  ItemIdSetDead(lp);
896  }
897 
898  /* Update all now-unused line pointers */
899  offnum = nowunused;
900  for (int i = 0; i < nunused; i++)
901  {
902  OffsetNumber off = *offnum++;
903  ItemId lp = PageGetItemId(page, off);
904 
905 #ifdef USE_ASSERT_CHECKING
906 
907  /*
908  * Only heap-only tuples can become LP_UNUSED during pruning. They
909  * don't need to be left in place as LP_DEAD items until VACUUM gets
910  * around to doing index vacuuming.
911  */
913  htup = (HeapTupleHeader) PageGetItem(page, lp);
915 #endif
916 
917  ItemIdSetUnused(lp);
918  }
919 
920  /*
921  * Finally, repair any fragmentation, and update the page's hint bit about
922  * whether it has free pointers.
923  */
925 
926  /*
927  * Now that the page has been modified, assert that redirect items still
928  * point to valid targets.
929  */
930  page_verify_redirects(page);
931 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:171
int i
Definition: isn.c:73
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:948

References Assert(), BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsNormal, ItemIdIsRedirected, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune(), and heap_xlog_prune().

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 96 of file pruneheap.c.

97 {
98  Page page = BufferGetPage(buffer);
99  TransactionId prune_xid;
100  GlobalVisState *vistest;
101  Size minfree;
102 
103  /*
104  * We can't write WAL in recovery mode, so there's no point trying to
105  * clean the page. The primary will likely issue a cleaning WAL record
106  * soon anyway, so this is no particular loss.
107  */
108  if (RecoveryInProgress())
109  return;
110 
111  /*
112  * First check whether there's any chance there's something to prune,
113  * determining the appropriate horizon is a waste if there's no prune_xid
114  * (i.e. no updates/deletes left potentially dead tuples around).
115  */
116  prune_xid = ((PageHeader) page)->pd_prune_xid;
117  if (!TransactionIdIsValid(prune_xid))
118  return;
119 
120  /*
121  * Check whether prune_xid indicates that there may be dead rows that can
122  * be cleaned up.
123  */
124  vistest = GlobalVisTestFor(relation);
125 
126  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
127  return;
128 
129  /*
130  * We prune when a previous UPDATE failed to find enough space on the page
131  * for a new tuple version, or when free space falls below the relation's
132  * fill-factor target (but not less than 10%).
133  *
134  * Checking free space here is questionable since we aren't holding any
135  * lock on the buffer; in the worst case we could get a bogus answer. It's
136  * unlikely to be *seriously* wrong, though, since reading either pd_lower
137  * or pd_upper is probably atomic. Avoiding taking a lock seems more
138  * important than sometimes getting a wrong answer in what is after all
139  * just a heuristic estimate.
140  */
141  minfree = RelationGetTargetPageFreeSpace(relation,
143  minfree = Max(minfree, BLCKSZ / 10);
144 
145  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
146  {
147  /* OK, try to get exclusive buffer lock */
148  if (!ConditionalLockBufferForCleanup(buffer))
149  return;
150 
151  /*
152  * Now that we have buffer lock, get accurate information about the
153  * page's free space, and recheck the heuristic about whether to
154  * prune.
155  */
156  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
157  {
158  PruneResult presult;
159 
160  heap_page_prune(relation, buffer, vistest, &presult, NULL);
161 
162  /*
163  * Report the number of tuples reclaimed to pgstats. This is
164  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
165  *
166  * We derive the number of dead tuples like this to avoid totally
167  * forgetting about items that were set to LP_DEAD, since they
168  * still need to be cleaned up by VACUUM. We only want to count
169  * heap-only tuples that just became LP_UNUSED in our report,
170  * which don't.
171  *
172  * VACUUM doesn't have to compensate in the same way when it
173  * tracks ndeleted, since it will set the same LP_DEAD items to
174  * LP_UNUSED separately.
175  */
176  if (presult.ndeleted > presult.nnewlpdead)
178  presult.ndeleted - presult.nnewlpdead);
179  }
180 
181  /* And release buffer lock */
183 
184  /*
185  * We avoid reuse of any free space created on the page by unrelated
186  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
187  * free space should be reused by UPDATEs to *this* page.
188  */
189  }
190 }
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4715
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4956
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
#define Max(x, y)
Definition: c.h:987
size_t Size
Definition: c.h:594
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4168
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4011
void heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, PruneResult *presult, OffsetNumber *off_loc)
Definition: pruneheap.c:213
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:377
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:348
bool RecoveryInProgress(void)
Definition: xlog.c:5948

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune(), LockBuffer(), Max, PruneResult::ndeleted, PruneResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by heapam_index_fetch_tuple(), heapam_scan_bitmap_next_block(), and heapgetpage().

◆ heap_prune_chain()

static int heap_prune_chain ( Buffer  buffer,
OffsetNumber  rootoffnum,
PruneState *prstate 
)
static

Definition at line 476 of file pruneheap.c.

477 {
478  int ndeleted = 0;
479  Page dp = (Page) BufferGetPage(buffer);
481  ItemId rootlp;
482  HeapTupleHeader htup;
483  OffsetNumber latestdead = InvalidOffsetNumber,
484  maxoff = PageGetMaxOffsetNumber(dp),
485  offnum;
487  int nchain = 0,
488  i;
489 
490  rootlp = PageGetItemId(dp, rootoffnum);
491 
492  /*
493  * If it's a heap-only tuple, then it is not the start of a HOT chain.
494  */
495  if (ItemIdIsNormal(rootlp))
496  {
497  Assert(prstate->htsv[rootoffnum] != -1);
498  htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
499 
500  if (HeapTupleHeaderIsHeapOnly(htup))
501  {
502  /*
503  * If the tuple is DEAD and doesn't chain to anything else, mark
504  * it unused immediately. (If it does chain, we can only remove
505  * it as part of pruning its chain.)
506  *
507  * We need this primarily to handle aborted HOT updates, that is,
508  * XMIN_INVALID heap-only tuples. Those might not be linked to by
509  * any chain, since the parent tuple might be re-updated before
510  * any pruning occurs. So we have to be able to reap them
511  * separately from chain-pruning. (Note that
512  * HeapTupleHeaderIsHotUpdated will never return true for an
513  * XMIN_INVALID tuple, so this code will work even when there were
514  * sequential updates within the aborted transaction.)
515  *
516  * Note that we might first arrive at a dead heap-only tuple
517  * either here or while following a chain below. Whichever path
518  * gets there first will mark the tuple unused.
519  */
520  if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
522  {
523  heap_prune_record_unused(prstate, rootoffnum);
525  &prstate->snapshotConflictHorizon);
526  ndeleted++;
527  }
528 
529  /* Nothing more to do */
530  return ndeleted;
531  }
532  }
533 
534  /* Start from the root tuple */
535  offnum = rootoffnum;
536 
537  /* while not end of the chain */
538  for (;;)
539  {
540  ItemId lp;
541  bool tupdead,
542  recent_dead;
543 
544  /* Sanity check (pure paranoia) */
545  if (offnum < FirstOffsetNumber)
546  break;
547 
548  /*
549  * An offset past the end of page's line pointer array is possible
550  * when the array was truncated (original item must have been unused)
551  */
552  if (offnum > maxoff)
553  break;
554 
555  /* If item is already processed, stop --- it must not be same chain */
556  if (prstate->marked[offnum])
557  break;
558 
559  lp = PageGetItemId(dp, offnum);
560 
561  /* Unused item obviously isn't part of the chain */
562  if (!ItemIdIsUsed(lp))
563  break;
564 
565  /*
566  * If we are looking at the redirected root line pointer, jump to the
567  * first normal tuple in the chain. If we find a redirect somewhere
568  * else, stop --- it must not be same chain.
569  */
570  if (ItemIdIsRedirected(lp))
571  {
572  if (nchain > 0)
573  break; /* not at start of chain */
574  chainitems[nchain++] = offnum;
575  offnum = ItemIdGetRedirect(rootlp);
576  continue;
577  }
578 
579  /*
580  * Likewise, a dead line pointer can't be part of the chain. (We
581  * already eliminated the case of dead root tuple outside this
582  * function.)
583  */
584  if (ItemIdIsDead(lp))
585  break;
586 
587  Assert(ItemIdIsNormal(lp));
588  Assert(prstate->htsv[offnum] != -1);
589  htup = (HeapTupleHeader) PageGetItem(dp, lp);
590 
591  /*
592  * Check the tuple XMIN against prior XMAX, if any
593  */
594  if (TransactionIdIsValid(priorXmax) &&
595  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
596  break;
597 
598  /*
599  * OK, this tuple is indeed a member of the chain.
600  */
601  chainitems[nchain++] = offnum;
602 
603  /*
604  * Check tuple's visibility status.
605  */
606  tupdead = recent_dead = false;
607 
608  switch ((HTSV_Result) prstate->htsv[offnum])
609  {
610  case HEAPTUPLE_DEAD:
611  tupdead = true;
612  break;
613 
615  recent_dead = true;
616 
617  /*
618  * This tuple may soon become DEAD. Update the hint field so
619  * that the page is reconsidered for pruning in future.
620  */
623  break;
624 
626 
627  /*
628  * This tuple may soon become DEAD. Update the hint field so
629  * that the page is reconsidered for pruning in future.
630  */
633  break;
634 
635  case HEAPTUPLE_LIVE:
637 
638  /*
639  * If we wanted to optimize for aborts, we might consider
640  * marking the page prunable when we see INSERT_IN_PROGRESS.
641  * But we don't. See related decisions about when to mark the
642  * page prunable in heapam.c.
643  */
644  break;
645 
646  default:
647  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
648  break;
649  }
650 
651  /*
652  * Remember the last DEAD tuple seen. We will advance past
653  * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
654  * but we can't advance past anything else. We have to make sure that
655  * we don't miss any DEAD tuples, since DEAD tuples that still have
656  * tuple storage after pruning will confuse VACUUM.
657  */
658  if (tupdead)
659  {
660  latestdead = offnum;
662  &prstate->snapshotConflictHorizon);
663  }
664  else if (!recent_dead)
665  break;
666 
667  /*
668  * If the tuple is not HOT-updated, then we are at the end of this
669  * HOT-update chain.
670  */
671  if (!HeapTupleHeaderIsHotUpdated(htup))
672  break;
673 
674  /* HOT implies it can't have moved to different partition */
676 
677  /*
678  * Advance to next chain member.
679  */
681  BufferGetBlockNumber(buffer));
682  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
683  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
684  }
685 
686  /*
687  * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
688  * the DEAD tuples at the start of the chain are removed and the root line
689  * pointer is appropriately redirected.
690  */
691  if (OffsetNumberIsValid(latestdead))
692  {
693  /*
694  * Mark as unused each intermediate item that we are able to remove
695  * from the chain.
696  *
697  * When the previous item is the last dead tuple seen, we are at the
698  * right candidate for redirection.
699  */
700  for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
701  {
702  heap_prune_record_unused(prstate, chainitems[i]);
703  ndeleted++;
704  }
705 
706  /*
707  * If the root entry had been a normal tuple, we are deleting it, so
708  * count it in the result. But changing a redirect (even to DEAD
709  * state) doesn't count.
710  */
711  if (ItemIdIsNormal(rootlp))
712  ndeleted++;
713 
714  /*
715  * If the DEAD tuple is at the end of the chain, the entire chain is
716  * dead and the root line pointer can be marked dead. Otherwise just
717  * redirect the root to the correct chain member.
718  */
719  if (i >= nchain)
720  heap_prune_record_dead(prstate, rootoffnum);
721  else
722  heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
723  }
724  else if (nchain < 2 && ItemIdIsRedirected(rootlp))
725  {
726  /*
727  * We found a redirect item that doesn't point to a valid follow-on
728  * item. This can happen if the loop in heap_page_prune caused us to
729  * visit the dead successor of a redirect item before visiting the
730  * redirect item. We can clean up by setting the redirect item to
731  * DEAD state.
732  */
733  heap_prune_record_dead(prstate, rootoffnum);
734  }
735 
736  return ndeleted;
737 }
#define ERROR
Definition: elog.h:39
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7484
HTSV_Result
Definition: heapam.h:95
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:98
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:99
@ HEAPTUPLE_LIVE
Definition: heapam.h:97
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:100
@ HEAPTUPLE_DEAD
Definition: heapam.h:96
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
Definition: pruneheap.c:755
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:770
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:781
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
Definition: pruneheap.c:741

References Assert(), BufferGetBlockNumber(), BufferGetPage(), elog(), ERROR, FirstOffsetNumber, heap_prune_record_dead(), heap_prune_record_prunable(), heap_prune_record_redirect(), heap_prune_record_unused(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, PruneState::htsv, i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), PruneState::marked, MaxHeapTuplesPerPage, OffsetNumberIsValid, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PruneState::snapshotConflictHorizon, HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_page_prune().

◆ heap_prune_record_dead()

static void heap_prune_record_dead ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 770 of file pruneheap.c.

771 {
772  Assert(prstate->ndead < MaxHeapTuplesPerPage);
773  prstate->nowdead[prstate->ndead] = offnum;
774  prstate->ndead++;
775  Assert(!prstate->marked[offnum]);
776  prstate->marked[offnum] = true;
777 }

References Assert(), PruneState::marked, MaxHeapTuplesPerPage, PruneState::ndead, and PruneState::nowdead.

Referenced by heap_prune_chain().

◆ heap_prune_record_prunable()

static void heap_prune_record_prunable ( PruneState prstate,
TransactionId  xid 
)
static

Definition at line 741 of file pruneheap.c.

742 {
743  /*
744  * This should exactly match the PageSetPrunable macro. We can't store
745  * directly into the page header yet, so we update working state.
746  */
747  Assert(TransactionIdIsNormal(xid));
748  if (!TransactionIdIsValid(prstate->new_prune_xid) ||
749  TransactionIdPrecedes(xid, prstate->new_prune_xid))
750  prstate->new_prune_xid = xid;
751 }
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert(), PruneState::new_prune_xid, TransactionIdIsNormal, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_prune_chain().

◆ heap_prune_record_redirect()

static void heap_prune_record_redirect ( PruneState prstate,
OffsetNumber  offnum,
OffsetNumber  rdoffnum 
)
static

Definition at line 755 of file pruneheap.c.

757 {
758  Assert(prstate->nredirected < MaxHeapTuplesPerPage);
759  prstate->redirected[prstate->nredirected * 2] = offnum;
760  prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
761  prstate->nredirected++;
762  Assert(!prstate->marked[offnum]);
763  prstate->marked[offnum] = true;
764  Assert(!prstate->marked[rdoffnum]);
765  prstate->marked[rdoffnum] = true;
766 }

References Assert(), PruneState::marked, MaxHeapTuplesPerPage, PruneState::nredirected, and PruneState::redirected.

Referenced by heap_prune_chain().

◆ heap_prune_record_unused()

static void heap_prune_record_unused ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 781 of file pruneheap.c.

782 {
783  Assert(prstate->nunused < MaxHeapTuplesPerPage);
784  prstate->nowunused[prstate->nunused] = offnum;
785  prstate->nunused++;
786  Assert(!prstate->marked[offnum]);
787  prstate->marked[offnum] = true;
788 }

References Assert(), PruneState::marked, MaxHeapTuplesPerPage, PruneState::nowunused, and PruneState::nunused.

Referenced by heap_prune_chain().

◆ heap_prune_satisfies_vacuum()

static HTSV_Result heap_prune_satisfies_vacuum ( PruneState prstate,
HeapTuple  tup,
Buffer  buffer 
)
static

Definition at line 429 of file pruneheap.c.

430 {
431  HTSV_Result res;
432  TransactionId dead_after;
433 
434  res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
435 
436  if (res != HEAPTUPLE_RECENTLY_DEAD)
437  return res;
438 
439  if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
440  res = HEAPTUPLE_DEAD;
441 
442  return res;
443 }
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References GlobalVisTestIsRemovableXid(), HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), res, and PruneState::vistest.

Referenced by heap_page_prune().

◆ page_verify_redirects()

static void page_verify_redirects ( Page  page)
static

Definition at line 948 of file pruneheap.c.

949 {
950 #ifdef USE_ASSERT_CHECKING
951  OffsetNumber offnum;
952  OffsetNumber maxoff;
953 
954  maxoff = PageGetMaxOffsetNumber(page);
955  for (offnum = FirstOffsetNumber;
956  offnum <= maxoff;
957  offnum = OffsetNumberNext(offnum))
958  {
959  ItemId itemid = PageGetItemId(page, offnum);
960  OffsetNumber targoff;
961  ItemId targitem;
962  HeapTupleHeader htup;
963 
964  if (!ItemIdIsRedirected(itemid))
965  continue;
966 
967  targoff = ItemIdGetRedirect(itemid);
968  targitem = PageGetItemId(page, targoff);
969 
970  Assert(ItemIdIsUsed(targitem));
971  Assert(ItemIdIsNormal(targitem));
972  Assert(ItemIdHasStorage(targitem));
973  htup = (HeapTupleHeader) PageGetItem(page, targitem);
974  Assert(HeapTupleHeaderIsHeapOnly(htup));
975  }
976 #endif
977 }

References Assert(), FirstOffsetNumber, HeapTupleHeaderIsHeapOnly, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, OffsetNumberNext, PageGetItem(), PageGetItemId(), and PageGetMaxOffsetNumber().

Referenced by heap_page_prune_execute().