PostgreSQL Source Code  git master
pruneheap.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"
#include "utils/rel.h"
Include dependency graph for pruneheap.c:

Go to the source code of this file.

Data Structures

struct  PruneState
 

Functions

static int heap_prune_chain (Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 
static void heap_prune_record_prunable (PruneState *prstate, TransactionId xid)
 
static void heap_prune_record_redirect (PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
 
static void heap_prune_record_dead (PruneState *prstate, OffsetNumber offnum)
 
static void heap_prune_record_unused (PruneState *prstate, OffsetNumber offnum)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
int heap_page_prune (Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, bool report_stats, TransactionId *latestRemovedXid, OffsetNumber *off_loc)
 
static HTSV_Result heap_prune_satisfies_vacuum (PruneState *prstate, HeapTuple tup, Buffer buffer)
 
void heap_page_prune_execute (Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 

Function Documentation

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 883 of file pruneheap.c.

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber, MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().

884 {
885  OffsetNumber offnum,
886  maxoff;
887 
888  MemSet(root_offsets, InvalidOffsetNumber,
890 
891  maxoff = PageGetMaxOffsetNumber(page);
892  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
893  {
894  ItemId lp = PageGetItemId(page, offnum);
895  HeapTupleHeader htup;
896  OffsetNumber nextoffnum;
897  TransactionId priorXmax;
898 
899  /* skip unused and dead items */
900  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
901  continue;
902 
903  if (ItemIdIsNormal(lp))
904  {
905  htup = (HeapTupleHeader) PageGetItem(page, lp);
906 
907  /*
908  * Check if this tuple is part of a HOT-chain rooted at some other
909  * tuple. If so, skip it for now; we'll process it when we find
910  * its root.
911  */
912  if (HeapTupleHeaderIsHeapOnly(htup))
913  continue;
914 
915  /*
916  * This is either a plain tuple or the root of a HOT-chain.
917  * Remember it in the mapping.
918  */
919  root_offsets[offnum - 1] = offnum;
920 
921  /* If it's not the start of a HOT-chain, we're done with it */
922  if (!HeapTupleHeaderIsHotUpdated(htup))
923  continue;
924 
925  /* Set up to scan the HOT-chain */
926  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
927  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
928  }
929  else
930  {
931  /* Must be a redirect item. We do not set its root_offsets entry */
933  /* Set up to scan the HOT-chain */
934  nextoffnum = ItemIdGetRedirect(lp);
935  priorXmax = InvalidTransactionId;
936  }
937 
938  /*
939  * Now follow the HOT-chain and collect other tuples in the chain.
940  *
941  * Note: Even though this is a nested loop, the complexity of the
942  * function is O(N) because a tuple in the page should be visited not
943  * more than twice, once in the outer loop and once in HOT-chain
944  * chases.
945  */
946  for (;;)
947  {
948  lp = PageGetItemId(page, nextoffnum);
949 
950  /* Check for broken chains */
951  if (!ItemIdIsNormal(lp))
952  break;
953 
954  htup = (HeapTupleHeader) PageGetItem(page, lp);
955 
956  if (TransactionIdIsValid(priorXmax) &&
957  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
958  break;
959 
960  /* Remember the root line pointer for this item */
961  root_offsets[nextoffnum - 1] = offnum;
962 
963  /* Advance to next chain member, if any */
964  if (!HeapTupleHeaderIsHotUpdated(htup))
965  break;
966 
967  /* HOT implies it can't have moved to different partition */
969 
970  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
971  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
972  }
973  }
974 }
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:365
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
uint32 TransactionId
Definition: c.h:521
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
#define MemSet(start, val, len)
Definition: c.h:950
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:445
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:501
ItemPointerData t_ctid
Definition: htup_details.h:160
#define FirstOffsetNumber
Definition: off.h:27
#define InvalidTransactionId
Definition: transam.h:31
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:484
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
#define InvalidOffsetNumber
Definition: off.h:26
#define Assert(condition)
Definition: c.h:746
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ heap_page_prune()

int heap_page_prune ( Relation  relation,
Buffer  buffer,
GlobalVisState vistest,
TransactionId  old_snap_xmin,
TimestampTz  old_snap_ts,
bool  report_stats,
TransactionId latestRemovedXid,
OffsetNumber off_loc 
)

Definition at line 223 of file pruneheap.c.

References BufferGetPage, END_CRIT_SECTION, FirstOffsetNumber, heap_page_prune_execute(), heap_prune_chain(), InvalidOffsetNumber, InvalidTransactionId, ItemIdIsDead, ItemIdIsUsed, PruneState::latestRemovedXid, log_heap_clean(), MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::marked, PruneState::ndead, PruneState::new_prune_xid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nunused, OffsetNumberNext, PruneState::old_snap_ts, PruneState::old_snap_used, PruneState::old_snap_xmin, PageClearFull, PageGetItemId, PageGetMaxOffsetNumber, PageIsFull, PageSetLSN, pgstat_update_heap_dead_tuples(), PruneState::redirected, PruneState::rel, RelationNeedsWAL, START_CRIT_SECTION, and PruneState::vistest.

Referenced by heap_page_prune_opt(), and lazy_scan_heap().

229 {
230  int ndeleted = 0;
231  Page page = BufferGetPage(buffer);
232  OffsetNumber offnum,
233  maxoff;
234  PruneState prstate;
235 
236  /*
237  * Our strategy is to scan the page and make lists of items to change,
238  * then apply the changes within a critical section. This keeps as much
239  * logic as possible out of the critical section, and also ensures that
240  * WAL replay will work the same as the normal case.
241  *
242  * First, initialize the new pd_prune_xid value to zero (indicating no
243  * prunable tuples). If we find any tuples which may soon become
244  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
245  * initialize the rest of our working state.
246  */
248  prstate.rel = relation;
249  prstate.vistest = vistest;
250  prstate.old_snap_xmin = old_snap_xmin;
251  prstate.old_snap_ts = old_snap_ts;
252  prstate.old_snap_used = false;
253  prstate.latestRemovedXid = *latestRemovedXid;
254  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
255  memset(prstate.marked, 0, sizeof(prstate.marked));
256 
257  /* Scan the page */
258  maxoff = PageGetMaxOffsetNumber(page);
259  for (offnum = FirstOffsetNumber;
260  offnum <= maxoff;
261  offnum = OffsetNumberNext(offnum))
262  {
263  ItemId itemid;
264 
265  /* Ignore items already processed as part of an earlier chain */
266  if (prstate.marked[offnum])
267  continue;
268 
269  /*
270  * Set the offset number so that we can display it along with any
271  * error that occurred while processing this tuple.
272  */
273  if (off_loc)
274  *off_loc = offnum;
275 
276  /* Nothing to do if slot is empty or already dead */
277  itemid = PageGetItemId(page, offnum);
278  if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
279  continue;
280 
281  /* Process this item or chain of items */
282  ndeleted += heap_prune_chain(buffer, offnum, &prstate);
283  }
284 
285  /* Clear the offset information once we have processed the given page. */
286  if (off_loc)
287  *off_loc = InvalidOffsetNumber;
288 
289  /* Any error while applying the changes is critical */
291 
292  /* Have we found any prunable items? */
293  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
294  {
295  /*
296  * Apply the planned item changes, then repair page fragmentation, and
297  * update the page's hint bit about whether it has free line pointers.
298  */
300  prstate.redirected, prstate.nredirected,
301  prstate.nowdead, prstate.ndead,
302  prstate.nowunused, prstate.nunused);
303 
304  /*
305  * Update the page's pd_prune_xid field to either zero, or the lowest
306  * XID of any soon-prunable tuple.
307  */
308  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
309 
310  /*
311  * Also clear the "page is full" flag, since there's no point in
312  * repeating the prune/defrag process until something else happens to
313  * the page.
314  */
315  PageClearFull(page);
316 
317  MarkBufferDirty(buffer);
318 
319  /*
320  * Emit a WAL XLOG_HEAP2_CLEAN record showing what we did
321  */
322  if (RelationNeedsWAL(relation))
323  {
324  XLogRecPtr recptr;
325 
326  recptr = log_heap_clean(relation, buffer,
327  prstate.redirected, prstate.nredirected,
328  prstate.nowdead, prstate.ndead,
329  prstate.nowunused, prstate.nunused,
330  prstate.latestRemovedXid);
331 
332  PageSetLSN(BufferGetPage(buffer), recptr);
333  }
334  }
335  else
336  {
337  /*
338  * If we didn't prune anything, but have found a new value for the
339  * pd_prune_xid field, update it and mark the buffer dirty. This is
340  * treated as a non-WAL-logged hint.
341  *
342  * Also clear the "page is full" flag if it is set, since there's no
343  * point in repeating the prune/defrag process until something else
344  * happens to the page.
345  */
346  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
347  PageIsFull(page))
348  {
349  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
350  PageClearFull(page);
351  MarkBufferDirtyHint(buffer, true);
352  }
353  }
354 
356 
357  /*
358  * If requested, report the number of tuples reclaimed to pgstats. This is
359  * ndeleted minus ndead, because we don't want to count a now-DEAD root
360  * item as a deletion for this purpose.
361  */
362  if (report_stats && ndeleted > prstate.ndead)
363  pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
364 
365  *latestRemovedXid = prstate.latestRemovedXid;
366 
367  /*
368  * XXX Should we update the FSM information of this page ?
369  *
370  * There are two schools of thought here. We may not want to update FSM
371  * information so that the page is not used for unrelated UPDATEs/INSERTs
372  * and any free space in this page will remain available for further
373  * UPDATEs in *this* page, thus improving chances for doing HOT updates.
374  *
375  * But for a large table and where a page does not receive further UPDATEs
376  * for a long time, we might waste this space by not updating the FSM
377  * information. The relation may get extended and fragmented further.
378  *
379  * One possibility is to leave "fillfactor" worth of space in this page
380  * and update FSM with the remaining space.
381  */
382 
383  return ndeleted;
384 }
int nredirected
Definition: pruneheap.c:52
static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:492
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
Definition: pgstat.c:2238
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3581
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1469
bool old_snap_used
Definition: pruneheap.c:48
Relation rel
Definition: pruneheap.c:33
#define END_CRIT_SECTION()
Definition: miscadmin.h:134
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
TimestampTz old_snap_ts
Definition: pruneheap.c:46
#define START_CRIT_SECTION()
Definition: miscadmin.h:132
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:57
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId new_prune_xid
Definition: pruneheap.c:50
#define PageIsFull(page)
Definition: bufpage.h:378
int nunused
Definition: pruneheap.c:54
uint16 OffsetNumber
Definition: off.h:24
TransactionId latestRemovedXid
Definition: pruneheap.c:51
#define FirstOffsetNumber
Definition: off.h:27
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define PageClearFull(page)
Definition: bufpage.h:382
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
TransactionId old_snap_xmin
Definition: pruneheap.c:47
#define InvalidOffsetNumber
Definition: off.h:26
GlobalVisState * vistest
Definition: pruneheap.c:36
void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:819
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:58
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7173
#define RelationNeedsWAL(relation)
Definition: rel.h:562
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:56
int ndead
Definition: pruneheap.c:53
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
Pointer Page
Definition: bufpage.h:78

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 819 of file pruneheap.c.

References BufferGetPage, i, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, PageGetItemId, and PageRepairFragmentation().

Referenced by heap_page_prune(), and heap_xlog_clean().

823 {
824  Page page = (Page) BufferGetPage(buffer);
825  OffsetNumber *offnum;
826  int i;
827 
828  /* Update all redirected line pointers */
829  offnum = redirected;
830  for (i = 0; i < nredirected; i++)
831  {
832  OffsetNumber fromoff = *offnum++;
833  OffsetNumber tooff = *offnum++;
834  ItemId fromlp = PageGetItemId(page, fromoff);
835 
836  ItemIdSetRedirect(fromlp, tooff);
837  }
838 
839  /* Update all now-dead line pointers */
840  offnum = nowdead;
841  for (i = 0; i < ndead; i++)
842  {
843  OffsetNumber off = *offnum++;
844  ItemId lp = PageGetItemId(page, off);
845 
846  ItemIdSetDead(lp);
847  }
848 
849  /* Update all now-unused line pointers */
850  offnum = nowunused;
851  for (i = 0; i < nunused; i++)
852  {
853  OffsetNumber off = *offnum++;
854  ItemId lp = PageGetItemId(page, off);
855 
856  ItemIdSetUnused(lp);
857  }
858 
859  /*
860  * Finally, repair any fragmentation, and update the page's hint bit about
861  * whether it has free pointers.
862  */
864 }
uint16 OffsetNumber
Definition: off.h:24
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void PageRepairFragmentation(Page page)
Definition: bufpage.c:674
int i
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
Pointer Page
Definition: bufpage.h:78

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 87 of file pruneheap.c.

References BUFFER_LOCK_UNLOCK, BufferGetPage, ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), GlobalVisTestNonRemovableHorizon(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune(), InvalidTransactionId, LockBuffer(), Max, old_snapshot_threshold, OldSnapshotThresholdActive(), PageGetHeapFreeSpace(), PageIsFull, RecoveryInProgress(), RelationGetTargetPageFreeSpace, SnapshotTooOldMagicForTest(), TransactionIdIsValid, TransactionIdLimitedForOldSnapshots(), and TransactionIdPrecedes().

Referenced by heapam_index_fetch_tuple(), heapam_scan_bitmap_next_block(), and heapgetpage().

88 {
89  Page page = BufferGetPage(buffer);
90  TransactionId prune_xid;
91  GlobalVisState *vistest;
92  TransactionId limited_xmin = InvalidTransactionId;
93  TimestampTz limited_ts = 0;
94  Size minfree;
95 
96  /*
97  * We can't write WAL in recovery mode, so there's no point trying to
98  * clean the page. The primary will likely issue a cleaning WAL record soon
99  * anyway, so this is no particular loss.
100  */
101  if (RecoveryInProgress())
102  return;
103 
104  /*
105  * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
106  * currently are broken, and discussion of what to do about them is
107  * ongoing. See
108  * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
109  */
110  if (old_snapshot_threshold == 0)
112 
113  /*
114  * First check whether there's any chance there's something to prune,
115  * determining the appropriate horizon is a waste if there's no prune_xid
116  * (i.e. no updates/deletes left potentially dead tuples around).
117  */
118  prune_xid = ((PageHeader) page)->pd_prune_xid;
119  if (!TransactionIdIsValid(prune_xid))
120  return;
121 
122  /*
123  * Check whether prune_xid indicates that there may be dead rows that can
124  * be cleaned up.
125  *
126  * It is OK to check the old snapshot limit before acquiring the cleanup
127  * lock because the worst that can happen is that we are not quite as
128  * aggressive about the cleanup (by however many transaction IDs are
129  * consumed between this point and acquiring the lock). This allows us to
130  * save significant overhead in the case where the page is found not to be
131  * prunable.
132  *
133  * Even if old_snapshot_threshold is set, we first check whether the page
134  * can be pruned without. Both because
135  * TransactionIdLimitedForOldSnapshots() is not cheap, and because not
136  * unnecessarily relying on old_snapshot_threshold avoids causing
137  * conflicts.
138  */
139  vistest = GlobalVisTestFor(relation);
140 
141  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
142  {
144  return;
145 
147  relation,
148  &limited_xmin, &limited_ts))
149  return;
150 
151  if (!TransactionIdPrecedes(prune_xid, limited_xmin))
152  return;
153  }
154 
155  /*
156  * We prune when a previous UPDATE failed to find enough space on the page
157  * for a new tuple version, or when free space falls below the relation's
158  * fill-factor target (but not less than 10%).
159  *
160  * Checking free space here is questionable since we aren't holding any
161  * lock on the buffer; in the worst case we could get a bogus answer. It's
162  * unlikely to be *seriously* wrong, though, since reading either pd_lower
163  * or pd_upper is probably atomic. Avoiding taking a lock seems more
164  * important than sometimes getting a wrong answer in what is after all
165  * just a heuristic estimate.
166  */
167  minfree = RelationGetTargetPageFreeSpace(relation,
169  minfree = Max(minfree, BLCKSZ / 10);
170 
171  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
172  {
173  /* OK, try to get exclusive buffer lock */
174  if (!ConditionalLockBufferForCleanup(buffer))
175  return;
176 
177  /*
178  * Now that we have buffer lock, get accurate information about the
179  * page's free space, and recheck the heuristic about whether to
180  * prune. (We needn't recheck PageIsPrunable, since no one else could
181  * have pruned while we hold pin.)
182  */
183  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
184  {
185  TransactionId ignore = InvalidTransactionId; /* return value not
186  * needed */
187 
188  /* OK to prune */
189  (void) heap_page_prune(relation, buffer, vistest,
190  limited_xmin, limited_ts,
191  true, &ignore, NULL);
192  }
193 
194  /* And release buffer lock */
196  }
197 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
int heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, bool report_stats, TransactionId *latestRemovedXid, OffsetNumber *off_loc)
Definition: pruneheap.c:223
uint32 TransactionId
Definition: c.h:521
int64 TimestampTz
Definition: timestamp.h:39
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1751
bool RecoveryInProgress(void)
Definition: xlog.c:8076
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4028
#define PageIsFull(page)
Definition: bufpage.h:378
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3866
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3944
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:866
void SnapshotTooOldMagicForTest(void)
Definition: snapmgr.c:1689
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:340
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3750
#define Max(x, y)
Definition: c.h:922
PageHeaderData * PageHeader
Definition: bufpage.h:166
size_t Size
Definition: c.h:474
int old_snapshot_threshold
Definition: snapmgr.c:78
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:311
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4066
Pointer Page
Definition: bufpage.h:78

◆ heap_prune_chain()

static int heap_prune_chain ( Buffer  buffer,
OffsetNumber  rootoffnum,
PruneState prstate 
)
static

Definition at line 492 of file pruneheap.c.

References Assert, BufferGetBlockNumber(), BufferGetPage, elog, ERROR, heap_prune_record_dead(), heap_prune_record_prunable(), heap_prune_record_redirect(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderAdvanceLatestRemovedXid(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, ItemPointerSet, PruneState::latestRemovedXid, PruneState::marked, MaxHeapTuplesPerPage, OffsetNumberIsValid, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PruneState::rel, RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_page_prune().

493 {
494  int ndeleted = 0;
495  Page dp = (Page) BufferGetPage(buffer);
497  ItemId rootlp;
498  HeapTupleHeader htup;
499  OffsetNumber latestdead = InvalidOffsetNumber,
500  maxoff = PageGetMaxOffsetNumber(dp),
501  offnum;
503  int nchain = 0,
504  i;
505  HeapTupleData tup;
506 
507  tup.t_tableOid = RelationGetRelid(prstate->rel);
508 
509  rootlp = PageGetItemId(dp, rootoffnum);
510 
511  /*
512  * If it's a heap-only tuple, then it is not the start of a HOT chain.
513  */
514  if (ItemIdIsNormal(rootlp))
515  {
516  htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
517 
518  tup.t_data = htup;
519  tup.t_len = ItemIdGetLength(rootlp);
520  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
521 
522  if (HeapTupleHeaderIsHeapOnly(htup))
523  {
524  /*
525  * If the tuple is DEAD and doesn't chain to anything else, mark
526  * it unused immediately. (If it does chain, we can only remove
527  * it as part of pruning its chain.)
528  *
529  * We need this primarily to handle aborted HOT updates, that is,
530  * XMIN_INVALID heap-only tuples. Those might not be linked to by
531  * any chain, since the parent tuple might be re-updated before
532  * any pruning occurs. So we have to be able to reap them
533  * separately from chain-pruning. (Note that
534  * HeapTupleHeaderIsHotUpdated will never return true for an
535  * XMIN_INVALID tuple, so this code will work even when there were
536  * sequential updates within the aborted transaction.)
537  *
538  * Note that we might first arrive at a dead heap-only tuple
539  * either here or while following a chain below. Whichever path
540  * gets there first will mark the tuple unused.
541  */
542  if (heap_prune_satisfies_vacuum(prstate, &tup, buffer)
544  {
545  heap_prune_record_unused(prstate, rootoffnum);
547  &prstate->latestRemovedXid);
548  ndeleted++;
549  }
550 
551  /* Nothing more to do */
552  return ndeleted;
553  }
554  }
555 
556  /* Start from the root tuple */
557  offnum = rootoffnum;
558 
559  /* while not end of the chain */
560  for (;;)
561  {
562  ItemId lp;
563  bool tupdead,
564  recent_dead;
565 
566  /* Some sanity checks */
567  if (offnum < FirstOffsetNumber || offnum > maxoff)
568  break;
569 
570  /* If item is already processed, stop --- it must not be same chain */
571  if (prstate->marked[offnum])
572  break;
573 
574  lp = PageGetItemId(dp, offnum);
575 
576  /* Unused item obviously isn't part of the chain */
577  if (!ItemIdIsUsed(lp))
578  break;
579 
580  /*
581  * If we are looking at the redirected root line pointer, jump to the
582  * first normal tuple in the chain. If we find a redirect somewhere
583  * else, stop --- it must not be same chain.
584  */
585  if (ItemIdIsRedirected(lp))
586  {
587  if (nchain > 0)
588  break; /* not at start of chain */
589  chainitems[nchain++] = offnum;
590  offnum = ItemIdGetRedirect(rootlp);
591  continue;
592  }
593 
594  /*
595  * Likewise, a dead line pointer can't be part of the chain. (We
596  * already eliminated the case of dead root tuple outside this
597  * function.)
598  */
599  if (ItemIdIsDead(lp))
600  break;
601 
602  Assert(ItemIdIsNormal(lp));
603  htup = (HeapTupleHeader) PageGetItem(dp, lp);
604 
605  tup.t_data = htup;
606  tup.t_len = ItemIdGetLength(lp);
607  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
608 
609  /*
610  * Check the tuple XMIN against prior XMAX, if any
611  */
612  if (TransactionIdIsValid(priorXmax) &&
613  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
614  break;
615 
616  /*
617  * OK, this tuple is indeed a member of the chain.
618  */
619  chainitems[nchain++] = offnum;
620 
621  /*
622  * Check tuple's visibility status.
623  */
624  tupdead = recent_dead = false;
625 
626  switch (heap_prune_satisfies_vacuum(prstate, &tup, buffer))
627  {
628  case HEAPTUPLE_DEAD:
629  tupdead = true;
630  break;
631 
633  recent_dead = true;
634 
635  /*
636  * This tuple may soon become DEAD. Update the hint field so
637  * that the page is reconsidered for pruning in future.
638  */
641  break;
642 
644 
645  /*
646  * This tuple may soon become DEAD. Update the hint field so
647  * that the page is reconsidered for pruning in future.
648  */
651  break;
652 
653  case HEAPTUPLE_LIVE:
655 
656  /*
657  * If we wanted to optimize for aborts, we might consider
658  * marking the page prunable when we see INSERT_IN_PROGRESS.
659  * But we don't. See related decisions about when to mark the
660  * page prunable in heapam.c.
661  */
662  break;
663 
664  default:
665  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
666  break;
667  }
668 
669  /*
670  * Remember the last DEAD tuple seen. We will advance past
671  * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
672  * but we can't advance past anything else. (XXX is it really worth
673  * continuing to scan beyond RECENTLY_DEAD? The case where we will
674  * find another DEAD tuple is a fairly unusual corner case.)
675  */
676  if (tupdead)
677  {
678  latestdead = offnum;
680  &prstate->latestRemovedXid);
681  }
682  else if (!recent_dead)
683  break;
684 
685  /*
686  * If the tuple is not HOT-updated, then we are at the end of this
687  * HOT-update chain.
688  */
689  if (!HeapTupleHeaderIsHotUpdated(htup))
690  break;
691 
692  /* HOT implies it can't have moved to different partition */
694 
695  /*
696  * Advance to next chain member.
697  */
699  BufferGetBlockNumber(buffer));
700  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
701  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
702  }
703 
704  /*
705  * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
706  * the DEAD tuples at the start of the chain are removed and the root line
707  * pointer is appropriately redirected.
708  */
709  if (OffsetNumberIsValid(latestdead))
710  {
711  /*
712  * Mark as unused each intermediate item that we are able to remove
713  * from the chain.
714  *
715  * When the previous item is the last dead tuple seen, we are at the
716  * right candidate for redirection.
717  */
718  for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
719  {
720  heap_prune_record_unused(prstate, chainitems[i]);
721  ndeleted++;
722  }
723 
724  /*
725  * If the root entry had been a normal tuple, we are deleting it, so
726  * count it in the result. But changing a redirect (even to DEAD
727  * state) doesn't count.
728  */
729  if (ItemIdIsNormal(rootlp))
730  ndeleted++;
731 
732  /*
733  * If the DEAD tuple is at the end of the chain, the entire chain is
734  * dead and the root line pointer can be marked dead. Otherwise just
735  * redirect the root to the correct chain member.
736  */
737  if (i >= nchain)
738  heap_prune_record_dead(prstate, rootoffnum);
739  else
740  heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
741  }
742  else if (nchain < 2 && ItemIdIsRedirected(rootlp))
743  {
744  /*
745  * We found a redirect item that doesn't point to a valid follow-on
746  * item. This can happen if the loop in heap_page_prune caused us to
747  * visit the dead successor of a redirect item before visiting the
748  * redirect item. We can clean up by setting the redirect item to
749  * DEAD state.
750  */
751  heap_prune_record_dead(prstate, rootoffnum);
752  }
753 
754  return ndeleted;
755 }
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:365
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:6903
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
uint32 TransactionId
Definition: c.h:521
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
Relation rel
Definition: pruneheap.c:33
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:788
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:445
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
HeapTupleHeader t_data
Definition: htup.h:68
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:501
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ERROR
Definition: elog.h:43
ItemPointerData t_ctid
Definition: htup_details.h:160
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
Definition: pruneheap.c:759
TransactionId latestRemovedXid
Definition: pruneheap.c:51
#define InvalidTransactionId
Definition: transam.h:31
Oid t_tableOid
Definition: htup.h:66
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:484
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:404
#define InvalidOffsetNumber
Definition: off.h:26
#define Assert(condition)
Definition: c.h:746
static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
Definition: pruneheap.c:773
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2661
#define elog(elevel,...)
Definition: elog.h:214
int i
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define RelationGetRelid(relation)
Definition: rel.h:456
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:799

◆ heap_prune_record_dead()

static void heap_prune_record_dead ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 788 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::ndead, and PruneState::nowdead.

Referenced by heap_prune_chain().

789 {
790  Assert(prstate->ndead < MaxHeapTuplesPerPage);
791  prstate->nowdead[prstate->ndead] = offnum;
792  prstate->ndead++;
793  Assert(!prstate->marked[offnum]);
794  prstate->marked[offnum] = true;
795 }
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:57
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define Assert(condition)
Definition: c.h:746
int ndead
Definition: pruneheap.c:53

◆ heap_prune_record_prunable()

static void heap_prune_record_prunable ( PruneState prstate,
TransactionId  xid 
)
static

Definition at line 759 of file pruneheap.c.

References Assert, PruneState::new_prune_xid, TransactionIdIsNormal, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_prune_chain().

760 {
761  /*
762  * This should exactly match the PageSetPrunable macro. We can't store
763  * directly into the page header yet, so we update working state.
764  */
766  if (!TransactionIdIsValid(prstate->new_prune_xid) ||
767  TransactionIdPrecedes(xid, prstate->new_prune_xid))
768  prstate->new_prune_xid = xid;
769 }
TransactionId new_prune_xid
Definition: pruneheap.c:50
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define Assert(condition)
Definition: c.h:746
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

◆ heap_prune_record_redirect()

static void heap_prune_record_redirect ( PruneState prstate,
OffsetNumber  offnum,
OffsetNumber  rdoffnum 
)
static

Definition at line 773 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::nredirected, and PruneState::redirected.

Referenced by heap_prune_chain().

775 {
777  prstate->redirected[prstate->nredirected * 2] = offnum;
778  prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
779  prstate->nredirected++;
780  Assert(!prstate->marked[offnum]);
781  prstate->marked[offnum] = true;
782  Assert(!prstate->marked[rdoffnum]);
783  prstate->marked[rdoffnum] = true;
784 }
int nredirected
Definition: pruneheap.c:52
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define Assert(condition)
Definition: c.h:746
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:56

◆ heap_prune_record_unused()

static void heap_prune_record_unused ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 799 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::nowunused, and PruneState::nunused.

Referenced by heap_prune_chain().

800 {
801  Assert(prstate->nunused < MaxHeapTuplesPerPage);
802  prstate->nowunused[prstate->nunused] = offnum;
803  prstate->nunused++;
804  Assert(!prstate->marked[offnum]);
805  prstate->marked[offnum] = true;
806 }
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
int nunused
Definition: pruneheap.c:54
#define Assert(condition)
Definition: c.h:746
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:58

◆ heap_prune_satisfies_vacuum()

static HTSV_Result heap_prune_satisfies_vacuum ( PruneState prstate,
HeapTuple  tup,
Buffer  buffer 
)
static

Definition at line 404 of file pruneheap.c.

References Assert, GlobalVisTestIsRemovableXid(), GlobalVisTestNonRemovableHorizon(), HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), PruneState::old_snap_ts, PruneState::old_snap_used, PruneState::old_snap_xmin, OldSnapshotThresholdActive(), PruneState::rel, SetOldSnapshotThresholdTimestamp(), TransactionIdIsValid, TransactionIdLimitedForOldSnapshots(), TransactionIdPrecedes(), and PruneState::vistest.

Referenced by heap_prune_chain().

405 {
406  HTSV_Result res;
407  TransactionId dead_after;
408 
409  res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
410 
411  if (res != HEAPTUPLE_RECENTLY_DEAD)
412  return res;
413 
414  /*
415  * If we are already relying on the limited xmin, there is no need to
416  * delay doing so anymore.
417  */
418  if (prstate->old_snap_used)
419  {
421 
422  if (TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
423  res = HEAPTUPLE_DEAD;
424  return res;
425  }
426 
427  /*
428  * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
429  * row dead. If not, and old_snapshot_threshold is enabled, try to use the
430  * lowered horizon.
431  */
432  if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
433  res = HEAPTUPLE_DEAD;
434  else if (OldSnapshotThresholdActive())
435  {
436  /* haven't determined limited horizon yet, requests */
437  if (!TransactionIdIsValid(prstate->old_snap_xmin))
438  {
439  TransactionId horizon =
441 
442  TransactionIdLimitedForOldSnapshots(horizon, prstate->rel,
443  &prstate->old_snap_xmin,
444  &prstate->old_snap_ts);
445  }
446 
447  if (TransactionIdIsValid(prstate->old_snap_xmin) &&
448  TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
449  {
450  /*
451  * About to remove row based on snapshot_too_old. Need to raise
452  * the threshold so problematic accesses would error.
453  */
454  Assert(!prstate->old_snap_used);
456  prstate->old_snap_xmin);
457  prstate->old_snap_used = true;
458  res = HEAPTUPLE_DEAD;
459  }
460  }
461 
462  return res;
463 }
uint32 TransactionId
Definition: c.h:521
bool old_snap_used
Definition: pruneheap.c:48
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
Relation rel
Definition: pruneheap.c:33
TimestampTz old_snap_ts
Definition: pruneheap.c:46
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1751
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4028
void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1672
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId old_snap_xmin
Definition: pruneheap.c:47
GlobalVisState * vistest
Definition: pruneheap.c:36
#define Assert(condition)
Definition: c.h:746
HTSV_Result
Definition: heapam.h:87
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4066