PostgreSQL Source Code  git master
pruneheap.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"
#include "utils/rel.h"
Include dependency graph for pruneheap.c:

Go to the source code of this file.

Data Structures

struct  PruneState
 

Functions

static int heap_prune_chain (Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 
static void heap_prune_record_prunable (PruneState *prstate, TransactionId xid)
 
static void heap_prune_record_redirect (PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
 
static void heap_prune_record_dead (PruneState *prstate, OffsetNumber offnum)
 
static void heap_prune_record_unused (PruneState *prstate, OffsetNumber offnum)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
int heap_page_prune (Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, bool report_stats, TransactionId *latestRemovedXid, OffsetNumber *off_loc)
 
static HTSV_Result heap_prune_satisfies_vacuum (PruneState *prstate, HeapTuple tup, Buffer buffer)
 
void heap_page_prune_execute (Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 

Function Documentation

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 884 of file pruneheap.c.

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber, MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().

885 {
886  OffsetNumber offnum,
887  maxoff;
888 
889  MemSet(root_offsets, InvalidOffsetNumber,
891 
892  maxoff = PageGetMaxOffsetNumber(page);
893  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
894  {
895  ItemId lp = PageGetItemId(page, offnum);
896  HeapTupleHeader htup;
897  OffsetNumber nextoffnum;
898  TransactionId priorXmax;
899 
900  /* skip unused and dead items */
901  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
902  continue;
903 
904  if (ItemIdIsNormal(lp))
905  {
906  htup = (HeapTupleHeader) PageGetItem(page, lp);
907 
908  /*
909  * Check if this tuple is part of a HOT-chain rooted at some other
910  * tuple. If so, skip it for now; we'll process it when we find
911  * its root.
912  */
913  if (HeapTupleHeaderIsHeapOnly(htup))
914  continue;
915 
916  /*
917  * This is either a plain tuple or the root of a HOT-chain.
918  * Remember it in the mapping.
919  */
920  root_offsets[offnum - 1] = offnum;
921 
922  /* If it's not the start of a HOT-chain, we're done with it */
923  if (!HeapTupleHeaderIsHotUpdated(htup))
924  continue;
925 
926  /* Set up to scan the HOT-chain */
927  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
928  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
929  }
930  else
931  {
932  /* Must be a redirect item. We do not set its root_offsets entry */
934  /* Set up to scan the HOT-chain */
935  nextoffnum = ItemIdGetRedirect(lp);
936  priorXmax = InvalidTransactionId;
937  }
938 
939  /*
940  * Now follow the HOT-chain and collect other tuples in the chain.
941  *
942  * Note: Even though this is a nested loop, the complexity of the
943  * function is O(N) because a tuple in the page should be visited not
944  * more than twice, once in the outer loop and once in HOT-chain
945  * chases.
946  */
947  for (;;)
948  {
949  lp = PageGetItemId(page, nextoffnum);
950 
951  /* Check for broken chains */
952  if (!ItemIdIsNormal(lp))
953  break;
954 
955  htup = (HeapTupleHeader) PageGetItem(page, lp);
956 
957  if (TransactionIdIsValid(priorXmax) &&
958  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
959  break;
960 
961  /* Remember the root line pointer for this item */
962  root_offsets[nextoffnum - 1] = offnum;
963 
964  /* Advance to next chain member, if any */
965  if (!HeapTupleHeaderIsHotUpdated(htup))
966  break;
967 
968  /* HOT implies it can't have moved to different partition */
970 
971  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
972  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
973  }
974  }
975 }
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:365
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
uint32 TransactionId
Definition: c.h:587
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
#define MemSet(start, val, len)
Definition: c.h:1008
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:445
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:501
ItemPointerData t_ctid
Definition: htup_details.h:160
#define FirstOffsetNumber
Definition: off.h:27
#define InvalidTransactionId
Definition: transam.h:31
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:484
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
#define InvalidOffsetNumber
Definition: off.h:26
#define Assert(condition)
Definition: c.h:804
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define PageGetItem(page, itemId)
Definition: bufpage.h:340

◆ heap_page_prune()

int heap_page_prune ( Relation  relation,
Buffer  buffer,
GlobalVisState vistest,
TransactionId  old_snap_xmin,
TimestampTz  old_snap_ts,
bool  report_stats,
TransactionId latestRemovedXid,
OffsetNumber off_loc 
)

Definition at line 224 of file pruneheap.c.

References BufferGetPage, END_CRIT_SECTION, FirstOffsetNumber, heap_page_prune_execute(), heap_prune_chain(), InvalidOffsetNumber, InvalidTransactionId, ItemIdIsDead, ItemIdIsUsed, PruneState::latestRemovedXid, log_heap_clean(), MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::marked, PruneState::ndead, PruneState::new_prune_xid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nunused, OffsetNumberNext, PruneState::old_snap_ts, PruneState::old_snap_used, PruneState::old_snap_xmin, PageClearFull, PageGetItemId, PageGetMaxOffsetNumber, PageIsFull, PageSetLSN, pgstat_update_heap_dead_tuples(), PruneState::redirected, PruneState::rel, RelationNeedsWAL, START_CRIT_SECTION, and PruneState::vistest.

Referenced by heap_page_prune_opt(), and lazy_scan_heap().

230 {
231  int ndeleted = 0;
232  Page page = BufferGetPage(buffer);
233  OffsetNumber offnum,
234  maxoff;
235  PruneState prstate;
236 
237  /*
238  * Our strategy is to scan the page and make lists of items to change,
239  * then apply the changes within a critical section. This keeps as much
240  * logic as possible out of the critical section, and also ensures that
241  * WAL replay will work the same as the normal case.
242  *
243  * First, initialize the new pd_prune_xid value to zero (indicating no
244  * prunable tuples). If we find any tuples which may soon become
245  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
246  * initialize the rest of our working state.
247  */
249  prstate.rel = relation;
250  prstate.vistest = vistest;
251  prstate.old_snap_xmin = old_snap_xmin;
252  prstate.old_snap_ts = old_snap_ts;
253  prstate.old_snap_used = false;
254  prstate.latestRemovedXid = *latestRemovedXid;
255  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
256  memset(prstate.marked, 0, sizeof(prstate.marked));
257 
258  /* Scan the page */
259  maxoff = PageGetMaxOffsetNumber(page);
260  for (offnum = FirstOffsetNumber;
261  offnum <= maxoff;
262  offnum = OffsetNumberNext(offnum))
263  {
264  ItemId itemid;
265 
266  /* Ignore items already processed as part of an earlier chain */
267  if (prstate.marked[offnum])
268  continue;
269 
270  /*
271  * Set the offset number so that we can display it along with any
272  * error that occurred while processing this tuple.
273  */
274  if (off_loc)
275  *off_loc = offnum;
276 
277  /* Nothing to do if slot is empty or already dead */
278  itemid = PageGetItemId(page, offnum);
279  if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
280  continue;
281 
282  /* Process this item or chain of items */
283  ndeleted += heap_prune_chain(buffer, offnum, &prstate);
284  }
285 
286  /* Clear the offset information once we have processed the given page. */
287  if (off_loc)
288  *off_loc = InvalidOffsetNumber;
289 
290  /* Any error while applying the changes is critical */
292 
293  /* Have we found any prunable items? */
294  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
295  {
296  /*
297  * Apply the planned item changes, then repair page fragmentation, and
298  * update the page's hint bit about whether it has free line pointers.
299  */
301  prstate.redirected, prstate.nredirected,
302  prstate.nowdead, prstate.ndead,
303  prstate.nowunused, prstate.nunused);
304 
305  /*
306  * Update the page's pd_prune_xid field to either zero, or the lowest
307  * XID of any soon-prunable tuple.
308  */
309  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
310 
311  /*
312  * Also clear the "page is full" flag, since there's no point in
313  * repeating the prune/defrag process until something else happens to
314  * the page.
315  */
316  PageClearFull(page);
317 
318  MarkBufferDirty(buffer);
319 
320  /*
321  * Emit a WAL XLOG_HEAP2_CLEAN record showing what we did
322  */
323  if (RelationNeedsWAL(relation))
324  {
325  XLogRecPtr recptr;
326 
327  recptr = log_heap_clean(relation, buffer,
328  prstate.redirected, prstate.nredirected,
329  prstate.nowdead, prstate.ndead,
330  prstate.nowunused, prstate.nunused,
331  prstate.latestRemovedXid);
332 
333  PageSetLSN(BufferGetPage(buffer), recptr);
334  }
335  }
336  else
337  {
338  /*
339  * If we didn't prune anything, but have found a new value for the
340  * pd_prune_xid field, update it and mark the buffer dirty. This is
341  * treated as a non-WAL-logged hint.
342  *
343  * Also clear the "page is full" flag if it is set, since there's no
344  * point in repeating the prune/defrag process until something else
345  * happens to the page.
346  */
347  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
348  PageIsFull(page))
349  {
350  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
351  PageClearFull(page);
352  MarkBufferDirtyHint(buffer, true);
353  }
354  }
355 
357 
358  /*
359  * If requested, report the number of tuples reclaimed to pgstats. This is
360  * ndeleted minus ndead, because we don't want to count a now-DEAD root
361  * item as a deletion for this purpose.
362  */
363  if (report_stats && ndeleted > prstate.ndead)
364  pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
365 
366  *latestRemovedXid = prstate.latestRemovedXid;
367 
368  /*
369  * XXX Should we update the FSM information of this page ?
370  *
371  * There are two schools of thought here. We may not want to update FSM
372  * information so that the page is not used for unrelated UPDATEs/INSERTs
373  * and any free space in this page will remain available for further
374  * UPDATEs in *this* page, thus improving chances for doing HOT updates.
375  *
376  * But for a large table and where a page does not receive further UPDATEs
377  * for a long time, we might waste this space by not updating the FSM
378  * information. The relation may get extended and fragmented further.
379  *
380  * One possibility is to leave "fillfactor" worth of space in this page
381  * and update FSM with the remaining space.
382  */
383 
384  return ndeleted;
385 }
int nredirected
Definition: pruneheap.c:52
static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:493
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
Definition: pgstat.c:2305
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3770
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1483
bool old_snap_used
Definition: pruneheap.c:48
Relation rel
Definition: pruneheap.c:33
#define END_CRIT_SECTION()
Definition: miscadmin.h:135
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
TimestampTz old_snap_ts
Definition: pruneheap.c:46
#define START_CRIT_SECTION()
Definition: miscadmin.h:133
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:57
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
TransactionId new_prune_xid
Definition: pruneheap.c:50
#define PageIsFull(page)
Definition: bufpage.h:378
int nunused
Definition: pruneheap.c:54
uint16 OffsetNumber
Definition: off.h:24
TransactionId latestRemovedXid
Definition: pruneheap.c:51
#define FirstOffsetNumber
Definition: off.h:27
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define PageClearFull(page)
Definition: bufpage.h:382
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
TransactionId old_snap_xmin
Definition: pruneheap.c:47
#define InvalidOffsetNumber
Definition: off.h:26
GlobalVisState * vistest
Definition: pruneheap.c:36
void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:820
PageHeaderData * PageHeader
Definition: bufpage.h:166
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:58
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7984
#define RelationNeedsWAL(relation)
Definition: rel.h:563
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:56
int ndead
Definition: pruneheap.c:53
#define PageSetLSN(page, lsn)
Definition: bufpage.h:368
Pointer Page
Definition: bufpage.h:78

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 820 of file pruneheap.c.

References BufferGetPage, i, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, PageGetItemId, and PageRepairFragmentation().

Referenced by heap_page_prune(), and heap_xlog_clean().

824 {
825  Page page = (Page) BufferGetPage(buffer);
826  OffsetNumber *offnum;
827  int i;
828 
829  /* Update all redirected line pointers */
830  offnum = redirected;
831  for (i = 0; i < nredirected; i++)
832  {
833  OffsetNumber fromoff = *offnum++;
834  OffsetNumber tooff = *offnum++;
835  ItemId fromlp = PageGetItemId(page, fromoff);
836 
837  ItemIdSetRedirect(fromlp, tooff);
838  }
839 
840  /* Update all now-dead line pointers */
841  offnum = nowdead;
842  for (i = 0; i < ndead; i++)
843  {
844  OffsetNumber off = *offnum++;
845  ItemId lp = PageGetItemId(page, off);
846 
847  ItemIdSetDead(lp);
848  }
849 
850  /* Update all now-unused line pointers */
851  offnum = nowunused;
852  for (i = 0; i < nunused; i++)
853  {
854  OffsetNumber off = *offnum++;
855  ItemId lp = PageGetItemId(page, off);
856 
857  ItemIdSetUnused(lp);
858  }
859 
860  /*
861  * Finally, repair any fragmentation, and update the page's hint bit about
862  * whether it has free pointers.
863  */
865 }
uint16 OffsetNumber
Definition: off.h:24
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
void PageRepairFragmentation(Page page)
Definition: bufpage.c:682
int i
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
Pointer Page
Definition: bufpage.h:78

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 87 of file pruneheap.c.

References BUFFER_LOCK_UNLOCK, BufferGetPage, ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), GlobalVisTestNonRemovableHorizon(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune(), InvalidTransactionId, LockBuffer(), Max, old_snapshot_threshold, OldSnapshotThresholdActive(), PageGetHeapFreeSpace(), PageIsFull, RecoveryInProgress(), RelationGetTargetPageFreeSpace, SnapshotTooOldMagicForTest(), TransactionIdIsValid, TransactionIdLimitedForOldSnapshots(), and TransactionIdPrecedes().

Referenced by heapam_index_fetch_tuple(), heapam_scan_bitmap_next_block(), and heapgetpage().

88 {
89  Page page = BufferGetPage(buffer);
90  TransactionId prune_xid;
91  GlobalVisState *vistest;
92  TransactionId limited_xmin = InvalidTransactionId;
93  TimestampTz limited_ts = 0;
94  Size minfree;
95 
96  /*
97  * We can't write WAL in recovery mode, so there's no point trying to
98  * clean the page. The primary will likely issue a cleaning WAL record soon
99  * anyway, so this is no particular loss.
100  */
101  if (RecoveryInProgress())
102  return;
103 
104  /*
105  * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
106  * currently are broken, and discussion of what to do about them is
107  * ongoing. See
108  * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
109  */
110  if (old_snapshot_threshold == 0)
112 
113  /*
114  * First check whether there's any chance there's something to prune,
115  * determining the appropriate horizon is a waste if there's no prune_xid
116  * (i.e. no updates/deletes left potentially dead tuples around).
117  */
118  prune_xid = ((PageHeader) page)->pd_prune_xid;
119  if (!TransactionIdIsValid(prune_xid))
120  return;
121 
122  /*
123  * Check whether prune_xid indicates that there may be dead rows that can
124  * be cleaned up.
125  *
126  * It is OK to check the old snapshot limit before acquiring the cleanup
127  * lock because the worst that can happen is that we are not quite as
128  * aggressive about the cleanup (by however many transaction IDs are
129  * consumed between this point and acquiring the lock). This allows us to
130  * save significant overhead in the case where the page is found not to be
131  * prunable.
132  *
133  * Even if old_snapshot_threshold is set, we first check whether the page
134  * can be pruned without. Both because
135  * TransactionIdLimitedForOldSnapshots() is not cheap, and because not
136  * unnecessarily relying on old_snapshot_threshold avoids causing
137  * conflicts.
138  */
139  vistest = GlobalVisTestFor(relation);
140 
141  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
142  {
144  return;
145 
147  relation,
148  &limited_xmin, &limited_ts))
149  return;
150 
151  if (!TransactionIdPrecedes(prune_xid, limited_xmin))
152  return;
153  }
154 
155  /*
156  * We prune when a previous UPDATE failed to find enough space on the page
157  * for a new tuple version, or when free space falls below the relation's
158  * fill-factor target (but not less than 10%).
159  *
160  * Checking free space here is questionable since we aren't holding any
161  * lock on the buffer; in the worst case we could get a bogus answer. It's
162  * unlikely to be *seriously* wrong, though, since reading either pd_lower
163  * or pd_upper is probably atomic. Avoiding taking a lock seems more
164  * important than sometimes getting a wrong answer in what is after all
165  * just a heuristic estimate.
166  */
167  minfree = RelationGetTargetPageFreeSpace(relation,
169  minfree = Max(minfree, BLCKSZ / 10);
170 
171  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
172  {
173  /* OK, try to get exclusive buffer lock */
174  if (!ConditionalLockBufferForCleanup(buffer))
175  return;
176 
177  /*
178  * Now that we have buffer lock, get accurate information about the
179  * page's free space, and recheck the heuristic about whether to
180  * prune. (We needn't recheck PageIsPrunable, since no one else could
181  * have pruned while we hold pin.)
182  */
183  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
184  {
185  TransactionId ignore = InvalidTransactionId; /* return value not
186  * needed */
187 
188  /* OK to prune */
189  (void) heap_page_prune(relation, buffer, vistest,
190  limited_xmin, limited_ts,
191  true, &ignore, NULL);
192  }
193 
194  /* And release buffer lock */
196  }
197 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
int heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, TransactionId old_snap_xmin, TimestampTz old_snap_ts, bool report_stats, TransactionId *latestRemovedXid, OffsetNumber *off_loc)
Definition: pruneheap.c:224
uint32 TransactionId
Definition: c.h:587
int64 TimestampTz
Definition: timestamp.h:39
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1751
bool RecoveryInProgress(void)
Definition: xlog.c:8132
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4123
#define PageIsFull(page)
Definition: bufpage.h:378
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:3955
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4173
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:874
void SnapshotTooOldMagicForTest(void)
Definition: snapmgr.c:1689
#define InvalidTransactionId
Definition: transam.h:31
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:341
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3939
#define Max(x, y)
Definition: c.h:980
PageHeaderData * PageHeader
Definition: bufpage.h:166
size_t Size
Definition: c.h:540
int old_snapshot_threshold
Definition: snapmgr.c:78
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:312
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4161
Pointer Page
Definition: bufpage.h:78

◆ heap_prune_chain()

static int heap_prune_chain ( Buffer  buffer,
OffsetNumber  rootoffnum,
PruneState prstate 
)
static

Definition at line 493 of file pruneheap.c.

References Assert, BufferGetBlockNumber(), BufferGetPage, elog, ERROR, heap_prune_record_dead(), heap_prune_record_prunable(), heap_prune_record_redirect(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderAdvanceLatestRemovedXid(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, ItemPointerSet, PruneState::latestRemovedXid, PruneState::marked, MaxHeapTuplesPerPage, OffsetNumberIsValid, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, PruneState::rel, RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_page_prune().

494 {
495  int ndeleted = 0;
496  Page dp = (Page) BufferGetPage(buffer);
498  ItemId rootlp;
499  HeapTupleHeader htup;
500  OffsetNumber latestdead = InvalidOffsetNumber,
501  maxoff = PageGetMaxOffsetNumber(dp),
502  offnum;
504  int nchain = 0,
505  i;
506  HeapTupleData tup;
507 
508  tup.t_tableOid = RelationGetRelid(prstate->rel);
509 
510  rootlp = PageGetItemId(dp, rootoffnum);
511 
512  /*
513  * If it's a heap-only tuple, then it is not the start of a HOT chain.
514  */
515  if (ItemIdIsNormal(rootlp))
516  {
517  htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
518 
519  tup.t_data = htup;
520  tup.t_len = ItemIdGetLength(rootlp);
521  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
522 
523  if (HeapTupleHeaderIsHeapOnly(htup))
524  {
525  /*
526  * If the tuple is DEAD and doesn't chain to anything else, mark
527  * it unused immediately. (If it does chain, we can only remove
528  * it as part of pruning its chain.)
529  *
530  * We need this primarily to handle aborted HOT updates, that is,
531  * XMIN_INVALID heap-only tuples. Those might not be linked to by
532  * any chain, since the parent tuple might be re-updated before
533  * any pruning occurs. So we have to be able to reap them
534  * separately from chain-pruning. (Note that
535  * HeapTupleHeaderIsHotUpdated will never return true for an
536  * XMIN_INVALID tuple, so this code will work even when there were
537  * sequential updates within the aborted transaction.)
538  *
539  * Note that we might first arrive at a dead heap-only tuple
540  * either here or while following a chain below. Whichever path
541  * gets there first will mark the tuple unused.
542  */
543  if (heap_prune_satisfies_vacuum(prstate, &tup, buffer)
545  {
546  heap_prune_record_unused(prstate, rootoffnum);
548  &prstate->latestRemovedXid);
549  ndeleted++;
550  }
551 
552  /* Nothing more to do */
553  return ndeleted;
554  }
555  }
556 
557  /* Start from the root tuple */
558  offnum = rootoffnum;
559 
560  /* while not end of the chain */
561  for (;;)
562  {
563  ItemId lp;
564  bool tupdead,
565  recent_dead;
566 
567  /* Some sanity checks */
568  if (offnum < FirstOffsetNumber || offnum > maxoff)
569  break;
570 
571  /* If item is already processed, stop --- it must not be same chain */
572  if (prstate->marked[offnum])
573  break;
574 
575  lp = PageGetItemId(dp, offnum);
576 
577  /* Unused item obviously isn't part of the chain */
578  if (!ItemIdIsUsed(lp))
579  break;
580 
581  /*
582  * If we are looking at the redirected root line pointer, jump to the
583  * first normal tuple in the chain. If we find a redirect somewhere
584  * else, stop --- it must not be same chain.
585  */
586  if (ItemIdIsRedirected(lp))
587  {
588  if (nchain > 0)
589  break; /* not at start of chain */
590  chainitems[nchain++] = offnum;
591  offnum = ItemIdGetRedirect(rootlp);
592  continue;
593  }
594 
595  /*
596  * Likewise, a dead line pointer can't be part of the chain. (We
597  * already eliminated the case of dead root tuple outside this
598  * function.)
599  */
600  if (ItemIdIsDead(lp))
601  break;
602 
603  Assert(ItemIdIsNormal(lp));
604  htup = (HeapTupleHeader) PageGetItem(dp, lp);
605 
606  tup.t_data = htup;
607  tup.t_len = ItemIdGetLength(lp);
608  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
609 
610  /*
611  * Check the tuple XMIN against prior XMAX, if any
612  */
613  if (TransactionIdIsValid(priorXmax) &&
614  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
615  break;
616 
617  /*
618  * OK, this tuple is indeed a member of the chain.
619  */
620  chainitems[nchain++] = offnum;
621 
622  /*
623  * Check tuple's visibility status.
624  */
625  tupdead = recent_dead = false;
626 
627  switch (heap_prune_satisfies_vacuum(prstate, &tup, buffer))
628  {
629  case HEAPTUPLE_DEAD:
630  tupdead = true;
631  break;
632 
634  recent_dead = true;
635 
636  /*
637  * This tuple may soon become DEAD. Update the hint field so
638  * that the page is reconsidered for pruning in future.
639  */
642  break;
643 
645 
646  /*
647  * This tuple may soon become DEAD. Update the hint field so
648  * that the page is reconsidered for pruning in future.
649  */
652  break;
653 
654  case HEAPTUPLE_LIVE:
656 
657  /*
658  * If we wanted to optimize for aborts, we might consider
659  * marking the page prunable when we see INSERT_IN_PROGRESS.
660  * But we don't. See related decisions about when to mark the
661  * page prunable in heapam.c.
662  */
663  break;
664 
665  default:
666  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
667  break;
668  }
669 
670  /*
671  * Remember the last DEAD tuple seen. We will advance past
672  * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
673  * but we can't advance past anything else. (XXX is it really worth
674  * continuing to scan beyond RECENTLY_DEAD? The case where we will
675  * find another DEAD tuple is a fairly unusual corner case.)
676  */
677  if (tupdead)
678  {
679  latestdead = offnum;
681  &prstate->latestRemovedXid);
682  }
683  else if (!recent_dead)
684  break;
685 
686  /*
687  * If the tuple is not HOT-updated, then we are at the end of this
688  * HOT-update chain.
689  */
690  if (!HeapTupleHeaderIsHotUpdated(htup))
691  break;
692 
693  /* HOT implies it can't have moved to different partition */
695 
696  /*
697  * Advance to next chain member.
698  */
700  BufferGetBlockNumber(buffer));
701  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
702  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
703  }
704 
705  /*
706  * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
707  * the DEAD tuples at the start of the chain are removed and the root line
708  * pointer is appropriately redirected.
709  */
710  if (OffsetNumberIsValid(latestdead))
711  {
712  /*
713  * Mark as unused each intermediate item that we are able to remove
714  * from the chain.
715  *
716  * When the previous item is the last dead tuple seen, we are at the
717  * right candidate for redirection.
718  */
719  for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
720  {
721  heap_prune_record_unused(prstate, chainitems[i]);
722  ndeleted++;
723  }
724 
725  /*
726  * If the root entry had been a normal tuple, we are deleting it, so
727  * count it in the result. But changing a redirect (even to DEAD
728  * state) doesn't count.
729  */
730  if (ItemIdIsNormal(rootlp))
731  ndeleted++;
732 
733  /*
734  * If the DEAD tuple is at the end of the chain, the entire chain is
735  * dead and the root line pointer can be marked dead. Otherwise just
736  * redirect the root to the correct chain member.
737  */
738  if (i >= nchain)
739  heap_prune_record_dead(prstate, rootoffnum);
740  else
741  heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
742  }
743  else if (nchain < 2 && ItemIdIsRedirected(rootlp))
744  {
745  /*
746  * We found a redirect item that doesn't point to a valid follow-on
747  * item. This can happen if the loop in heap_page_prune caused us to
748  * visit the dead successor of a redirect item before visiting the
749  * redirect item. We can clean up by setting the redirect item to
750  * DEAD state.
751  */
752  heap_prune_record_dead(prstate, rootoffnum);
753  }
754 
755  return ndeleted;
756 }
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:365
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:7201
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
uint32 TransactionId
Definition: c.h:587
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
Relation rel
Definition: pruneheap.c:33
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:789
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:445
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
uint16 OffsetNumber
Definition: off.h:24
HeapTupleHeader t_data
Definition: htup.h:68
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:501
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ERROR
Definition: elog.h:45
ItemPointerData t_ctid
Definition: htup_details.h:160
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
Definition: pruneheap.c:760
TransactionId latestRemovedXid
Definition: pruneheap.c:51
#define InvalidTransactionId
Definition: transam.h:31
Oid t_tableOid
Definition: htup.h:66
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:484
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:405
#define InvalidOffsetNumber
Definition: off.h:26
#define Assert(condition)
Definition: c.h:804
static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
Definition: pruneheap.c:774
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2674
#define elog(elevel,...)
Definition: elog.h:227
int i
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define RelationGetRelid(relation)
Definition: rel.h:457
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:800

◆ heap_prune_record_dead()

static void heap_prune_record_dead ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 789 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::ndead, and PruneState::nowdead.

Referenced by heap_prune_chain().

790 {
791  Assert(prstate->ndead < MaxHeapTuplesPerPage);
792  prstate->nowdead[prstate->ndead] = offnum;
793  prstate->ndead++;
794  Assert(!prstate->marked[offnum]);
795  prstate->marked[offnum] = true;
796 }
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:57
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define Assert(condition)
Definition: c.h:804
int ndead
Definition: pruneheap.c:53

◆ heap_prune_record_prunable()

static void heap_prune_record_prunable ( PruneState prstate,
TransactionId  xid 
)
static

Definition at line 760 of file pruneheap.c.

References Assert, PruneState::new_prune_xid, TransactionIdIsNormal, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_prune_chain().

761 {
762  /*
763  * This should exactly match the PageSetPrunable macro. We can't store
764  * directly into the page header yet, so we update working state.
765  */
767  if (!TransactionIdIsValid(prstate->new_prune_xid) ||
768  TransactionIdPrecedes(xid, prstate->new_prune_xid))
769  prstate->new_prune_xid = xid;
770 }
TransactionId new_prune_xid
Definition: pruneheap.c:50
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define Assert(condition)
Definition: c.h:804
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

◆ heap_prune_record_redirect()

static void heap_prune_record_redirect ( PruneState prstate,
OffsetNumber  offnum,
OffsetNumber  rdoffnum 
)
static

Definition at line 774 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::nredirected, and PruneState::redirected.

Referenced by heap_prune_chain().

776 {
778  prstate->redirected[prstate->nredirected * 2] = offnum;
779  prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
780  prstate->nredirected++;
781  Assert(!prstate->marked[offnum]);
782  prstate->marked[offnum] = true;
783  Assert(!prstate->marked[rdoffnum]);
784  prstate->marked[rdoffnum] = true;
785 }
int nredirected
Definition: pruneheap.c:52
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
#define Assert(condition)
Definition: c.h:804
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:56

◆ heap_prune_record_unused()

static void heap_prune_record_unused ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 800 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::nowunused, and PruneState::nunused.

Referenced by heap_prune_chain().

801 {
802  Assert(prstate->nunused < MaxHeapTuplesPerPage);
803  prstate->nowunused[prstate->nunused] = offnum;
804  prstate->nunused++;
805  Assert(!prstate->marked[offnum]);
806  prstate->marked[offnum] = true;
807 }
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:60
int nunused
Definition: pruneheap.c:54
#define Assert(condition)
Definition: c.h:804
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:58

◆ heap_prune_satisfies_vacuum()

static HTSV_Result heap_prune_satisfies_vacuum ( PruneState prstate,
HeapTuple  tup,
Buffer  buffer 
)
static

Definition at line 405 of file pruneheap.c.

References Assert, GlobalVisTestIsRemovableXid(), GlobalVisTestNonRemovableHorizon(), HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), PruneState::old_snap_ts, PruneState::old_snap_used, PruneState::old_snap_xmin, OldSnapshotThresholdActive(), PruneState::rel, SetOldSnapshotThresholdTimestamp(), TransactionIdIsValid, TransactionIdLimitedForOldSnapshots(), TransactionIdPrecedes(), and PruneState::vistest.

Referenced by heap_prune_chain().

406 {
407  HTSV_Result res;
408  TransactionId dead_after;
409 
410  res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
411 
412  if (res != HEAPTUPLE_RECENTLY_DEAD)
413  return res;
414 
415  /*
416  * If we are already relying on the limited xmin, there is no need to
417  * delay doing so anymore.
418  */
419  if (prstate->old_snap_used)
420  {
422 
423  if (TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
424  res = HEAPTUPLE_DEAD;
425  return res;
426  }
427 
428  /*
429  * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
430  * row dead. If not, and old_snapshot_threshold is enabled, try to use the
431  * lowered horizon.
432  */
433  if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
434  res = HEAPTUPLE_DEAD;
435  else if (OldSnapshotThresholdActive())
436  {
437  /* haven't determined limited horizon yet, requests */
438  if (!TransactionIdIsValid(prstate->old_snap_xmin))
439  {
440  TransactionId horizon =
442 
443  TransactionIdLimitedForOldSnapshots(horizon, prstate->rel,
444  &prstate->old_snap_xmin,
445  &prstate->old_snap_ts);
446  }
447 
448  if (TransactionIdIsValid(prstate->old_snap_xmin) &&
449  TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
450  {
451  /*
452  * About to remove row based on snapshot_too_old. Need to raise
453  * the threshold so problematic accesses would error.
454  */
455  Assert(!prstate->old_snap_used);
457  prstate->old_snap_xmin);
458  prstate->old_snap_used = true;
459  res = HEAPTUPLE_DEAD;
460  }
461  }
462 
463  return res;
464 }
uint32 TransactionId
Definition: c.h:587
bool old_snap_used
Definition: pruneheap.c:48
static bool OldSnapshotThresholdActive(void)
Definition: snapmgr.h:102
Relation rel
Definition: pruneheap.c:33
TimestampTz old_snap_ts
Definition: pruneheap.c:46
bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation, TransactionId *limit_xid, TimestampTz *limit_ts)
Definition: snapmgr.c:1751
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4123
void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
Definition: snapmgr.c:1672
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
TransactionId old_snap_xmin
Definition: pruneheap.c:47
GlobalVisState * vistest
Definition: pruneheap.c:36
#define Assert(condition)
Definition: c.h:804
HTSV_Result
Definition: heapam.h:87
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
Definition: procarray.c:4161