PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pruneheap.c File Reference
#include "postgres.h"
#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/transam.h"
#include "access/htup_details.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"
#include "utils/rel.h"
#include "utils/tqual.h"
Include dependency graph for pruneheap.c:

Go to the source code of this file.

Data Structures

struct  PruneState
 

Functions

static int heap_prune_chain (Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, PruneState *prstate)
 
static void heap_prune_record_prunable (PruneState *prstate, TransactionId xid)
 
static void heap_prune_record_redirect (PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
 
static void heap_prune_record_dead (PruneState *prstate, OffsetNumber offnum)
 
static void heap_prune_record_unused (PruneState *prstate, OffsetNumber offnum)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
int heap_page_prune (Relation relation, Buffer buffer, TransactionId OldestXmin, bool report_stats, TransactionId *latestRemovedXid)
 
void heap_page_prune_execute (Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 

Function Documentation

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 744 of file pruneheap.c.

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber, MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by IndexBuildHeapRangeScan(), and validate_index_heapscan().

745 {
746  OffsetNumber offnum,
747  maxoff;
748 
749  MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
750 
751  maxoff = PageGetMaxOffsetNumber(page);
752  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
753  {
754  ItemId lp = PageGetItemId(page, offnum);
755  HeapTupleHeader htup;
756  OffsetNumber nextoffnum;
757  TransactionId priorXmax;
758 
759  /* skip unused and dead items */
760  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
761  continue;
762 
763  if (ItemIdIsNormal(lp))
764  {
765  htup = (HeapTupleHeader) PageGetItem(page, lp);
766 
767  /*
768  * Check if this tuple is part of a HOT-chain rooted at some other
769  * tuple. If so, skip it for now; we'll process it when we find
770  * its root.
771  */
772  if (HeapTupleHeaderIsHeapOnly(htup))
773  continue;
774 
775  /*
776  * This is either a plain tuple or the root of a HOT-chain.
777  * Remember it in the mapping.
778  */
779  root_offsets[offnum - 1] = offnum;
780 
781  /* If it's not the start of a HOT-chain, we're done with it */
782  if (!HeapTupleHeaderIsHotUpdated(htup))
783  continue;
784 
785  /* Set up to scan the HOT-chain */
786  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
787  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
788  }
789  else
790  {
791  /* Must be a redirect item. We do not set its root_offsets entry */
793  /* Set up to scan the HOT-chain */
794  nextoffnum = ItemIdGetRedirect(lp);
795  priorXmax = InvalidTransactionId;
796  }
797 
798  /*
799  * Now follow the HOT-chain and collect other tuples in the chain.
800  *
801  * Note: Even though this is a nested loop, the complexity of the
802  * function is O(N) because a tuple in the page should be visited not
803  * more than twice, once in the outer loop and once in HOT-chain
804  * chases.
805  */
806  for (;;)
807  {
808  lp = PageGetItemId(page, nextoffnum);
809 
810  /* Check for broken chains */
811  if (!ItemIdIsNormal(lp))
812  break;
813 
814  htup = (HeapTupleHeader) PageGetItem(page, lp);
815 
816  if (TransactionIdIsValid(priorXmax) &&
817  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
818  break;
819 
820  /* Remember the root line pointer for this item */
821  root_offsets[nextoffnum - 1] = offnum;
822 
823  /* Advance to next chain member, if any */
824  if (!HeapTupleHeaderIsHotUpdated(htup))
825  break;
826 
827  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
828  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
829  }
830  }
831 }
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:359
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:105
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
uint32 TransactionId
Definition: c.h:394
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:77
#define ItemIdIsUsed(itemId)
Definition: itemid.h:91
#define MaxHeapTuplesPerPage
Definition: htup_details.h:575
#define MemSet(start, val, len)
Definition: c.h:853
#define ItemIdIsDead(itemId)
Definition: itemid.h:112
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
uint16 OffsetNumber
Definition: off.h:24
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:502
ItemPointerData t_ctid
Definition: htup_details.h:150
#define FirstOffsetNumber
Definition: off.h:27
#define InvalidTransactionId
Definition: transam.h:31
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:485
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
#define Assert(condition)
Definition: c.h:671
#define ItemIdIsNormal(itemId)
Definition: itemid.h:98
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:307
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:76
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
int heap_page_prune ( Relation  relation,
Buffer  buffer,
TransactionId  OldestXmin,
bool  report_stats,
TransactionId latestRemovedXid 
)

Definition at line 182 of file pruneheap.c.

References BufferGetPage, END_CRIT_SECTION, FirstOffsetNumber, heap_page_prune_execute(), heap_prune_chain(), InvalidTransactionId, ItemIdIsDead, ItemIdIsUsed, PruneState::latestRemovedXid, log_heap_clean(), MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::marked, PruneState::ndead, PruneState::new_prune_xid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nunused, OffsetNumberNext, PageClearFull, PageGetItemId, PageGetMaxOffsetNumber, PageIsFull, PageSetLSN, pgstat_update_heap_dead_tuples(), PruneState::redirected, RelationNeedsWAL, and START_CRIT_SECTION.

Referenced by heap_page_prune_opt(), and lazy_scan_heap().

184 {
185  int ndeleted = 0;
186  Page page = BufferGetPage(buffer);
187  OffsetNumber offnum,
188  maxoff;
189  PruneState prstate;
190 
191  /*
192  * Our strategy is to scan the page and make lists of items to change,
193  * then apply the changes within a critical section. This keeps as much
194  * logic as possible out of the critical section, and also ensures that
195  * WAL replay will work the same as the normal case.
196  *
197  * First, initialize the new pd_prune_xid value to zero (indicating no
198  * prunable tuples). If we find any tuples which may soon become
199  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
200  * initialize the rest of our working state.
201  */
203  prstate.latestRemovedXid = *latestRemovedXid;
204  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
205  memset(prstate.marked, 0, sizeof(prstate.marked));
206 
207  /* Scan the page */
208  maxoff = PageGetMaxOffsetNumber(page);
209  for (offnum = FirstOffsetNumber;
210  offnum <= maxoff;
211  offnum = OffsetNumberNext(offnum))
212  {
213  ItemId itemid;
214 
215  /* Ignore items already processed as part of an earlier chain */
216  if (prstate.marked[offnum])
217  continue;
218 
219  /* Nothing to do if slot is empty or already dead */
220  itemid = PageGetItemId(page, offnum);
221  if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
222  continue;
223 
224  /* Process this item or chain of items */
225  ndeleted += heap_prune_chain(relation, buffer, offnum,
226  OldestXmin,
227  &prstate);
228  }
229 
230  /* Any error while applying the changes is critical */
232 
233  /* Have we found any prunable items? */
234  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
235  {
236  /*
237  * Apply the planned item changes, then repair page fragmentation, and
238  * update the page's hint bit about whether it has free line pointers.
239  */
241  prstate.redirected, prstate.nredirected,
242  prstate.nowdead, prstate.ndead,
243  prstate.nowunused, prstate.nunused);
244 
245  /*
246  * Update the page's pd_prune_xid field to either zero, or the lowest
247  * XID of any soon-prunable tuple.
248  */
249  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
250 
251  /*
252  * Also clear the "page is full" flag, since there's no point in
253  * repeating the prune/defrag process until something else happens to
254  * the page.
255  */
256  PageClearFull(page);
257 
258  MarkBufferDirty(buffer);
259 
260  /*
261  * Emit a WAL HEAP_CLEAN record showing what we did
262  */
263  if (RelationNeedsWAL(relation))
264  {
265  XLogRecPtr recptr;
266 
267  recptr = log_heap_clean(relation, buffer,
268  prstate.redirected, prstate.nredirected,
269  prstate.nowdead, prstate.ndead,
270  prstate.nowunused, prstate.nunused,
271  prstate.latestRemovedXid);
272 
273  PageSetLSN(BufferGetPage(buffer), recptr);
274  }
275  }
276  else
277  {
278  /*
279  * If we didn't prune anything, but have found a new value for the
280  * pd_prune_xid field, update it and mark the buffer dirty. This is
281  * treated as a non-WAL-logged hint.
282  *
283  * Also clear the "page is full" flag if it is set, since there's no
284  * point in repeating the prune/defrag process until something else
285  * happens to the page.
286  */
287  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
288  PageIsFull(page))
289  {
290  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
291  PageClearFull(page);
292  MarkBufferDirtyHint(buffer, true);
293  }
294  }
295 
297 
298  /*
299  * If requested, report the number of tuples reclaimed to pgstats. This is
300  * ndeleted minus ndead, because we don't want to count a now-DEAD root
301  * item as a deletion for this purpose.
302  */
303  if (report_stats && ndeleted > prstate.ndead)
304  pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
305 
306  *latestRemovedXid = prstate.latestRemovedXid;
307 
308  /*
309  * XXX Should we update the FSM information of this page ?
310  *
311  * There are two schools of thought here. We may not want to update FSM
312  * information so that the page is not used for unrelated UPDATEs/INSERTs
313  * and any free space in this page will remain available for further
314  * UPDATEs in *this* page, thus improving chances for doing HOT updates.
315  *
316  * But for a large table and where a page does not receive further UPDATEs
317  * for a long time, we might waste this space by not updating the FSM
318  * information. The relation may get extended and fragmented further.
319  *
320  * One possibility is to leave "fillfactor" worth of space in this page
321  * and update FSM with the remaining space.
322  */
323 
324  return ndeleted;
325 }
static int heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, PruneState *prstate)
Definition: pruneheap.c:354
int nredirected
Definition: pruneheap.c:36
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
Definition: pgstat.c:1935
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3362
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1445
#define END_CRIT_SECTION()
Definition: miscadmin.h:132
#define ItemIdIsUsed(itemId)
Definition: itemid.h:91
#define START_CRIT_SECTION()
Definition: miscadmin.h:130
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:41
#define ItemIdIsDead(itemId)
Definition: itemid.h:112
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:44
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
TransactionId new_prune_xid
Definition: pruneheap.c:33
#define PageIsFull(page)
Definition: bufpage.h:375
int nunused
Definition: pruneheap.c:38
uint16 OffsetNumber
Definition: off.h:24
TransactionId latestRemovedXid
Definition: pruneheap.c:34
#define FirstOffsetNumber
Definition: off.h:27
#define InvalidTransactionId
Definition: transam.h:31
static TransactionId OldestXmin
Definition: vacuumlazy.c:138
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageClearFull(page)
Definition: bufpage.h:379
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:680
PageHeaderData * PageHeader
Definition: bufpage.h:162
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:42
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7386
#define RelationNeedsWAL(relation)
Definition: rel.h:502
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:40
int ndead
Definition: pruneheap.c:37
#define PageSetLSN(page, lsn)
Definition: bufpage.h:365
Pointer Page
Definition: bufpage.h:74
void heap_page_prune_execute ( Buffer  buffer,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 680 of file pruneheap.c.

References BufferGetPage, i, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, PageGetItemId, and PageRepairFragmentation().

Referenced by heap_page_prune(), and heap_xlog_clean().

684 {
685  Page page = (Page) BufferGetPage(buffer);
686  OffsetNumber *offnum;
687  int i;
688 
689  /* Update all redirected line pointers */
690  offnum = redirected;
691  for (i = 0; i < nredirected; i++)
692  {
693  OffsetNumber fromoff = *offnum++;
694  OffsetNumber tooff = *offnum++;
695  ItemId fromlp = PageGetItemId(page, fromoff);
696 
697  ItemIdSetRedirect(fromlp, tooff);
698  }
699 
700  /* Update all now-dead line pointers */
701  offnum = nowdead;
702  for (i = 0; i < ndead; i++)
703  {
704  OffsetNumber off = *offnum++;
705  ItemId lp = PageGetItemId(page, off);
706 
707  ItemIdSetDead(lp);
708  }
709 
710  /* Update all now-unused line pointers */
711  offnum = nowunused;
712  for (i = 0; i < nunused; i++)
713  {
714  OffsetNumber off = *offnum++;
715  ItemId lp = PageGetItemId(page, off);
716 
717  ItemIdSetUnused(lp);
718  }
719 
720  /*
721  * Finally, repair any fragmentation, and update the page's hint bit about
722  * whether it has free pointers.
723  */
725 }
uint16 OffsetNumber
Definition: off.h:24
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:151
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
void PageRepairFragmentation(Page page)
Definition: bufpage.c:479
int i
#define ItemIdSetDead(itemId)
Definition: itemid.h:163
#define ItemIdSetUnused(itemId)
Definition: itemid.h:127
Pointer Page
Definition: bufpage.h:74
void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 75 of file pruneheap.c.

References Assert, BUFFER_LOCK_UNLOCK, BufferGetPage, ConditionalLockBufferForCleanup(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune(), InvalidTransactionId, IsCatalogRelation(), LockBuffer(), Max, OldestXmin, PageGetHeapFreeSpace(), PageIsFull, PageIsPrunable, RecentGlobalDataXmin, RecentGlobalXmin, RecoveryInProgress(), RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, TransactionIdIsValid, and TransactionIdLimitedForOldSnapshots().

Referenced by bitgetpage(), heapgetpage(), and index_fetch_heap().

76 {
77  Page page = BufferGetPage(buffer);
78  Size minfree;
80 
81  /*
82  * We can't write WAL in recovery mode, so there's no point trying to
83  * clean the page. The master will likely issue a cleaning WAL record soon
84  * anyway, so this is no particular loss.
85  */
86  if (RecoveryInProgress())
87  return;
88 
89  /*
90  * Use the appropriate xmin horizon for this relation. If it's a proper
91  * catalog relation or a user defined, additional, catalog relation, we
92  * need to use the horizon that includes slots, otherwise the data-only
93  * horizon can be used. Note that the toast relation of user defined
94  * relations are *not* considered catalog relations.
95  *
96  * It is OK to apply the old snapshot limit before acquiring the cleanup
97  * lock because the worst that can happen is that we are not quite as
98  * aggressive about the cleanup (by however many transaction IDs are
99  * consumed between this point and acquiring the lock). This allows us to
100  * save significant overhead in the case where the page is found not to be
101  * prunable.
102  */
103  if (IsCatalogRelation(relation) ||
105  OldestXmin = RecentGlobalXmin;
106  else
107  OldestXmin =
109  relation);
110 
111  Assert(TransactionIdIsValid(OldestXmin));
112 
113  /*
114  * Let's see if we really need pruning.
115  *
116  * Forget it if page is not hinted to contain something prunable that's
117  * older than OldestXmin.
118  */
119  if (!PageIsPrunable(page, OldestXmin))
120  return;
121 
122  /*
123  * We prune when a previous UPDATE failed to find enough space on the page
124  * for a new tuple version, or when free space falls below the relation's
125  * fill-factor target (but not less than 10%).
126  *
127  * Checking free space here is questionable since we aren't holding any
128  * lock on the buffer; in the worst case we could get a bogus answer. It's
129  * unlikely to be *seriously* wrong, though, since reading either pd_lower
130  * or pd_upper is probably atomic. Avoiding taking a lock seems more
131  * important than sometimes getting a wrong answer in what is after all
132  * just a heuristic estimate.
133  */
134  minfree = RelationGetTargetPageFreeSpace(relation,
136  minfree = Max(minfree, BLCKSZ / 10);
137 
138  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
139  {
140  /* OK, try to get exclusive buffer lock */
141  if (!ConditionalLockBufferForCleanup(buffer))
142  return;
143 
144  /*
145  * Now that we have buffer lock, get accurate information about the
146  * page's free space, and recheck the heuristic about whether to
147  * prune. (We needn't recheck PageIsPrunable, since no one else could
148  * have pruned while we hold pin.)
149  */
150  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
151  {
152  TransactionId ignore = InvalidTransactionId; /* return value not
153  * needed */
154 
155  /* OK to prune */
156  (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
157  }
158 
159  /* And release buffer lock */
161  }
162 }
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, bool report_stats, TransactionId *latestRemovedXid)
Definition: pruneheap.c:182
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:91
uint32 TransactionId
Definition: c.h:394
TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation)
Definition: snapmgr.c:1689
bool RecoveryInProgress(void)
Definition: xlog.c:7805
#define PageIsFull(page)
Definition: bufpage.h:375
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3701
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:639
TransactionId RecentGlobalXmin
Definition: snapmgr.c:166
#define InvalidTransactionId
Definition: transam.h:31
static TransactionId OldestXmin
Definition: vacuumlazy.c:138
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
TransactionId RecentGlobalDataXmin
Definition: snapmgr.c:167
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:304
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:556
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3529
#define Max(x, y)
Definition: c.h:796
#define Assert(condition)
Definition: c.h:671
size_t Size
Definition: c.h:353
#define PageIsPrunable(page, oldestxmin)
Definition: bufpage.h:389
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:283
#define TransactionIdIsValid(xid)
Definition: transam.h:41
Pointer Page
Definition: bufpage.h:74
static int heap_prune_chain ( Relation  relation,
Buffer  buffer,
OffsetNumber  rootoffnum,
TransactionId  OldestXmin,
PruneState prstate 
)
static

Definition at line 354 of file pruneheap.c.

References Assert, BufferGetBlockNumber(), BufferGetPage, elog, ERROR, heap_prune_record_dead(), heap_prune_record_prunable(), heap_prune_record_redirect(), heap_prune_record_unused(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderAdvanceLatestRemovedXid(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, HeapTupleSatisfiesVacuum(), i, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetBlockNumber, ItemPointerGetOffsetNumber, ItemPointerSet, PruneState::latestRemovedXid, PruneState::marked, MaxHeapTuplesPerPage, OffsetNumberIsValid, PageGetItem, PageGetItemId, PageGetMaxOffsetNumber, RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_page_prune().

357 {
358  int ndeleted = 0;
359  Page dp = (Page) BufferGetPage(buffer);
361  ItemId rootlp;
362  HeapTupleHeader htup;
363  OffsetNumber latestdead = InvalidOffsetNumber,
364  maxoff = PageGetMaxOffsetNumber(dp),
365  offnum;
367  int nchain = 0,
368  i;
369  HeapTupleData tup;
370 
371  tup.t_tableOid = RelationGetRelid(relation);
372 
373  rootlp = PageGetItemId(dp, rootoffnum);
374 
375  /*
376  * If it's a heap-only tuple, then it is not the start of a HOT chain.
377  */
378  if (ItemIdIsNormal(rootlp))
379  {
380  htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
381 
382  tup.t_data = htup;
383  tup.t_len = ItemIdGetLength(rootlp);
384  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
385 
386  if (HeapTupleHeaderIsHeapOnly(htup))
387  {
388  /*
389  * If the tuple is DEAD and doesn't chain to anything else, mark
390  * it unused immediately. (If it does chain, we can only remove
391  * it as part of pruning its chain.)
392  *
393  * We need this primarily to handle aborted HOT updates, that is,
394  * XMIN_INVALID heap-only tuples. Those might not be linked to by
395  * any chain, since the parent tuple might be re-updated before
396  * any pruning occurs. So we have to be able to reap them
397  * separately from chain-pruning. (Note that
398  * HeapTupleHeaderIsHotUpdated will never return true for an
399  * XMIN_INVALID tuple, so this code will work even when there were
400  * sequential updates within the aborted transaction.)
401  *
402  * Note that we might first arrive at a dead heap-only tuple
403  * either here or while following a chain below. Whichever path
404  * gets there first will mark the tuple unused.
405  */
406  if (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer)
408  {
409  heap_prune_record_unused(prstate, rootoffnum);
411  &prstate->latestRemovedXid);
412  ndeleted++;
413  }
414 
415  /* Nothing more to do */
416  return ndeleted;
417  }
418  }
419 
420  /* Start from the root tuple */
421  offnum = rootoffnum;
422 
423  /* while not end of the chain */
424  for (;;)
425  {
426  ItemId lp;
427  bool tupdead,
428  recent_dead;
429 
430  /* Some sanity checks */
431  if (offnum < FirstOffsetNumber || offnum > maxoff)
432  break;
433 
434  /* If item is already processed, stop --- it must not be same chain */
435  if (prstate->marked[offnum])
436  break;
437 
438  lp = PageGetItemId(dp, offnum);
439 
440  /* Unused item obviously isn't part of the chain */
441  if (!ItemIdIsUsed(lp))
442  break;
443 
444  /*
445  * If we are looking at the redirected root line pointer, jump to the
446  * first normal tuple in the chain. If we find a redirect somewhere
447  * else, stop --- it must not be same chain.
448  */
449  if (ItemIdIsRedirected(lp))
450  {
451  if (nchain > 0)
452  break; /* not at start of chain */
453  chainitems[nchain++] = offnum;
454  offnum = ItemIdGetRedirect(rootlp);
455  continue;
456  }
457 
458  /*
459  * Likewise, a dead item pointer can't be part of the chain. (We
460  * already eliminated the case of dead root tuple outside this
461  * function.)
462  */
463  if (ItemIdIsDead(lp))
464  break;
465 
466  Assert(ItemIdIsNormal(lp));
467  htup = (HeapTupleHeader) PageGetItem(dp, lp);
468 
469  tup.t_data = htup;
470  tup.t_len = ItemIdGetLength(lp);
471  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
472 
473  /*
474  * Check the tuple XMIN against prior XMAX, if any
475  */
476  if (TransactionIdIsValid(priorXmax) &&
477  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
478  break;
479 
480  /*
481  * OK, this tuple is indeed a member of the chain.
482  */
483  chainitems[nchain++] = offnum;
484 
485  /*
486  * Check tuple's visibility status.
487  */
488  tupdead = recent_dead = false;
489 
490  switch (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer))
491  {
492  case HEAPTUPLE_DEAD:
493  tupdead = true;
494  break;
495 
497  recent_dead = true;
498 
499  /*
500  * This tuple may soon become DEAD. Update the hint field so
501  * that the page is reconsidered for pruning in future.
502  */
505  break;
506 
508 
509  /*
510  * This tuple may soon become DEAD. Update the hint field so
511  * that the page is reconsidered for pruning in future.
512  */
515  break;
516 
517  case HEAPTUPLE_LIVE:
519 
520  /*
521  * If we wanted to optimize for aborts, we might consider
522  * marking the page prunable when we see INSERT_IN_PROGRESS.
523  * But we don't. See related decisions about when to mark the
524  * page prunable in heapam.c.
525  */
526  break;
527 
528  default:
529  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
530  break;
531  }
532 
533  /*
534  * Remember the last DEAD tuple seen. We will advance past
535  * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
536  * but we can't advance past anything else. (XXX is it really worth
537  * continuing to scan beyond RECENTLY_DEAD? The case where we will
538  * find another DEAD tuple is a fairly unusual corner case.)
539  */
540  if (tupdead)
541  {
542  latestdead = offnum;
544  &prstate->latestRemovedXid);
545  }
546  else if (!recent_dead)
547  break;
548 
549  /*
550  * If the tuple is not HOT-updated, then we are at the end of this
551  * HOT-update chain.
552  */
553  if (!HeapTupleHeaderIsHotUpdated(htup))
554  break;
555 
556  /*
557  * Advance to next chain member.
558  */
560  BufferGetBlockNumber(buffer));
561  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
562  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
563  }
564 
565  /*
566  * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
567  * the DEAD tuples at the start of the chain are removed and the root line
568  * pointer is appropriately redirected.
569  */
570  if (OffsetNumberIsValid(latestdead))
571  {
572  /*
573  * Mark as unused each intermediate item that we are able to remove
574  * from the chain.
575  *
576  * When the previous item is the last dead tuple seen, we are at the
577  * right candidate for redirection.
578  */
579  for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
580  {
581  heap_prune_record_unused(prstate, chainitems[i]);
582  ndeleted++;
583  }
584 
585  /*
586  * If the root entry had been a normal tuple, we are deleting it, so
587  * count it in the result. But changing a redirect (even to DEAD
588  * state) doesn't count.
589  */
590  if (ItemIdIsNormal(rootlp))
591  ndeleted++;
592 
593  /*
594  * If the DEAD tuple is at the end of the chain, the entire chain is
595  * dead and the root line pointer can be marked dead. Otherwise just
596  * redirect the root to the correct chain member.
597  */
598  if (i >= nchain)
599  heap_prune_record_dead(prstate, rootoffnum);
600  else
601  heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
602  }
603  else if (nchain < 2 && ItemIdIsRedirected(rootlp))
604  {
605  /*
606  * We found a redirect item that doesn't point to a valid follow-on
607  * item. This can happen if the loop in heap_page_prune caused us to
608  * visit the dead successor of a redirect item before visiting the
609  * redirect item. We can clean up by setting the redirect item to
610  * DEAD state.
611  */
612  heap_prune_record_dead(prstate, rootoffnum);
613  }
614 
615  return ndeleted;
616 }
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:359
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:7318
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:105
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
uint32 TransactionId
Definition: c.h:394
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:77
#define ItemIdIsUsed(itemId)
Definition: itemid.h:91
#define MaxHeapTuplesPerPage
Definition: htup_details.h:575
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
Definition: tqual.c:1164
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:649
#define ItemIdIsDead(itemId)
Definition: itemid.h:112
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:44
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
uint16 OffsetNumber
Definition: off.h:24
HeapTupleHeader t_data
Definition: htup.h:67
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:502
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
#define ERROR
Definition: elog.h:43
ItemPointerData t_ctid
Definition: htup_details.h:150
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
Definition: pruneheap.c:620
TransactionId latestRemovedXid
Definition: pruneheap.c:34
#define InvalidTransactionId
Definition: transam.h:31
static TransactionId OldestXmin
Definition: vacuumlazy.c:138
Oid t_tableOid
Definition: htup.h:66
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:485
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
#define InvalidOffsetNumber
Definition: off.h:26
#define Assert(condition)
Definition: c.h:671
static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
Definition: pruneheap.c:634
#define ItemIdIsNormal(itemId)
Definition: itemid.h:98
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:307
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:76
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2588
int i
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:40
#define elog
Definition: elog.h:219
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:66
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define RelationGetRelid(relation)
Definition: rel.h:413
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:86
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:660
static void heap_prune_record_dead ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 649 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::ndead, and PruneState::nowdead.

Referenced by heap_prune_chain().

650 {
651  Assert(prstate->ndead < MaxHeapTuplesPerPage);
652  prstate->nowdead[prstate->ndead] = offnum;
653  prstate->ndead++;
654  Assert(!prstate->marked[offnum]);
655  prstate->marked[offnum] = true;
656 }
#define MaxHeapTuplesPerPage
Definition: htup_details.h:575
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:41
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:44
#define Assert(condition)
Definition: c.h:671
int ndead
Definition: pruneheap.c:37
static void heap_prune_record_prunable ( PruneState prstate,
TransactionId  xid 
)
static

Definition at line 620 of file pruneheap.c.

References Assert, PruneState::new_prune_xid, TransactionIdIsNormal, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_prune_chain().

621 {
622  /*
623  * This should exactly match the PageSetPrunable macro. We can't store
624  * directly into the page header yet, so we update working state.
625  */
627  if (!TransactionIdIsValid(prstate->new_prune_xid) ||
628  TransactionIdPrecedes(xid, prstate->new_prune_xid))
629  prstate->new_prune_xid = xid;
630 }
TransactionId new_prune_xid
Definition: pruneheap.c:33
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define Assert(condition)
Definition: c.h:671
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
static void heap_prune_record_redirect ( PruneState prstate,
OffsetNumber  offnum,
OffsetNumber  rdoffnum 
)
static

Definition at line 634 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::nredirected, and PruneState::redirected.

Referenced by heap_prune_chain().

636 {
638  prstate->redirected[prstate->nredirected * 2] = offnum;
639  prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
640  prstate->nredirected++;
641  Assert(!prstate->marked[offnum]);
642  prstate->marked[offnum] = true;
643  Assert(!prstate->marked[rdoffnum]);
644  prstate->marked[rdoffnum] = true;
645 }
int nredirected
Definition: pruneheap.c:36
#define MaxHeapTuplesPerPage
Definition: htup_details.h:575
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:44
#define Assert(condition)
Definition: c.h:671
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:40
static void heap_prune_record_unused ( PruneState prstate,
OffsetNumber  offnum 
)
static

Definition at line 660 of file pruneheap.c.

References Assert, PruneState::marked, MaxHeapTuplesPerPage, PruneState::nowunused, and PruneState::nunused.

Referenced by heap_prune_chain().

661 {
662  Assert(prstate->nunused < MaxHeapTuplesPerPage);
663  prstate->nowunused[prstate->nunused] = offnum;
664  prstate->nunused++;
665  Assert(!prstate->marked[offnum]);
666  prstate->marked[offnum] = true;
667 }
#define MaxHeapTuplesPerPage
Definition: htup_details.h:575
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:44
int nunused
Definition: pruneheap.c:38
#define Assert(condition)
Definition: c.h:671
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:42