PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
pruneheap.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pruneheap.c
4  * heap page pruning and HOT-chain management code
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/pruneheap.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/heapam.h"
18 #include "access/heapam_xlog.h"
19 #include "access/transam.h"
20 #include "access/htup_details.h"
21 #include "access/xlog.h"
22 #include "catalog/catalog.h"
23 #include "miscadmin.h"
24 #include "pgstat.h"
25 #include "storage/bufmgr.h"
26 #include "utils/snapmgr.h"
27 #include "utils/rel.h"
28 #include "utils/tqual.h"
29 
30 /* Working data for heap_page_prune and subroutines */
31 typedef struct
32 {
33  TransactionId new_prune_xid; /* new prune hint value for page */
34  TransactionId latestRemovedXid; /* latest xid to be removed by this
35  * prune */
36  int nredirected; /* numbers of entries in arrays below */
37  int ndead;
38  int nunused;
39  /* arrays that accumulate indexes of items to be changed */
43  /* marked[i] is TRUE if item i is entered in one of the above arrays */
44  bool marked[MaxHeapTuplesPerPage + 1];
45 } PruneState;
46 
47 /* Local functions */
48 static int heap_prune_chain(Relation relation, Buffer buffer,
49  OffsetNumber rootoffnum,
51  PruneState *prstate);
52 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
53 static void heap_prune_record_redirect(PruneState *prstate,
54  OffsetNumber offnum, OffsetNumber rdoffnum);
55 static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
56 static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
57 
58 
59 /*
60  * Optionally prune and repair fragmentation in the specified page.
61  *
62  * This is an opportunistic function. It will perform housekeeping
63  * only if the page heuristically looks like a candidate for pruning and we
64  * can acquire buffer cleanup lock without blocking.
65  *
66  * Note: this is called quite often. It's important that it fall out quickly
67  * if there's not any use in pruning.
68  *
69  * Caller must have pin on the buffer, and must *not* have a lock on it.
70  *
71  * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
72  * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
73  */
74 void
76 {
77  Page page = BufferGetPage(buffer);
78  Size minfree;
80 
81  /*
82  * We can't write WAL in recovery mode, so there's no point trying to
83  * clean the page. The master will likely issue a cleaning WAL record soon
84  * anyway, so this is no particular loss.
85  */
86  if (RecoveryInProgress())
87  return;
88 
89  /*
90  * Use the appropriate xmin horizon for this relation. If it's a proper
91  * catalog relation or a user defined, additional, catalog relation, we
92  * need to use the horizon that includes slots, otherwise the data-only
93  * horizon can be used. Note that the toast relation of user defined
94  * relations are *not* considered catalog relations.
95  *
96  * It is OK to apply the old snapshot limit before acquiring the cleanup
97  * lock because the worst that can happen is that we are not quite as
98  * aggressive about the cleanup (by however many transaction IDs are
99  * consumed between this point and acquiring the lock). This allows us to
100  * save significant overhead in the case where the page is found not to be
101  * prunable.
102  */
103  if (IsCatalogRelation(relation) ||
105  OldestXmin = RecentGlobalXmin;
106  else
107  OldestXmin =
109  relation);
110 
111  Assert(TransactionIdIsValid(OldestXmin));
112 
113  /*
114  * Let's see if we really need pruning.
115  *
116  * Forget it if page is not hinted to contain something prunable that's
117  * older than OldestXmin.
118  */
119  if (!PageIsPrunable(page, OldestXmin))
120  return;
121 
122  /*
123  * We prune when a previous UPDATE failed to find enough space on the page
124  * for a new tuple version, or when free space falls below the relation's
125  * fill-factor target (but not less than 10%).
126  *
127  * Checking free space here is questionable since we aren't holding any
128  * lock on the buffer; in the worst case we could get a bogus answer. It's
129  * unlikely to be *seriously* wrong, though, since reading either pd_lower
130  * or pd_upper is probably atomic. Avoiding taking a lock seems more
131  * important than sometimes getting a wrong answer in what is after all
132  * just a heuristic estimate.
133  */
134  minfree = RelationGetTargetPageFreeSpace(relation,
136  minfree = Max(minfree, BLCKSZ / 10);
137 
138  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
139  {
140  /* OK, try to get exclusive buffer lock */
141  if (!ConditionalLockBufferForCleanup(buffer))
142  return;
143 
144  /*
145  * Now that we have buffer lock, get accurate information about the
146  * page's free space, and recheck the heuristic about whether to
147  * prune. (We needn't recheck PageIsPrunable, since no one else could
148  * have pruned while we hold pin.)
149  */
150  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
151  {
152  TransactionId ignore = InvalidTransactionId; /* return value not
153  * needed */
154 
155  /* OK to prune */
156  (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
157  }
158 
159  /* And release buffer lock */
161  }
162 }
163 
164 
165 /*
166  * Prune and repair fragmentation in the specified page.
167  *
168  * Caller must have pin and buffer cleanup lock on the page.
169  *
170  * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
171  * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
172  *
173  * If report_stats is true then we send the number of reclaimed heap-only
174  * tuples to pgstats. (This must be FALSE during vacuum, since vacuum will
175  * send its own new total to pgstats, and we don't want this delta applied
176  * on top of that.)
177  *
178  * Returns the number of tuples deleted from the page and sets
179  * latestRemovedXid.
180  */
181 int
183  bool report_stats, TransactionId *latestRemovedXid)
184 {
185  int ndeleted = 0;
186  Page page = BufferGetPage(buffer);
187  OffsetNumber offnum,
188  maxoff;
189  PruneState prstate;
190 
191  /*
192  * Our strategy is to scan the page and make lists of items to change,
193  * then apply the changes within a critical section. This keeps as much
194  * logic as possible out of the critical section, and also ensures that
195  * WAL replay will work the same as the normal case.
196  *
197  * First, initialize the new pd_prune_xid value to zero (indicating no
198  * prunable tuples). If we find any tuples which may soon become
199  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
200  * initialize the rest of our working state.
201  */
203  prstate.latestRemovedXid = *latestRemovedXid;
204  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
205  memset(prstate.marked, 0, sizeof(prstate.marked));
206 
207  /* Scan the page */
208  maxoff = PageGetMaxOffsetNumber(page);
209  for (offnum = FirstOffsetNumber;
210  offnum <= maxoff;
211  offnum = OffsetNumberNext(offnum))
212  {
213  ItemId itemid;
214 
215  /* Ignore items already processed as part of an earlier chain */
216  if (prstate.marked[offnum])
217  continue;
218 
219  /* Nothing to do if slot is empty or already dead */
220  itemid = PageGetItemId(page, offnum);
221  if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
222  continue;
223 
224  /* Process this item or chain of items */
225  ndeleted += heap_prune_chain(relation, buffer, offnum,
226  OldestXmin,
227  &prstate);
228  }
229 
230  /* Any error while applying the changes is critical */
232 
233  /* Have we found any prunable items? */
234  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
235  {
236  /*
237  * Apply the planned item changes, then repair page fragmentation, and
238  * update the page's hint bit about whether it has free line pointers.
239  */
241  prstate.redirected, prstate.nredirected,
242  prstate.nowdead, prstate.ndead,
243  prstate.nowunused, prstate.nunused);
244 
245  /*
246  * Update the page's pd_prune_xid field to either zero, or the lowest
247  * XID of any soon-prunable tuple.
248  */
249  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
250 
251  /*
252  * Also clear the "page is full" flag, since there's no point in
253  * repeating the prune/defrag process until something else happens to
254  * the page.
255  */
256  PageClearFull(page);
257 
258  MarkBufferDirty(buffer);
259 
260  /*
261  * Emit a WAL HEAP_CLEAN record showing what we did
262  */
263  if (RelationNeedsWAL(relation))
264  {
265  XLogRecPtr recptr;
266 
267  recptr = log_heap_clean(relation, buffer,
268  prstate.redirected, prstate.nredirected,
269  prstate.nowdead, prstate.ndead,
270  prstate.nowunused, prstate.nunused,
271  prstate.latestRemovedXid);
272 
273  PageSetLSN(BufferGetPage(buffer), recptr);
274  }
275  }
276  else
277  {
278  /*
279  * If we didn't prune anything, but have found a new value for the
280  * pd_prune_xid field, update it and mark the buffer dirty. This is
281  * treated as a non-WAL-logged hint.
282  *
283  * Also clear the "page is full" flag if it is set, since there's no
284  * point in repeating the prune/defrag process until something else
285  * happens to the page.
286  */
287  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
288  PageIsFull(page))
289  {
290  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
291  PageClearFull(page);
292  MarkBufferDirtyHint(buffer, true);
293  }
294  }
295 
297 
298  /*
299  * If requested, report the number of tuples reclaimed to pgstats. This is
300  * ndeleted minus ndead, because we don't want to count a now-DEAD root
301  * item as a deletion for this purpose.
302  */
303  if (report_stats && ndeleted > prstate.ndead)
304  pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
305 
306  *latestRemovedXid = prstate.latestRemovedXid;
307 
308  /*
309  * XXX Should we update the FSM information of this page ?
310  *
311  * There are two schools of thought here. We may not want to update FSM
312  * information so that the page is not used for unrelated UPDATEs/INSERTs
313  * and any free space in this page will remain available for further
314  * UPDATEs in *this* page, thus improving chances for doing HOT updates.
315  *
316  * But for a large table and where a page does not receive further UPDATEs
317  * for a long time, we might waste this space by not updating the FSM
318  * information. The relation may get extended and fragmented further.
319  *
320  * One possibility is to leave "fillfactor" worth of space in this page
321  * and update FSM with the remaining space.
322  */
323 
324  return ndeleted;
325 }
326 
327 
328 /*
329  * Prune specified item pointer or a HOT chain originating at that item.
330  *
331  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
332  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
333  * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
334  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
335  * DEAD, the OldestXmin test is just too coarse to detect it.
336  *
337  * The root line pointer is redirected to the tuple immediately after the
338  * latest DEAD tuple. If all tuples in the chain are DEAD, the root line
339  * pointer is marked LP_DEAD. (This includes the case of a DEAD simple
340  * tuple, which we treat as a chain of length 1.)
341  *
342  * OldestXmin is the cutoff XID used to identify dead tuples.
343  *
344  * We don't actually change the page here, except perhaps for hint-bit updates
345  * caused by HeapTupleSatisfiesVacuum. We just add entries to the arrays in
346  * prstate showing the changes to be made. Items to be redirected are added
347  * to the redirected[] array (two entries per redirection); items to be set to
348  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
349  * state are added to nowunused[].
350  *
351  * Returns the number of tuples (to be) deleted from the page.
352  */
353 static int
356  PruneState *prstate)
357 {
358  int ndeleted = 0;
359  Page dp = (Page) BufferGetPage(buffer);
361  ItemId rootlp;
362  HeapTupleHeader htup;
363  OffsetNumber latestdead = InvalidOffsetNumber,
364  maxoff = PageGetMaxOffsetNumber(dp),
365  offnum;
367  int nchain = 0,
368  i;
369  HeapTupleData tup;
370 
371  tup.t_tableOid = RelationGetRelid(relation);
372 
373  rootlp = PageGetItemId(dp, rootoffnum);
374 
375  /*
376  * If it's a heap-only tuple, then it is not the start of a HOT chain.
377  */
378  if (ItemIdIsNormal(rootlp))
379  {
380  htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
381 
382  tup.t_data = htup;
383  tup.t_len = ItemIdGetLength(rootlp);
384  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
385 
386  if (HeapTupleHeaderIsHeapOnly(htup))
387  {
388  /*
389  * If the tuple is DEAD and doesn't chain to anything else, mark
390  * it unused immediately. (If it does chain, we can only remove
391  * it as part of pruning its chain.)
392  *
393  * We need this primarily to handle aborted HOT updates, that is,
394  * XMIN_INVALID heap-only tuples. Those might not be linked to by
395  * any chain, since the parent tuple might be re-updated before
396  * any pruning occurs. So we have to be able to reap them
397  * separately from chain-pruning. (Note that
398  * HeapTupleHeaderIsHotUpdated will never return true for an
399  * XMIN_INVALID tuple, so this code will work even when there were
400  * sequential updates within the aborted transaction.)
401  *
402  * Note that we might first arrive at a dead heap-only tuple
403  * either here or while following a chain below. Whichever path
404  * gets there first will mark the tuple unused.
405  */
406  if (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer)
408  {
409  heap_prune_record_unused(prstate, rootoffnum);
411  &prstate->latestRemovedXid);
412  ndeleted++;
413  }
414 
415  /* Nothing more to do */
416  return ndeleted;
417  }
418  }
419 
420  /* Start from the root tuple */
421  offnum = rootoffnum;
422 
423  /* while not end of the chain */
424  for (;;)
425  {
426  ItemId lp;
427  bool tupdead,
428  recent_dead;
429 
430  /* Some sanity checks */
431  if (offnum < FirstOffsetNumber || offnum > maxoff)
432  break;
433 
434  /* If item is already processed, stop --- it must not be same chain */
435  if (prstate->marked[offnum])
436  break;
437 
438  lp = PageGetItemId(dp, offnum);
439 
440  /* Unused item obviously isn't part of the chain */
441  if (!ItemIdIsUsed(lp))
442  break;
443 
444  /*
445  * If we are looking at the redirected root line pointer, jump to the
446  * first normal tuple in the chain. If we find a redirect somewhere
447  * else, stop --- it must not be same chain.
448  */
449  if (ItemIdIsRedirected(lp))
450  {
451  if (nchain > 0)
452  break; /* not at start of chain */
453  chainitems[nchain++] = offnum;
454  offnum = ItemIdGetRedirect(rootlp);
455  continue;
456  }
457 
458  /*
459  * Likewise, a dead item pointer can't be part of the chain. (We
460  * already eliminated the case of dead root tuple outside this
461  * function.)
462  */
463  if (ItemIdIsDead(lp))
464  break;
465 
466  Assert(ItemIdIsNormal(lp));
467  htup = (HeapTupleHeader) PageGetItem(dp, lp);
468 
469  tup.t_data = htup;
470  tup.t_len = ItemIdGetLength(lp);
471  ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
472 
473  /*
474  * Check the tuple XMIN against prior XMAX, if any
475  */
476  if (TransactionIdIsValid(priorXmax) &&
477  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
478  break;
479 
480  /*
481  * OK, this tuple is indeed a member of the chain.
482  */
483  chainitems[nchain++] = offnum;
484 
485  /*
486  * Check tuple's visibility status.
487  */
488  tupdead = recent_dead = false;
489 
490  switch (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer))
491  {
492  case HEAPTUPLE_DEAD:
493  tupdead = true;
494  break;
495 
497  recent_dead = true;
498 
499  /*
500  * This tuple may soon become DEAD. Update the hint field so
501  * that the page is reconsidered for pruning in future.
502  */
505  break;
506 
508 
509  /*
510  * This tuple may soon become DEAD. Update the hint field so
511  * that the page is reconsidered for pruning in future.
512  */
515  break;
516 
517  case HEAPTUPLE_LIVE:
519 
520  /*
521  * If we wanted to optimize for aborts, we might consider
522  * marking the page prunable when we see INSERT_IN_PROGRESS.
523  * But we don't. See related decisions about when to mark the
524  * page prunable in heapam.c.
525  */
526  break;
527 
528  default:
529  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
530  break;
531  }
532 
533  /*
534  * Remember the last DEAD tuple seen. We will advance past
535  * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
536  * but we can't advance past anything else. (XXX is it really worth
537  * continuing to scan beyond RECENTLY_DEAD? The case where we will
538  * find another DEAD tuple is a fairly unusual corner case.)
539  */
540  if (tupdead)
541  {
542  latestdead = offnum;
544  &prstate->latestRemovedXid);
545  }
546  else if (!recent_dead)
547  break;
548 
549  /*
550  * If the tuple is not HOT-updated, then we are at the end of this
551  * HOT-update chain.
552  */
553  if (!HeapTupleHeaderIsHotUpdated(htup))
554  break;
555 
556  /*
557  * Advance to next chain member.
558  */
560  BufferGetBlockNumber(buffer));
561  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
562  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
563  }
564 
565  /*
566  * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
567  * the DEAD tuples at the start of the chain are removed and the root line
568  * pointer is appropriately redirected.
569  */
570  if (OffsetNumberIsValid(latestdead))
571  {
572  /*
573  * Mark as unused each intermediate item that we are able to remove
574  * from the chain.
575  *
576  * When the previous item is the last dead tuple seen, we are at the
577  * right candidate for redirection.
578  */
579  for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
580  {
581  heap_prune_record_unused(prstate, chainitems[i]);
582  ndeleted++;
583  }
584 
585  /*
586  * If the root entry had been a normal tuple, we are deleting it, so
587  * count it in the result. But changing a redirect (even to DEAD
588  * state) doesn't count.
589  */
590  if (ItemIdIsNormal(rootlp))
591  ndeleted++;
592 
593  /*
594  * If the DEAD tuple is at the end of the chain, the entire chain is
595  * dead and the root line pointer can be marked dead. Otherwise just
596  * redirect the root to the correct chain member.
597  */
598  if (i >= nchain)
599  heap_prune_record_dead(prstate, rootoffnum);
600  else
601  heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
602  }
603  else if (nchain < 2 && ItemIdIsRedirected(rootlp))
604  {
605  /*
606  * We found a redirect item that doesn't point to a valid follow-on
607  * item. This can happen if the loop in heap_page_prune caused us to
608  * visit the dead successor of a redirect item before visiting the
609  * redirect item. We can clean up by setting the redirect item to
610  * DEAD state.
611  */
612  heap_prune_record_dead(prstate, rootoffnum);
613  }
614 
615  return ndeleted;
616 }
617 
618 /* Record lowest soon-prunable XID */
619 static void
621 {
622  /*
623  * This should exactly match the PageSetPrunable macro. We can't store
624  * directly into the page header yet, so we update working state.
625  */
627  if (!TransactionIdIsValid(prstate->new_prune_xid) ||
628  TransactionIdPrecedes(xid, prstate->new_prune_xid))
629  prstate->new_prune_xid = xid;
630 }
631 
632 /* Record item pointer to be redirected */
633 static void
635  OffsetNumber offnum, OffsetNumber rdoffnum)
636 {
638  prstate->redirected[prstate->nredirected * 2] = offnum;
639  prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
640  prstate->nredirected++;
641  Assert(!prstate->marked[offnum]);
642  prstate->marked[offnum] = true;
643  Assert(!prstate->marked[rdoffnum]);
644  prstate->marked[rdoffnum] = true;
645 }
646 
647 /* Record item pointer to be marked dead */
648 static void
650 {
651  Assert(prstate->ndead < MaxHeapTuplesPerPage);
652  prstate->nowdead[prstate->ndead] = offnum;
653  prstate->ndead++;
654  Assert(!prstate->marked[offnum]);
655  prstate->marked[offnum] = true;
656 }
657 
658 /* Record item pointer to be marked unused */
659 static void
661 {
662  Assert(prstate->nunused < MaxHeapTuplesPerPage);
663  prstate->nowunused[prstate->nunused] = offnum;
664  prstate->nunused++;
665  Assert(!prstate->marked[offnum]);
666  prstate->marked[offnum] = true;
667 }
668 
669 
670 /*
671  * Perform the actual page changes needed by heap_page_prune.
672  * It is expected that the caller has suitable pin and lock on the
673  * buffer, and is inside a critical section.
674  *
675  * This is split out because it is also used by heap_xlog_clean()
676  * to replay the WAL record when needed after a crash. Note that the
677  * arguments are identical to those of log_heap_clean().
678  */
679 void
681  OffsetNumber *redirected, int nredirected,
682  OffsetNumber *nowdead, int ndead,
683  OffsetNumber *nowunused, int nunused)
684 {
685  Page page = (Page) BufferGetPage(buffer);
686  OffsetNumber *offnum;
687  int i;
688 
689  /* Update all redirected line pointers */
690  offnum = redirected;
691  for (i = 0; i < nredirected; i++)
692  {
693  OffsetNumber fromoff = *offnum++;
694  OffsetNumber tooff = *offnum++;
695  ItemId fromlp = PageGetItemId(page, fromoff);
696 
697  ItemIdSetRedirect(fromlp, tooff);
698  }
699 
700  /* Update all now-dead line pointers */
701  offnum = nowdead;
702  for (i = 0; i < ndead; i++)
703  {
704  OffsetNumber off = *offnum++;
705  ItemId lp = PageGetItemId(page, off);
706 
707  ItemIdSetDead(lp);
708  }
709 
710  /* Update all now-unused line pointers */
711  offnum = nowunused;
712  for (i = 0; i < nunused; i++)
713  {
714  OffsetNumber off = *offnum++;
715  ItemId lp = PageGetItemId(page, off);
716 
717  ItemIdSetUnused(lp);
718  }
719 
720  /*
721  * Finally, repair any fragmentation, and update the page's hint bit about
722  * whether it has free pointers.
723  */
725 }
726 
727 
728 /*
729  * For all items in this page, find their respective root line pointers.
730  * If item k is part of a HOT-chain with root at item j, then we set
731  * root_offsets[k - 1] = j.
732  *
733  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
734  * We zero out all unused entries.
735  *
736  * The function must be called with at least share lock on the buffer, to
737  * prevent concurrent prune operations.
738  *
739  * Note: The information collected here is valid only as long as the caller
740  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
741  * and reused by a completely unrelated tuple.
742  */
743 void
745 {
746  OffsetNumber offnum,
747  maxoff;
748 
749  MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
750 
751  maxoff = PageGetMaxOffsetNumber(page);
752  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
753  {
754  ItemId lp = PageGetItemId(page, offnum);
755  HeapTupleHeader htup;
756  OffsetNumber nextoffnum;
757  TransactionId priorXmax;
758 
759  /* skip unused and dead items */
760  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
761  continue;
762 
763  if (ItemIdIsNormal(lp))
764  {
765  htup = (HeapTupleHeader) PageGetItem(page, lp);
766 
767  /*
768  * Check if this tuple is part of a HOT-chain rooted at some other
769  * tuple. If so, skip it for now; we'll process it when we find
770  * its root.
771  */
772  if (HeapTupleHeaderIsHeapOnly(htup))
773  continue;
774 
775  /*
776  * This is either a plain tuple or the root of a HOT-chain.
777  * Remember it in the mapping.
778  */
779  root_offsets[offnum - 1] = offnum;
780 
781  /* If it's not the start of a HOT-chain, we're done with it */
782  if (!HeapTupleHeaderIsHotUpdated(htup))
783  continue;
784 
785  /* Set up to scan the HOT-chain */
786  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
787  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
788  }
789  else
790  {
791  /* Must be a redirect item. We do not set its root_offsets entry */
793  /* Set up to scan the HOT-chain */
794  nextoffnum = ItemIdGetRedirect(lp);
795  priorXmax = InvalidTransactionId;
796  }
797 
798  /*
799  * Now follow the HOT-chain and collect other tuples in the chain.
800  *
801  * Note: Even though this is a nested loop, the complexity of the
802  * function is O(N) because a tuple in the page should be visited not
803  * more than twice, once in the outer loop and once in HOT-chain
804  * chases.
805  */
806  for (;;)
807  {
808  lp = PageGetItemId(page, nextoffnum);
809 
810  /* Check for broken chains */
811  if (!ItemIdIsNormal(lp))
812  break;
813 
814  htup = (HeapTupleHeader) PageGetItem(page, lp);
815 
816  if (TransactionIdIsValid(priorXmax) &&
817  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
818  break;
819 
820  /* Remember the root line pointer for this item */
821  root_offsets[nextoffnum - 1] = offnum;
822 
823  /* Advance to next chain member, if any */
824  if (!HeapTupleHeaderIsHotUpdated(htup))
825  break;
826 
827  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
828  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
829  }
830  }
831 }
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:359
void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid)
Definition: heapam.c:7334
static int heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, TransactionId OldestXmin, PruneState *prstate)
Definition: pruneheap.c:354
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:87
int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, bool report_stats, TransactionId *latestRemovedXid)
Definition: pruneheap.c:182
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:91
int nredirected
Definition: pruneheap.c:36
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:105
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
Definition: pgstat.c:1936
uint32 TransactionId
Definition: c.h:397
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:3379
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:1450
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:77
#define END_CRIT_SECTION()
Definition: miscadmin.h:132
#define ItemIdIsUsed(itemId)
Definition: itemid.h:91
TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, Relation relation)
Definition: snapmgr.c:1689
#define MaxHeapTuplesPerPage
Definition: htup_details.h:575
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
Definition: tqual.c:1164
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:649
#define START_CRIT_SECTION()
Definition: miscadmin.h:130
#define MemSet(start, val, len)
Definition: c.h:857
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:41
bool RecoveryInProgress(void)
Definition: xlog.c:7857
#define ItemIdIsDead(itemId)
Definition: itemid.h:112
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:44
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:354
TransactionId new_prune_xid
Definition: pruneheap.c:33
#define PageIsFull(page)
Definition: bufpage.h:375
int nunused
Definition: pruneheap.c:38
uint16 OffsetNumber
Definition: off.h:24
HeapTupleHeader t_data
Definition: htup.h:67
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:502
#define ItemIdGetLength(itemId)
Definition: itemid.h:58
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:3718
#define ERROR
Definition: elog.h:43
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:666
ItemPointerData t_ctid
Definition: htup_details.h:150
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
Definition: pruneheap.c:620
TransactionId RecentGlobalXmin
Definition: snapmgr.c:166
TransactionId latestRemovedXid
Definition: pruneheap.c:34
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:151
#define FirstOffsetNumber
Definition: off.h:27
#define InvalidTransactionId
Definition: transam.h:31
static TransactionId OldestXmin
Definition: vacuumlazy.c:139
Oid t_tableOid
Definition: htup.h:66
#define BufferGetPage(buffer)
Definition: bufmgr.h:160
TransactionId RecentGlobalDataXmin
Definition: snapmgr.c:167
void heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
Definition: pruneheap.c:744
#define PageClearFull(page)
Definition: bufpage.h:379
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:300
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:304
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:485
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:232
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:556
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3546
#define InvalidOffsetNumber
Definition: off.h:26
void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:680
#define Max(x, y)
Definition: c.h:800
PageHeaderData * PageHeader
Definition: bufpage.h:162
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:675
static void heap_prune_record_redirect(PruneState *prstate, OffsetNumber offnum, OffsetNumber rdoffnum)
Definition: pruneheap.c:634
#define ItemIdIsNormal(itemId)
Definition: itemid.h:98
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:207
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:307
#define OffsetNumberNext(offsetNumber)
Definition: off.h:53
size_t Size
Definition: c.h:356
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:42
XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused, TransactionId latestRemovedXid)
Definition: heapam.c:7402
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:76
#define RelationNeedsWAL(relation)
Definition: rel.h:502
#define PageIsPrunable(page, oldestxmin)
Definition: bufpage.h:389
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:40
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:2605
int ndead
Definition: pruneheap.c:37
void PageRepairFragmentation(Page page)
Definition: bufpage.c:479
int i
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:40
#define ItemIdSetDead(itemId)
Definition: itemid.h:163
#define elog
Definition: elog.h:219
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:75
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:66
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:283
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define ItemIdSetUnused(itemId)
Definition: itemid.h:127
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
#define PageSetLSN(page, lsn)
Definition: bufpage.h:365
int Buffer
Definition: buf.h:23
#define RelationGetRelid(relation)
Definition: rel.h:413
#define PageGetItem(page, itemId)
Definition: bufpage.h:337
Pointer Page
Definition: bufpage.h:74
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:86
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:660