PostgreSQL Source Code  git master
heapam_handler.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This file wires up the lower level heapam.c et al routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/heaptoast.h"
25 #include "access/multixact.h"
26 #include "access/rewriteheap.h"
27 #include "access/syncscan.h"
28 #include "access/tableam.h"
29 #include "access/tsmapi.h"
30 #include "access/visibilitymap.h"
31 #include "access/xact.h"
32 #include "catalog/catalog.h"
33 #include "catalog/index.h"
34 #include "catalog/storage.h"
35 #include "catalog/storage_xlog.h"
36 #include "commands/progress.h"
37 #include "executor/executor.h"
38 #include "miscadmin.h"
39 #include "pgstat.h"
40 #include "storage/bufmgr.h"
41 #include "storage/bufpage.h"
42 #include "storage/lmgr.h"
43 #include "storage/predicate.h"
44 #include "storage/procarray.h"
45 #include "storage/smgr.h"
46 #include "utils/builtins.h"
47 #include "utils/rel.h"
48 
/*
 * Forward declarations of two file-local (static) helpers.  Their
 * definitions are not part of this portion of the listing.
 */
49 static void reform_and_rewrite_tuple(HeapTuple tuple,
50  Relation OldHeap, Relation NewHeap,
51  Datum *values, bool *isnull, RewriteState rwstate);
52 
53 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
54  HeapTuple tuple,
55  OffsetNumber tupoffset);
56 
58 
59 
60 /* ------------------------------------------------------------------------
61  * Slot related callbacks for heap AM
62  * ------------------------------------------------------------------------
63  */
64 
/*
 * Slot callback: returns the address of TTSOpsBufferHeapTuple, the slot
 * operations table handed out for this access method.
 *
 * NOTE(review): listing line 66 (function name and parameter list) is
 * missing from this text; presumably heapam_slot_callbacks -- confirm
 * against the upstream file.
 */
65 static const TupleTableSlotOps *
67 {
68  return &TTSOpsBufferHeapTuple;
69 }
70 
71 
72 /* ------------------------------------------------------------------------
73  * Index Scan Callbacks for heap AM
74  * ------------------------------------------------------------------------
75  */
76 
/*
 * Set up index-fetch state: remember the relation, mark that no buffer is
 * currently held (xs_cbuf = InvalidBuffer), and return a pointer to the
 * embedded xs_base member.
 *
 * NOTE(review): listing lines 78 (function name/parameters) and 80 (the
 * declaration and allocation of hscan) are missing from this text;
 * presumably heapam_index_fetch_begin -- restore from the upstream file.
 */
77 static IndexFetchTableData *
79 {
81 
82  hscan->xs_base.rel = rel;
83  hscan->xs_cbuf = InvalidBuffer;
84 
85  return &hscan->xs_base;
86 }
87 
/*
 * Reset index-fetch state: if the scan currently holds a valid buffer,
 * release it and mark xs_cbuf as InvalidBuffer.  Does nothing otherwise.
 *
 * NOTE(review): listing line 89 (function name/parameters) is missing from
 * this text; presumably heapam_index_fetch_reset -- confirm.
 */
88 static void
90 {
91  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
92 
93  if (BufferIsValid(hscan->xs_cbuf))
94  {
95  ReleaseBuffer(hscan->xs_cbuf);
96  hscan->xs_cbuf = InvalidBuffer;
97  }
98 }
99 
/*
 * Tear down index-fetch state by freeing the IndexFetchHeapData struct.
 *
 * NOTE(review): listing lines 101 (function name/parameters) and 105 are
 * missing from this text.  Line 105 presumably releases any buffer still
 * held before the pfree -- confirm against the upstream file; as shown
 * here nothing releases xs_cbuf.
 */
100 static void
102 {
103  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
104 
106 
107  pfree(hscan);
108 }
109 
/*
 * Index-fetch callback: search the HOT chain at *tid with
 * heap_hot_search_buffer and, if a tuple satisfying the snapshot is found,
 * store it in the slot (which must be a buffer-heap-tuple slot).
 *
 * *call_again is both input and output.  When it is false on entry, the
 * scan's buffer is switched via ReleaseAndReadBuffer and heap_page_prune_opt
 * is called if that yielded a different buffer than before.  On exit it is
 * true only if a tuple was found and the snapshot is not an MVCC snapshot.
 * all_dead is passed through to heap_hot_search_buffer unchanged.
 *
 * Returns the result of heap_hot_search_buffer (true if a tuple was found).
 *
 * NOTE(review): listing lines 111, 118, 131, 141 and 150 are missing from
 * this text: the function name/first parameter, the bslot declaration, the
 * final argument of ReleaseAndReadBuffer, and presumably the buffer
 * lock/unlock calls around the search (see the "Obtain share-lock" comment).
 * Restore them from the upstream file.
 */
110 static bool
112  ItemPointer tid,
113  Snapshot snapshot,
114  TupleTableSlot *slot,
115  bool *call_again, bool *all_dead)
116 {
117  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
119  bool got_heap_tuple;
120 
121  Assert(TTS_IS_BUFFERTUPLE(slot));
122 
123  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
124  if (!*call_again)
125  {
126  /* Switch to correct buffer if we don't have it already */
127  Buffer prev_buf = hscan->xs_cbuf;
128 
129  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
130  hscan->xs_base.rel,
132 
133  /*
134  * Prune page, but only if we weren't already on this page
135  */
136  if (prev_buf != hscan->xs_cbuf)
137  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
138  }
139 
140  /* Obtain share-lock on the buffer so we can examine visibility */
142  got_heap_tuple = heap_hot_search_buffer(tid,
143  hscan->xs_base.rel,
144  hscan->xs_cbuf,
145  snapshot,
146  &bslot->base.tupdata,
147  all_dead,
148  !*call_again);
    /* Record the TID in the slot's tuple header whether or not a tuple was found */
149  bslot->base.tupdata.t_self = *tid;
151 
152  if (got_heap_tuple)
153  {
154  /*
155  * Only in a non-MVCC snapshot can more than one member of the HOT
156  * chain be visible.
157  */
158  *call_again = !IsMVCCSnapshot(snapshot);
159 
160  slot->tts_tableOid = RelationGetRelid(scan->rel);
161  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
162  }
163  else
164  {
165  /* We've reached the end of the HOT chain. */
166  *call_again = false;
167  }
168 
169  return got_heap_tuple;
170 }
171 
172 
173 /* ------------------------------------------------------------------------
174  * Callbacks for non-modifying operations on individual tuples for heap AM
175  * ------------------------------------------------------------------------
176  */
177 
/*
 * Fetch the tuple version at exactly *tid using heap_fetch under the given
 * snapshot.  On success the tuple is stored in the slot (which must be a
 * buffer-heap-tuple slot), the buffer pin returned by heap_fetch is handed
 * over to the slot, the slot's table OID is set, and true is returned.
 * Returns false if heap_fetch does not return a tuple.
 *
 * NOTE(review): listing lines 179 (function name/first parameter) and 184
 * (the bslot declaration) are missing from this text; presumably
 * heapam_fetch_row_version -- restore from the upstream file.
 */
178 static bool
180  ItemPointer tid,
181  Snapshot snapshot,
182  TupleTableSlot *slot)
183 {
185  Buffer buffer;
186 
187  Assert(TTS_IS_BUFFERTUPLE(slot));
188 
189  bslot->base.tupdata.t_self = *tid;
190  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
191  {
192  /* store in slot, transferring existing pin */
193  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
194  slot->tts_tableOid = RelationGetRelid(relation);
195 
196  return true;
197  }
198 
199  return false;
200 }
201 
/*
 * Check whether a TID is plausible for the scanned relation.  The visible
 * part of the test is ItemPointerIsValid(tid).
 *
 * NOTE(review): listing lines 203 (function name/parameters) and 208 (the
 * second operand of the && expression, which presumably uses hscan) are
 * missing from this text; presumably heapam_tuple_tid_valid -- restore from
 * the upstream file before relying on this description.
 */
202 static bool
204 {
205  HeapScanDesc hscan = (HeapScanDesc) scan;
206 
207  return ItemPointerIsValid(tid) &&
209 }
210 
/*
 * Test the tuple currently held in a buffer-heap-tuple slot against a
 * snapshot using HeapTupleSatisfiesVisibility, and return that result.
 * The slot must reference a valid buffer (asserted).
 *
 * NOTE(review): listing lines 212 (function name/first parameters), 215
 * (the bslot declaration), 225 and 228 are missing from this text.  Per the
 * comment inside the function, 225/228 presumably take and release the
 * buffer lock around the visibility test -- restore from the upstream file.
 */
211 static bool
213  Snapshot snapshot)
214 {
216  bool res;
217 
218  Assert(TTS_IS_BUFFERTUPLE(slot));
219  Assert(BufferIsValid(bslot->buffer));
220 
221  /*
222  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
223  * Caller should be holding pin, but not lock.
224  */
226  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
227  bslot->buffer);
229 
230  return res;
231 }
232 
233 
234 /* ----------------------------------------------------------------------------
235  * Functions for manipulations of physical tuples for heap AM.
236  * ----------------------------------------------------------------------------
237  */
238 
/*
 * Insert the tuple held in slot into relation via heap_insert.
 *
 * The slot's and the tuple's table OID are set to the relation's OID before
 * the insert; afterwards the tuple's t_self is copied into slot->tts_tid.
 * The heap tuple obtained from the slot is pfree'd if
 * ExecFetchSlotHeapTuple reported (via shouldFree) that it should be.
 *
 * NOTE(review): listing line 240 (function name and the leading parameters,
 * which include relation, slot and cid) is missing from this text;
 * presumably heapam_tuple_insert -- confirm.
 */
239 static void
241  int options, BulkInsertState bistate)
242 {
243  bool shouldFree = true;
244  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
245 
246  /* Update the tuple with table oid */
247  slot->tts_tableOid = RelationGetRelid(relation);
248  tuple->t_tableOid = slot->tts_tableOid;
249 
250  /* Perform the insertion, and copy the resulting ItemPointer */
251  heap_insert(relation, tuple, cid, options, bistate);
252  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
253 
254  if (shouldFree)
255  pfree(tuple);
256 }
257 
/*
 * Speculatively insert the tuple held in slot: same sequence as a plain
 * insert, except that the tuple header is first stamped with specToken via
 * HeapTupleHeaderSetSpeculativeToken.  The new tuple's location is copied
 * into slot->tts_tid, and the fetched heap tuple is freed when shouldFree
 * is set.
 *
 * NOTE(review): listing lines 259 (function name/leading parameters) and
 * 271 are missing from this text.  Line 271 sits between setting the token
 * and heap_insert and presumably adjusts "options" for a speculative
 * insertion -- restore from the upstream file.
 */
258 static void
260  CommandId cid, int options,
261  BulkInsertState bistate, uint32 specToken)
262 {
263  bool shouldFree = true;
264  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
265 
266  /* Update the tuple with table oid */
267  slot->tts_tableOid = RelationGetRelid(relation);
268  tuple->t_tableOid = slot->tts_tableOid;
269 
270  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
272 
273  /* Perform the insertion, and copy the resulting ItemPointer */
274  heap_insert(relation, tuple, cid, options, bistate);
275  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
276 
277  if (shouldFree)
278  pfree(tuple);
279 }
280 
/*
 * Finish a speculative insertion of the tuple at slot->tts_tid: call
 * heap_finish_speculative when "succeeded" is true, otherwise
 * heap_abort_speculative.
 *
 * The heap tuple fetched from the slot is not otherwise used in the visible
 * code; it is only freed again when shouldFree is set.  specToken is not
 * referenced in the visible code.
 *
 * NOTE(review): listing line 282 (function name/leading parameters) is
 * missing from this text; presumably heapam_tuple_complete_speculative --
 * confirm.
 */
281 static void
283  uint32 specToken, bool succeeded)
284 {
285  bool shouldFree = true;
286  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
287 
288  /* adjust the tuple's state accordingly */
289  if (succeeded)
290  heap_finish_speculative(relation, &slot->tts_tid);
291  else
292  heap_abort_speculative(relation, &slot->tts_tid);
293 
294  if (shouldFree)
295  pfree(tuple);
296 }
297 
/*
 * Delete the tuple at tid by delegating to heap_delete and returning its
 * result.  The "snapshot" parameter is not passed on to heap_delete.
 *
 * NOTE(review): listing line 299 (function name and the leading parameters,
 * which include relation, tid and cid) is missing from this text;
 * presumably heapam_tuple_delete -- confirm.
 */
298 static TM_Result
300  Snapshot snapshot, Snapshot crosscheck, bool wait,
301  TM_FailureData *tmfd, bool changingPart)
302 {
303  /*
304  * Index tuples are currently deleted at vacuum time.  If the storage
305  * itself cleaned up dead tuples, this would be the place to also
306  * trigger index tuple deletion.
307  */
308  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
309 }
310 
311 
/*
 * Replace the tuple at otid with the contents of slot via heap_update and
 * return heap_update's result.
 *
 * The slot's and tuple's table OID are set first.  After the call the new
 * tuple's t_self is copied into slot->tts_tid.  *update_indexes is filled
 * in by heap_update; the code here only sanity-checks it with assertions
 * and forces it to TU_None when the result is not TM_Ok.  The "snapshot"
 * parameter is not passed on to heap_update.  The fetched heap tuple is
 * freed when shouldFree is set.
 *
 * NOTE(review): listing line 313 (function name and the leading parameters,
 * which include relation, otid and slot) is missing from this text;
 * presumably heapam_tuple_update -- confirm.
 */
312 static TM_Result
314  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
315  bool wait, TM_FailureData *tmfd,
316  LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
317 {
318  bool shouldFree = true;
319  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
320  TM_Result result;
321 
322  /* Update the tuple with table oid */
323  slot->tts_tableOid = RelationGetRelid(relation);
324  tuple->t_tableOid = slot->tts_tableOid;
325 
326  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
327  tmfd, lockmode, update_indexes);
328  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
329 
330  /*
331  * Decide whether new index entries are needed for the tuple
332  *
333  * Note: heap_update returns the tid (location) of the new tuple in the
334  * t_self field.
335  *
336  * If the update is not HOT, we must update all indexes. If the update is
337  * HOT, it could be that we updated summarized columns, so we either
338  * update only summarized indexes, or none at all.
339  */
340  if (result != TM_Ok)
341  {
342  Assert(*update_indexes == TU_None);
343  *update_indexes = TU_None;
344  }
345  else if (!HeapTupleIsHeapOnly(tuple))
346  Assert(*update_indexes == TU_All);
347  else
348  Assert((*update_indexes == TU_Summarizing) ||
349  (*update_indexes == TU_None));
350 
351  if (shouldFree)
352  pfree(tuple);
353 
354  return result;
355 }
356 
/*
 * Lock the tuple at *tid with heap_lock_tuple, optionally chasing the
 * update chain to a newer version of the row.
 *
 * follow_updates is derived from TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS in
 * flags.  tmfd->traversed starts out false and is set to true once the code
 * moves on to a later tuple in the update chain; in that case *tid is
 * overwritten with the newer version's TID.
 *
 * When heap_lock_tuple reports TM_Updated (plus a further condition whose
 * line is missing here), the newer version is fetched with a dirty snapshot
 * in a loop:
 *   - an xmin mismatch against the prior xmax, an empty slot, or a tuple
 *     whose t_ctid points at itself ends the search with TM_Deleted;
 *   - an in-progress updater is waited for according to wait_policy
 *     (blocking wait, TM_WouldBlock for LockWaitSkip, ERROR for
 *     LockWaitError);
 *   - a tuple inserted by our own transaction with cmin >= cid yields
 *     TM_SelfModified, with tmfd->xmax/cmax filled in;
 *   - a live tuple jumps back to tuple_lock_retry to lock that version.
 *
 * On the exit path at the bottom, the slot's and tuple's table OID are set,
 * the tuple is stored in the slot with the buffer pin transferred to it,
 * and heap_lock_tuple's result is returned.  The early "return" paths do
 * not store anything in the slot.
 *
 * NOTE(review): listing lines 358-359, 363, 380, 383, 408, 410, 426, 436,
 * 439-440, 455 and 520 are missing from this text: the function name and
 * leading parameters, the bslot declaration, the second half of the
 * TM_Updated condition, an assertion, the condition guarding the
 * "moved to another partition" error and its errcode, both xmin-comparison
 * conditions, an errcode and two errmsg arguments, and the last argument of
 * XactLockTableWait.  Presumably heapam_tuple_lock -- restore from the
 * upstream file before editing this function.
 */
357 static TM_Result
360  LockWaitPolicy wait_policy, uint8 flags,
361  TM_FailureData *tmfd)
362 {
364  TM_Result result;
365  Buffer buffer;
366  HeapTuple tuple = &bslot->base.tupdata;
367  bool follow_updates;
368 
369  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
370  tmfd->traversed = false;
371 
372  Assert(TTS_IS_BUFFERTUPLE(slot));
373 
374 tuple_lock_retry:
375  tuple->t_self = *tid;
376  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
377  follow_updates, &buffer, tmfd);
378 
379  if (result == TM_Updated &&
381  {
382  /* Should not encounter speculative tuple on recheck */
384 
385  ReleaseBuffer(buffer);
386 
387  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
388  {
389  SnapshotData SnapshotDirty;
390  TransactionId priorXmax;
391 
392  /* it was updated, so look at the updated version */
393  *tid = tmfd->ctid;
394  /* updated row should have xmin matching this xmax */
395  priorXmax = tmfd->xmax;
396 
397  /* signal that a tuple later in the chain is getting locked */
398  tmfd->traversed = true;
399 
400  /*
401  * fetch target tuple
402  *
403  * Loop here to deal with updated or busy tuples
404  */
405  InitDirtySnapshot(SnapshotDirty);
406  for (;;)
407  {
409  ereport(ERROR,
411  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
412 
413  tuple->t_self = *tid;
414  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
415  {
416  /*
417  * If xmin isn't what we're expecting, the slot must have
418  * been recycled and reused for an unrelated tuple. This
419  * implies that the latest version of the row was deleted,
420  * so we need do nothing. (Should be safe to examine xmin
421  * without getting buffer's content lock. We assume
422  * reading a TransactionId to be atomic, and Xmin never
423  * changes in an existing tuple, except to invalid or
424  * frozen, and neither of those can match priorXmax.)
425  */
427  priorXmax))
428  {
429  ReleaseBuffer(buffer);
430  return TM_Deleted;
431  }
432 
433  /* otherwise xmin should not be dirty... */
434  if (TransactionIdIsValid(SnapshotDirty.xmin))
435  ereport(ERROR,
437  errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
438  SnapshotDirty.xmin,
441  RelationGetRelationName(relation))));
442 
443  /*
444  * If tuple is being updated by other transaction then we
445  * have to wait for its commit/abort, or die trying.
446  */
447  if (TransactionIdIsValid(SnapshotDirty.xmax))
448  {
449  ReleaseBuffer(buffer);
450  switch (wait_policy)
451  {
452  case LockWaitBlock:
453  XactLockTableWait(SnapshotDirty.xmax,
454  relation, &tuple->t_self,
456  break;
457  case LockWaitSkip:
458  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
459  /* skip instead of waiting */
460  return TM_WouldBlock;
461  break;
462  case LockWaitError:
463  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
464  ereport(ERROR,
465  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
466  errmsg("could not obtain lock on row in relation \"%s\"",
467  RelationGetRelationName(relation))));
468  break;
469  }
470  continue; /* loop back to repeat heap_fetch */
471  }
472 
473  /*
474  * If tuple was inserted by our own transaction, we have
475  * to check cmin against cid: cmin >= current CID means
476  * our command cannot see the tuple, so we should ignore
477  * it. Otherwise heap_lock_tuple() will throw an error,
478  * and so would any later attempt to update or delete the
479  * tuple. (We need not check cmax because
480  * HeapTupleSatisfiesDirty will consider a tuple deleted
481  * by our transaction dead, regardless of cmax.) We just
482  * checked that priorXmax == xmin, so we can test that
483  * variable instead of doing HeapTupleHeaderGetXmin again.
484  */
485  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
486  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
487  {
488  tmfd->xmax = priorXmax;
489 
490  /*
491  * Cmin is the problematic value, so store that. See
492  * above.
493  */
494  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
495  ReleaseBuffer(buffer);
496  return TM_SelfModified;
497  }
498 
499  /*
500  * This is a live tuple, so try to lock it again.
501  */
502  ReleaseBuffer(buffer);
503  goto tuple_lock_retry;
504  }
505 
506  /*
507  * If the referenced slot was actually empty, the latest
508  * version of the row must have been deleted, so we need do
509  * nothing.
510  */
511  if (tuple->t_data == NULL)
512  {
513  Assert(!BufferIsValid(buffer));
514  return TM_Deleted;
515  }
516 
517  /*
518  * As above, if xmin isn't what we're expecting, do nothing.
519  */
521  priorXmax))
522  {
523  ReleaseBuffer(buffer);
524  return TM_Deleted;
525  }
526 
527  /*
528  * If we get here, the tuple was found but failed
529  * SnapshotDirty. Assuming the xmin is either a committed xact
530  * or our own xact (as it certainly should be if we're trying
531  * to modify the tuple), this must mean that the row was
532  * updated or deleted by either a committed xact or our own
533  * xact. If it was deleted, we can ignore it; if it was
534  * updated then chain up to the next version and repeat the
535  * whole process.
536  *
537  * As above, it should be safe to examine xmax and t_ctid
538  * without the buffer content lock, because they can't be
539  * changing. We'd better hold a buffer pin though.
540  */
541  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
542  {
543  /* deleted, so forget about it */
544  ReleaseBuffer(buffer);
545  return TM_Deleted;
546  }
547 
548  /* updated, so look at the updated row */
549  *tid = tuple->t_data->t_ctid;
550  /* updated row should have xmin matching this xmax */
551  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
552  ReleaseBuffer(buffer);
553  /* loop back to fetch next in chain */
554  }
555  }
556  else
557  {
558  /* tuple was deleted, so give up */
559  return TM_Deleted;
560  }
561  }
562 
563  slot->tts_tableOid = RelationGetRelid(relation);
564  tuple->t_tableOid = slot->tts_tableOid;
565 
566  /* store in slot, transferring existing pin */
567  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
568 
569  return result;
570 }
571 
572 
573 /* ------------------------------------------------------------------------
574  * DDL related callbacks for heap AM.
575  * ------------------------------------------------------------------------
576  */
577 
/*
 * Create storage for a relation at *newrlocator and report the initial
 * freeze horizon values to the caller.
 *
 * Outputs: *freezeXid is set to RecentXmin and *minmulti to
 * GetOldestMultiXactId().  Storage is created with RelationCreateStorage;
 * for RELPERSISTENCE_UNLOGGED an init fork is additionally created and
 * WAL-logged via log_smgrcreate.  The smgr handle is closed before
 * returning.
 *
 * NOTE(review): listing line 579 (function name and first parameter, which
 * the body refers to as "rel") is missing from this text; presumably
 * heapam_relation_set_new_filelocator -- confirm.
 */
578 static void
580  const RelFileLocator *newrlocator,
581  char persistence,
582  TransactionId *freezeXid,
583  MultiXactId *minmulti)
584 {
585  SMgrRelation srel;
586 
587  /*
588  * Initialize to the minimum XID that could put tuples in the table. We
589  * know that no xacts older than RecentXmin are still running, so that
590  * will do.
591  */
592  *freezeXid = RecentXmin;
593 
594  /*
595  * Similarly, initialize the minimum Multixact to the first value that
596  * could possibly be stored in tuples in the table. Running transactions
597  * could reuse values from their local cache, so we are careful to
598  * consider all currently running multis.
599  *
600  * XXX this could be refined further, but is it worth the hassle?
601  */
602  *minmulti = GetOldestMultiXactId();
603 
604  srel = RelationCreateStorage(*newrlocator, persistence, true);
605 
606  /*
607  * If required, set up an init fork for an unlogged table so that it can
608  * be correctly reinitialized on restart.
609  */
610  if (persistence == RELPERSISTENCE_UNLOGGED)
611  {
612  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
613  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
614  smgrcreate(srel, INIT_FORKNUM, false);
615  log_smgrcreate(newrlocator, INIT_FORKNUM);
616  }
617 
618  smgrclose(srel);
619 }
620 
/*
 * Truncate the relation to zero blocks by calling RelationTruncate(rel, 0).
 *
 * NOTE(review): listing line 622 (function name and parameter list) is
 * missing from this text; presumably
 * heapam_relation_nontransactional_truncate -- confirm.
 */
621 static void
623 {
624  RelationTruncate(rel, 0);
625 }
626 
/*
 * Copy a relation's physical storage to a new location (*newrlocator).
 *
 * New storage is created with RelationCreateStorage using the relation's
 * own persistence.  Every fork after MAIN_FORKNUM that exists in the source
 * is created in the destination, WAL-logged when the relation is permanent
 * (or when it is the init fork of an unlogged relation), and copied with
 * RelationCopyStorage.  Finally the old storage is scheduled for removal
 * with RelationDropStorage and the destination smgr handle is closed.
 *
 * NOTE(review): listing lines 628 (function name/parameters), 638 and 650
 * are missing from this text.  Judging by the surrounding comments, 638
 * presumably flushes the source relation's dirty shared buffers and 650 is
 * the start of the main-fork RelationCopyStorage call whose last argument
 * appears on line 651 -- restore from the upstream file.
 */
627 static void
629 {
630  SMgrRelation dstrel;
631 
632  /*
633  * Since we copy the file directly without looking at the shared buffers,
634  * we'd better first flush out any pages of the source relation that are
635  * in shared buffers. We assume no new changes will be made while we are
636  * holding exclusive lock on the rel.
637  */
639 
640  /*
641  * Create and copy all forks of the relation, and schedule unlinking of
642  * old physical files.
643  *
644  * NOTE: any conflict in relfilenumber value will be caught in
645  * RelationCreateStorage().
646  */
647  dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
648 
649  /* copy main fork */
651  rel->rd_rel->relpersistence);
652 
653  /* copy those extra forks that exist */
654  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
655  forkNum <= MAX_FORKNUM; forkNum++)
656  {
657  if (smgrexists(RelationGetSmgr(rel), forkNum))
658  {
659  smgrcreate(dstrel, forkNum, false);
660 
661  /*
662  * WAL log creation if the relation is persistent, or this is the
663  * init fork of an unlogged relation.
664  */
665  if (RelationIsPermanent(rel) ||
666  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
667  forkNum == INIT_FORKNUM))
668  log_smgrcreate(newrlocator, forkNum);
669  RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
670  rel->rd_rel->relpersistence);
671  }
672  }
673 
674 
675  /* drop old relation, and close new one */
676  RelationDropStorage(rel);
677  smgrclose(dstrel);
678 }
679 
/*
 * Copy the contents of OldHeap into NewHeap for a table rewrite (the
 * comments below mention CLUSTER and VACUUM FULL).
 *
 * Scan order: if OldIndex is given and use_sort is false, OldHeap is read
 * through an index scan on OldIndex; otherwise it is read with a table
 * scan, and if use_sort is true the surviving tuples are fed to a tuplesort
 * and written out in sorted order afterwards.  Both scans use SnapshotAny,
 * and each tuple is classified with HeapTupleSatisfiesVacuum against
 * OldestXmin.
 *
 * Per tuple: dead tuples bump *tups_vacuumed and are passed to
 * rewrite_heap_dead_tuple; everything else bumps *num_tuples and is either
 * put into the tuplesort or written immediately with
 * reform_and_rewrite_tuple.  *tups_recently_dead is incremented for
 * recently-dead and delete-in-progress tuples, and decremented again when
 * rewrite_heap_dead_tuple reports that an earlier recently-dead tuple is
 * now known dead.
 *
 * *xid_cutoff and *multi_cutoff are only read here (passed to
 * begin_heap_rewrite).  The values/isnull scratch arrays are sized from
 * NewHeap's descriptor and freed at the end.
 *
 * NOTE(review): many listing lines are missing from this text: 681
 * (function name/leading parameters), 702 (hslot declaration), 712, 727,
 * 740-741, 746, 758-759, 766, 785, 808, 824, 836, 844, 851, 862, 868, 874,
 * 887, 911, 917-918, 940, 951-952, 957-958, 964, 971 and 976.  These cover
 * progress-reporting calls, two case labels and two conditions in the
 * switch, interrupt checks, buffer lock/unlock calls around the visibility
 * test, the slot cleanup call and the start of the sorted-output
 * reform_and_rewrite_tuple call.  Presumably
 * heapam_relation_copy_for_cluster -- restore from the upstream file before
 * editing this function.
 */
680 static void
682  Relation OldIndex, bool use_sort,
683  TransactionId OldestXmin,
684  TransactionId *xid_cutoff,
685  MultiXactId *multi_cutoff,
686  double *num_tuples,
687  double *tups_vacuumed,
688  double *tups_recently_dead)
689 {
690  RewriteState rwstate;
691  IndexScanDesc indexScan;
692  TableScanDesc tableScan;
693  HeapScanDesc heapScan;
694  bool is_system_catalog;
695  Tuplesortstate *tuplesort;
696  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
697  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
698  TupleTableSlot *slot;
699  int natts;
700  Datum *values;
701  bool *isnull;
703  BlockNumber prev_cblock = InvalidBlockNumber;
704 
705  /* Remember if it's a system catalog */
706  is_system_catalog = IsSystemRelation(OldHeap);
707 
708  /*
709  * Valid smgr_targblock implies something already wrote to the relation.
710  * This may be harmless, but this function hasn't planned for it.
711  */
713 
714  /* Preallocate values/isnull arrays */
715  natts = newTupDesc->natts;
716  values = (Datum *) palloc(natts * sizeof(Datum));
717  isnull = (bool *) palloc(natts * sizeof(bool));
718 
719  /* Initialize the rewrite operation */
720  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
721  *multi_cutoff);
722 
723 
724  /* Set up sorting if wanted */
725  if (use_sort)
726  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
728  NULL, TUPLESORT_NONE);
729  else
730  tuplesort = NULL;
731 
732  /*
733  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
734  * that still need to be copied, we scan with SnapshotAny and use
735  * HeapTupleSatisfiesVacuum for the visibility test.
736  */
737  if (OldIndex != NULL && !use_sort)
738  {
739  const int ci_index[] = {
742  };
743  int64 ci_val[2];
744 
745  /* Set phase and OIDOldIndex to columns */
747  ci_val[1] = RelationGetRelid(OldIndex);
748  pgstat_progress_update_multi_param(2, ci_index, ci_val);
749 
750  tableScan = NULL;
751  heapScan = NULL;
752  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
753  index_rescan(indexScan, NULL, 0, NULL, 0);
754  }
755  else
756  {
757  /* In scan-and-sort mode and also VACUUM FULL, set phase */
760 
761  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
762  heapScan = (HeapScanDesc) tableScan;
763  indexScan = NULL;
764 
765  /* Set total heap blocks */
767  heapScan->rs_nblocks);
768  }
769 
770  slot = table_slot_create(OldHeap, NULL);
771  hslot = (BufferHeapTupleTableSlot *) slot;
772 
773  /*
774  * Scan through the OldHeap, either in OldIndex order or sequentially;
775  * copy each tuple into the NewHeap, or transiently to the tuplesort
776  * module. Note that we don't bother sorting dead tuples (they won't get
777  * to the new table anyway).
778  */
779  for (;;)
780  {
781  HeapTuple tuple;
782  Buffer buf;
783  bool isdead;
784 
786 
787  if (indexScan != NULL)
788  {
789  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
790  break;
791 
792  /* Since we used no scan keys, should never need to recheck */
793  if (indexScan->xs_recheck)
794  elog(ERROR, "CLUSTER does not support lossy index conditions");
795  }
796  else
797  {
798  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
799  {
800  /*
801  * If the last pages of the scan were empty, we would go to
802  * the next phase while heap_blks_scanned != heap_blks_total.
803  * Instead, to ensure that heap_blks_scanned is equivalent to
804  * heap_blks_total after the table scan phase, this parameter
805  * is manually updated to the correct value when the table
806  * scan finishes.
807  */
809  heapScan->rs_nblocks);
810  break;
811  }
812 
813  /*
814  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
815  * scanned
816  *
817  * Note that heapScan may start at an offset and wrap around, i.e.
818  * rs_startblock may be >0, and rs_cblock may end with a number
819  * below rs_startblock. To prevent showing this wraparound to the
820  * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
821  */
822  if (prev_cblock != heapScan->rs_cblock)
823  {
825  (heapScan->rs_cblock +
826  heapScan->rs_nblocks -
827  heapScan->rs_startblock
828  ) % heapScan->rs_nblocks + 1);
829  prev_cblock = heapScan->rs_cblock;
830  }
831  }
832 
833  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
834  buf = hslot->buffer;
835 
837 
838  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
839  {
840  case HEAPTUPLE_DEAD:
841  /* Definitely dead */
842  isdead = true;
843  break;
845  *tups_recently_dead += 1;
846  /* fall through */
847  case HEAPTUPLE_LIVE:
848  /* Live or recently dead, must copy it */
849  isdead = false;
850  break;
852 
853  /*
854  * Since we hold exclusive lock on the relation, normally the
855  * only way to see this is if it was inserted earlier in our
856  * own transaction. However, it can happen in system
857  * catalogs, since we tend to release write lock before commit
858  * there. Give a warning if neither case applies; but in any
859  * case we had better copy it.
860  */
861  if (!is_system_catalog &&
863  elog(WARNING, "concurrent insert in progress within table \"%s\"",
864  RelationGetRelationName(OldHeap));
865  /* treat as live */
866  isdead = false;
867  break;
869 
870  /*
871  * Similar situation to INSERT_IN_PROGRESS case.
872  */
873  if (!is_system_catalog &&
875  elog(WARNING, "concurrent delete in progress within table \"%s\"",
876  RelationGetRelationName(OldHeap));
877  /* treat as recently dead */
878  *tups_recently_dead += 1;
879  isdead = false;
880  break;
881  default:
882  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
883  isdead = false; /* keep compiler quiet */
884  break;
885  }
886 
888 
889  if (isdead)
890  {
891  *tups_vacuumed += 1;
892  /* heap rewrite module still needs to see it... */
893  if (rewrite_heap_dead_tuple(rwstate, tuple))
894  {
895  /* A previous recently-dead tuple is now known dead */
896  *tups_vacuumed += 1;
897  *tups_recently_dead -= 1;
898  }
899  continue;
900  }
901 
902  *num_tuples += 1;
903  if (tuplesort != NULL)
904  {
905  tuplesort_putheaptuple(tuplesort, tuple);
906 
907  /*
908  * In scan-and-sort mode, report increase in number of tuples
909  * scanned
910  */
912  *num_tuples);
913  }
914  else
915  {
916  const int ct_index[] = {
919  };
920  int64 ct_val[2];
921 
922  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
923  values, isnull, rwstate);
924 
925  /*
926  * In indexscan mode and also VACUUM FULL, report increase in
927  * number of tuples scanned and written
928  */
929  ct_val[0] = *num_tuples;
930  ct_val[1] = *num_tuples;
931  pgstat_progress_update_multi_param(2, ct_index, ct_val);
932  }
933  }
934 
935  if (indexScan != NULL)
936  index_endscan(indexScan);
937  if (tableScan != NULL)
938  table_endscan(tableScan);
939  if (slot)
941 
942  /*
943  * In scan-and-sort mode, complete the sort, then read out all live tuples
944  * from the tuplesort and write them to the new relation.
945  */
946  if (tuplesort != NULL)
947  {
948  double n_tuples = 0;
949 
950  /* Report that we are now sorting tuples */
953 
954  tuplesort_performsort(tuplesort);
955 
956  /* Report that we are now writing new heap */
959 
960  for (;;)
961  {
962  HeapTuple tuple;
963 
965 
966  tuple = tuplesort_getheaptuple(tuplesort, true);
967  if (tuple == NULL)
968  break;
969 
970  n_tuples += 1;
972  OldHeap, NewHeap,
973  values, isnull,
974  rwstate);
975  /* Report n_tuples */
977  n_tuples);
978  }
979 
980  tuplesort_end(tuplesort);
981  }
982 
983  /* Write out any remaining tuples, and fsync if needed */
984  end_heap_rewrite(rwstate);
985 
986  /* Clean up */
987  pfree(values);
988  pfree(isnull);
989 }
990 
991 /*
992  * Prepare to analyze the next block in the read stream. Returns false if
993  * the stream is exhausted and true otherwise. The scan must have been started
994  * with SO_TYPE_ANALYZE option.
995  *
996  * This routine holds a buffer pin and lock on the heap page. They are held
997  * until heapam_scan_analyze_next_tuple() returns false. That is until all the
998  * items of the heap page are analyzed.
999  */
/*
 * On success the scan's current buffer (rs_cbuf), current block number
 * (rs_cblock) and current item index (rs_cindex, reset to
 * FirstOffsetNumber) are updated for the new page.
 *
 * NOTE(review): listing lines 1001 (function name and parameters, which
 * include "scan" and "stream") and 1017 are missing from this text.  Line
 * 1017 presumably takes the buffer share-lock described in the comment
 * inside the function; as shown here no lock call is visible.  Presumably
 * heapam_scan_analyze_next_block -- restore from the upstream file.
 */
1000 static bool
1002 {
1003  HeapScanDesc hscan = (HeapScanDesc) scan;
1004 
1005  /*
1006  * We must maintain a pin on the target page's buffer to ensure that
1007  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
1008  * under us. It comes from the stream already pinned. We also choose to
1009  * hold sharelock on the buffer throughout --- we could release and
1010  * re-acquire sharelock for each tuple, but since we aren't doing much
1011  * work per tuple, the extra lock traffic is probably better avoided.
1012  */
1013  hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
1014  if (!BufferIsValid(hscan->rs_cbuf))
1015  return false;
1016 
1018 
1019  hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
1020  hscan->rs_cindex = FirstOffsetNumber;
1021  return true;
1022 }
1023 
/*
 * Advance over the items of the scan's current page (hscan->rs_cbuf),
 * starting at hscan->rs_cindex, until a row worth sampling is found.
 *
 * Line pointers that are not "normal" are skipped, with dead ones counted
 * in *deadrows.  Each normal tuple is classified with
 * HeapTupleSatisfiesVacuum against OldestXmin, and *liverows / *deadrows
 * are incremented according to the case comments below.
 *
 * Returns true when a tuple to sample has been stored in the slot; in that
 * case rs_cindex is advanced past it and the buffer is left locked, so the
 * next call continues on the same page.  Returns false once the page is
 * exhausted, after releasing the buffer lock and pin, resetting rs_cbuf to
 * InvalidBuffer and clearing the slot.
 *
 * NOTE(review): listing lines 1025 (function name and leading parameters,
 * which include "scan" and "OldestXmin"), 1077, 1082, 1102, 1109 and 1133
 * are missing from this text: three case labels of the switch and the two
 * conditions that test whether the inserting/deleting transaction is our
 * own.  Presumably heapam_scan_analyze_next_tuple -- restore from the
 * upstream file before editing this function.
 */
1024 static bool
1026  double *liverows, double *deadrows,
1027  TupleTableSlot *slot)
1028 {
1029  HeapScanDesc hscan = (HeapScanDesc) scan;
1030  Page targpage;
1031  OffsetNumber maxoffset;
1032  BufferHeapTupleTableSlot *hslot;
1033 
1034  Assert(TTS_IS_BUFFERTUPLE(slot));
1035 
1036  hslot = (BufferHeapTupleTableSlot *) slot;
1037  targpage = BufferGetPage(hscan->rs_cbuf);
1038  maxoffset = PageGetMaxOffsetNumber(targpage);
1039 
1040  /* Inner loop over all tuples on the selected page */
1041  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1042  {
1043  ItemId itemid;
1044  HeapTuple targtuple = &hslot->base.tupdata;
1045  bool sample_it = false;
1046 
1047  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1048 
1049  /*
1050  * We ignore unused and redirect line pointers. DEAD line pointers
1051  * should be counted as dead, because we need vacuum to run to get rid
1052  * of them. Note that this rule agrees with the way that
1053  * heap_page_prune_and_freeze() counts things.
1054  */
1055  if (!ItemIdIsNormal(itemid))
1056  {
1057  if (ItemIdIsDead(itemid))
1058  *deadrows += 1;
1059  continue;
1060  }
1061 
1062  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1063 
1064  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1065  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1066  targtuple->t_len = ItemIdGetLength(itemid);
1067 
1068  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1069  hscan->rs_cbuf))
1070  {
1071  case HEAPTUPLE_LIVE:
1072  sample_it = true;
1073  *liverows += 1;
1074  break;
1075 
1076  case HEAPTUPLE_DEAD:
1078  /* Count dead and recently-dead rows */
1079  *deadrows += 1;
1080  break;
1081 
1083 
1084  /*
1085  * Insert-in-progress rows are not counted. We assume that
1086  * when the inserting transaction commits or aborts, it will
1087  * send a stats message to increment the proper count. This
1088  * works right only if that transaction ends after we finish
1089  * analyzing the table; if things happen in the other order,
1090  * its stats update will be overwritten by ours. However, the
1091  * error will be large only if the other transaction runs long
1092  * enough to insert many tuples, so assuming it will finish
1093  * after us is the safer option.
1094  *
1095  * A special case is that the inserting transaction might be
1096  * our own. In this case we should count and sample the row,
1097  * to accommodate users who load a table and analyze it in one
1098  * transaction. (pgstat_report_analyze has to adjust the
1099  * numbers we report to the cumulative stats system to make
1100  * this come out right.)
1101  */
1103  {
1104  sample_it = true;
1105  *liverows += 1;
1106  }
1107  break;
1108 
1110 
1111  /*
1112  * We count and sample delete-in-progress rows the same as
1113  * live ones, so that the stats counters come out right if the
1114  * deleting transaction commits after us, per the same
1115  * reasoning given above.
1116  *
1117  * If the delete was done by our own transaction, however, we
1118  * must count the row as dead to make pgstat_report_analyze's
1119  * stats adjustments come out right. (Note: this works out
1120  * properly when the row was both inserted and deleted in our
1121  * xact.)
1122  *
1123  * The net effect of these choices is that we act as though an
1124  * IN_PROGRESS transaction hasn't happened yet, except if it
1125  * is our own transaction, which we assume has happened.
1126  *
1127  * This approach ensures that we behave sanely if we see both
1128  * the pre-image and post-image rows for a row being updated
1129  * by a concurrent transaction: we will sample the pre-image
1130  * but not the post-image. We also get sane results if the
1131  * concurrent transaction never commits.
1132  */
1134  *deadrows += 1;
1135  else
1136  {
1137  sample_it = true;
1138  *liverows += 1;
1139  }
1140  break;
1141 
1142  default:
1143  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1144  break;
1145  }
1146 
1147  if (sample_it)
1148  {
1149  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1150  hscan->rs_cindex++;
1151 
1152  /* note that we leave the buffer locked here! */
1153  return true;
1154  }
1155  }
1156 
1157  /* Now release the lock and pin on the page */
1158  UnlockReleaseBuffer(hscan->rs_cbuf);
1159  hscan->rs_cbuf = InvalidBuffer;
1160 
1161  /* also prevent old slot contents from having pin on page */
1162  ExecClearTuple(slot);
1163 
1164  return false;
1165 }
1166 
1167 static double
1169  Relation indexRelation,
1170  IndexInfo *indexInfo,
1171  bool allow_sync,
1172  bool anyvisible,
1173  bool progress,
1174  BlockNumber start_blockno,
1175  BlockNumber numblocks,
1177  void *callback_state,
1178  TableScanDesc scan)
1179 {
1180  HeapScanDesc hscan;
1181  bool is_system_catalog;
1182  bool checking_uniqueness;
1183  HeapTuple heapTuple;
1185  bool isnull[INDEX_MAX_KEYS];
1186  double reltuples;
1187  ExprState *predicate;
1188  TupleTableSlot *slot;
1189  EState *estate;
1190  ExprContext *econtext;
1191  Snapshot snapshot;
1192  bool need_unregister_snapshot = false;
1193  TransactionId OldestXmin;
1194  BlockNumber previous_blkno = InvalidBlockNumber;
1195  BlockNumber root_blkno = InvalidBlockNumber;
1196  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1197 
1198  /*
1199  * sanity checks
1200  */
1201  Assert(OidIsValid(indexRelation->rd_rel->relam));
1202 
1203  /* Remember if it's a system catalog */
1204  is_system_catalog = IsSystemRelation(heapRelation);
1205 
1206  /* See whether we're verifying uniqueness/exclusion properties */
1207  checking_uniqueness = (indexInfo->ii_Unique ||
1208  indexInfo->ii_ExclusionOps != NULL);
1209 
1210  /*
1211  * "Any visible" mode is not compatible with uniqueness checks; make sure
1212  * only one of those is requested.
1213  */
1214  Assert(!(anyvisible && checking_uniqueness));
1215 
1216  /*
1217  * Need an EState for evaluation of index expressions and partial-index
1218  * predicates. Also a slot to hold the current tuple.
1219  */
1220  estate = CreateExecutorState();
1221  econtext = GetPerTupleExprContext(estate);
1222  slot = table_slot_create(heapRelation, NULL);
1223 
1224  /* Arrange for econtext's scan tuple to be the tuple under test */
1225  econtext->ecxt_scantuple = slot;
1226 
1227  /* Set up execution state for predicate, if any. */
1228  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1229 
1230  /*
1231  * Prepare for scan of the base relation. In a normal index build, we use
1232  * SnapshotAny because we must retrieve all tuples and do our own time
1233  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1234  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1235  * and index whatever's live according to that.
1236  */
1237  OldestXmin = InvalidTransactionId;
1238 
1239  /* okay to ignore lazy VACUUMs here */
1240  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1241  OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1242 
1243  if (!scan)
1244  {
1245  /*
1246  * Serial index build.
1247  *
1248  * Must begin our own heap scan in this case. We may also need to
1249  * register a snapshot whose lifetime is under our direct control.
1250  */
1251  if (!TransactionIdIsValid(OldestXmin))
1252  {
1254  need_unregister_snapshot = true;
1255  }
1256  else
1257  snapshot = SnapshotAny;
1258 
1259  scan = table_beginscan_strat(heapRelation, /* relation */
1260  snapshot, /* snapshot */
1261  0, /* number of keys */
1262  NULL, /* scan key */
1263  true, /* buffer access strategy OK */
1264  allow_sync); /* syncscan OK? */
1265  }
1266  else
1267  {
1268  /*
1269  * Parallel index build.
1270  *
1271  * Parallel case never registers/unregisters own snapshot. Snapshot
1272  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1273  * snapshot, based on same criteria as serial case.
1274  */
1276  Assert(allow_sync);
1277  snapshot = scan->rs_snapshot;
1278  }
1279 
1280  hscan = (HeapScanDesc) scan;
1281 
1282  /*
1283  * Must have called GetOldestNonRemovableTransactionId() if using
1284  * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1285  * worth checking this for parallel builds, since ambuild routines that
1286  * support parallel builds must work these details out for themselves.)
1287  */
1288  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1289  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1290  !TransactionIdIsValid(OldestXmin));
1291  Assert(snapshot == SnapshotAny || !anyvisible);
1292 
1293  /* Publish number of blocks to scan */
1294  if (progress)
1295  {
1296  BlockNumber nblocks;
1297 
1298  if (hscan->rs_base.rs_parallel != NULL)
1299  {
1301 
1303  nblocks = pbscan->phs_nblocks;
1304  }
1305  else
1306  nblocks = hscan->rs_nblocks;
1307 
1309  nblocks);
1310  }
1311 
1312  /* set our scan endpoints */
1313  if (!allow_sync)
1314  heap_setscanlimits(scan, start_blockno, numblocks);
1315  else
1316  {
1317  /* syncscan can only be requested on whole relation */
1318  Assert(start_blockno == 0);
1319  Assert(numblocks == InvalidBlockNumber);
1320  }
1321 
1322  reltuples = 0;
1323 
1324  /*
1325  * Scan all tuples in the base relation.
1326  */
1327  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1328  {
1329  bool tupleIsAlive;
1330 
1332 
1333  /* Report scan progress, if asked to. */
1334  if (progress)
1335  {
1336  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1337 
1338  if (blocks_done != previous_blkno)
1339  {
1341  blocks_done);
1342  previous_blkno = blocks_done;
1343  }
1344  }
1345 
1346  /*
1347  * When dealing with a HOT-chain of updated tuples, we want to index
1348  * the values of the live tuple (if any), but index it under the TID
1349  * of the chain's root tuple. This approach is necessary to preserve
1350  * the HOT-chain structure in the heap. So we need to be able to find
1351  * the root item offset for every tuple that's in a HOT-chain. When
1352  * first reaching a new page of the relation, call
1353  * heap_get_root_tuples() to build a map of root item offsets on the
1354  * page.
1355  *
1356  * It might look unsafe to use this information across buffer
1357  * lock/unlock. However, we hold ShareLock on the table so no
1358  * ordinary insert/update/delete should occur; and we hold pin on the
1359  * buffer continuously while visiting the page, so no pruning
1360  * operation can occur either.
1361  *
1362  * In cases with only ShareUpdateExclusiveLock on the table, it's
1363  * possible for some HOT tuples to appear that we didn't know about
1364  * when we first read the page. To handle that case, we re-obtain the
1365  * list of root offsets when a HOT tuple points to a root item that we
1366  * don't know about.
1367  *
1368  * Also, although our opinions about tuple liveness could change while
1369  * we scan the page (due to concurrent transaction commits/aborts),
1370  * the chain root locations won't, so this info doesn't need to be
1371  * rebuilt after waiting for another transaction.
1372  *
1373  * Note the implied assumption that there is no more than one live
1374  * tuple per HOT-chain --- else we could create more than one index
1375  * entry pointing to the same root tuple.
1376  */
1377  if (hscan->rs_cblock != root_blkno)
1378  {
1379  Page page = BufferGetPage(hscan->rs_cbuf);
1380 
1382  heap_get_root_tuples(page, root_offsets);
1384 
1385  root_blkno = hscan->rs_cblock;
1386  }
1387 
1388  if (snapshot == SnapshotAny)
1389  {
1390  /* do our own time qual check */
1391  bool indexIt;
1392  TransactionId xwait;
1393 
1394  recheck:
1395 
1396  /*
1397  * We could possibly get away with not locking the buffer here,
1398  * since caller should hold ShareLock on the relation, but let's
1399  * be conservative about it. (This remark is still correct even
1400  * with HOT-pruning: our pin on the buffer prevents pruning.)
1401  */
1403 
1404  /*
1405  * The criteria for counting a tuple as live in this block need to
1406  * match what heapam_scan_analyze_next_tuple() does for ANALYZE,
1407  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1408  * reltuples values, e.g. when there are many recently-dead
1409  * tuples.
1410  */
1411  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1412  hscan->rs_cbuf))
1413  {
1414  case HEAPTUPLE_DEAD:
1415  /* Definitely dead, we can ignore it */
1416  indexIt = false;
1417  tupleIsAlive = false;
1418  break;
1419  case HEAPTUPLE_LIVE:
1420  /* Normal case, index and unique-check it */
1421  indexIt = true;
1422  tupleIsAlive = true;
1423  /* Count it as live, too */
1424  reltuples += 1;
1425  break;
1427 
1428  /*
1429  * If tuple is recently deleted then we must index it
1430  * anyway to preserve MVCC semantics. (Pre-existing
1431  * transactions could try to use the index after we finish
1432  * building it, and may need to see such tuples.)
1433  *
1434  * However, if it was HOT-updated then we must only index
1435  * the live tuple at the end of the HOT-chain. Since this
1436  * breaks semantics for pre-existing snapshots, mark the
1437  * index as unusable for them.
1438  *
1439  * We don't count recently-dead tuples in reltuples, even
1440  * if we index them; see heapam_scan_analyze_next_tuple().
1441  */
1442  if (HeapTupleIsHotUpdated(heapTuple))
1443  {
1444  indexIt = false;
1445  /* mark the index as unsafe for old snapshots */
1446  indexInfo->ii_BrokenHotChain = true;
1447  }
1448  else
1449  indexIt = true;
1450  /* In any case, exclude the tuple from unique-checking */
1451  tupleIsAlive = false;
1452  break;
1454 
1455  /*
1456  * In "anyvisible" mode, this tuple is visible and we
1457  * don't need any further checks.
1458  */
1459  if (anyvisible)
1460  {
1461  indexIt = true;
1462  tupleIsAlive = true;
1463  reltuples += 1;
1464  break;
1465  }
1466 
1467  /*
1468  * Since caller should hold ShareLock or better, normally
1469  * the only way to see this is if it was inserted earlier
1470  * in our own transaction. However, it can happen in
1471  * system catalogs, since we tend to release write lock
1472  * before commit there. Give a warning if neither case
1473  * applies.
1474  */
1475  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1477  {
1478  if (!is_system_catalog)
1479  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1480  RelationGetRelationName(heapRelation));
1481 
1482  /*
1483  * If we are performing uniqueness checks, indexing
1484  * such a tuple could lead to a bogus uniqueness
1485  * failure. In that case we wait for the inserting
1486  * transaction to finish and check again.
1487  */
1488  if (checking_uniqueness)
1489  {
1490  /*
1491  * Must drop the lock on the buffer before we wait
1492  */
1494  XactLockTableWait(xwait, heapRelation,
1495  &heapTuple->t_self,
1498  goto recheck;
1499  }
1500  }
1501  else
1502  {
1503  /*
1504  * For consistency with
1505  * heapam_scan_analyze_next_tuple(), count
1506  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1507  * when inserted by our own transaction.
1508  */
1509  reltuples += 1;
1510  }
1511 
1512  /*
1513  * We must index such tuples, since if the index build
1514  * commits then they're good.
1515  */
1516  indexIt = true;
1517  tupleIsAlive = true;
1518  break;
1520 
1521  /*
1522  * As with INSERT_IN_PROGRESS case, this is unexpected
1523  * unless it's our own deletion or a system catalog; but
1524  * in anyvisible mode, this tuple is visible.
1525  */
1526  if (anyvisible)
1527  {
1528  indexIt = true;
1529  tupleIsAlive = false;
1530  reltuples += 1;
1531  break;
1532  }
1533 
1534  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1536  {
1537  if (!is_system_catalog)
1538  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1539  RelationGetRelationName(heapRelation));
1540 
1541  /*
1542  * If we are performing uniqueness checks, assuming
1543  * the tuple is dead could lead to missing a
1544  * uniqueness violation. In that case we wait for the
1545  * deleting transaction to finish and check again.
1546  *
1547  * Also, if it's a HOT-updated tuple, we should not
1548  * index it but rather the live tuple at the end of
1549  * the HOT-chain. However, the deleting transaction
1550  * could abort, possibly leaving this tuple as live
1551  * after all, in which case it has to be indexed. The
1552  * only way to know what to do is to wait for the
1553  * deleting transaction to finish and check again.
1554  */
1555  if (checking_uniqueness ||
1556  HeapTupleIsHotUpdated(heapTuple))
1557  {
1558  /*
1559  * Must drop the lock on the buffer before we wait
1560  */
1562  XactLockTableWait(xwait, heapRelation,
1563  &heapTuple->t_self,
1566  goto recheck;
1567  }
1568 
1569  /*
1570  * Otherwise index it but don't check for uniqueness,
1571  * the same as a RECENTLY_DEAD tuple.
1572  */
1573  indexIt = true;
1574 
1575  /*
1576  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1577  * if they were not deleted by the current
1578  * transaction. That's what
1579  * heapam_scan_analyze_next_tuple() does, and we want
1580  * the behavior to be consistent.
1581  */
1582  reltuples += 1;
1583  }
1584  else if (HeapTupleIsHotUpdated(heapTuple))
1585  {
1586  /*
1587  * It's a HOT-updated tuple deleted by our own xact.
1588  * We can assume the deletion will commit (else the
1589  * index contents don't matter), so treat the same as
1590  * RECENTLY_DEAD HOT-updated tuples.
1591  */
1592  indexIt = false;
1593  /* mark the index as unsafe for old snapshots */
1594  indexInfo->ii_BrokenHotChain = true;
1595  }
1596  else
1597  {
1598  /*
1599  * It's a regular tuple deleted by our own xact. Index
1600  * it, but don't check for uniqueness nor count in
1601  * reltuples, the same as a RECENTLY_DEAD tuple.
1602  */
1603  indexIt = true;
1604  }
1605  /* In any case, exclude the tuple from unique-checking */
1606  tupleIsAlive = false;
1607  break;
1608  default:
1609  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1610  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1611  break;
1612  }
1613 
1615 
1616  if (!indexIt)
1617  continue;
1618  }
1619  else
1620  {
1621  /* heap_getnext did the time qual check */
1622  tupleIsAlive = true;
1623  reltuples += 1;
1624  }
1625 
1627 
1628  /* Set up for predicate or expression evaluation */
1629  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1630 
1631  /*
1632  * In a partial index, discard tuples that don't satisfy the
1633  * predicate.
1634  */
1635  if (predicate != NULL)
1636  {
1637  if (!ExecQual(predicate, econtext))
1638  continue;
1639  }
1640 
1641  /*
1642  * For the current heap tuple, extract all the attributes we use in
1643  * this index, and note which are null. This also performs evaluation
1644  * of any expressions needed.
1645  */
1646  FormIndexDatum(indexInfo,
1647  slot,
1648  estate,
1649  values,
1650  isnull);
1651 
1652  /*
1653  * You'd think we should go ahead and build the index tuple here, but
1654  * some index AMs want to do further processing on the data first. So
1655  * pass the values[] and isnull[] arrays, instead.
1656  */
1657 
1658  if (HeapTupleIsHeapOnly(heapTuple))
1659  {
1660  /*
1661  * For a heap-only tuple, pretend its TID is that of the root. See
1662  * src/backend/access/heap/README.HOT for discussion.
1663  */
1664  ItemPointerData tid;
1665  OffsetNumber offnum;
1666 
1667  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1668 
1669  /*
1670  * If a HOT tuple points to a root that we don't know about,
1671  * obtain root items afresh. If that still fails, report it as
1672  * corruption.
1673  */
1674  if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1675  {
1676  Page page = BufferGetPage(hscan->rs_cbuf);
1677 
1679  heap_get_root_tuples(page, root_offsets);
1681  }
1682 
1683  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1684  ereport(ERROR,
1686  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1687  ItemPointerGetBlockNumber(&heapTuple->t_self),
1688  offnum,
1689  RelationGetRelationName(heapRelation))));
1690 
1691  ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1692  root_offsets[offnum - 1]);
1693 
1694  /* Call the AM's callback routine to process the tuple */
1695  callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1696  callback_state);
1697  }
1698  else
1699  {
1700  /* Call the AM's callback routine to process the tuple */
1701  callback(indexRelation, &heapTuple->t_self, values, isnull,
1702  tupleIsAlive, callback_state);
1703  }
1704  }
1705 
1706  /* Report scan progress one last time. */
1707  if (progress)
1708  {
1709  BlockNumber blks_done;
1710 
1711  if (hscan->rs_base.rs_parallel != NULL)
1712  {
1714 
1716  blks_done = pbscan->phs_nblocks;
1717  }
1718  else
1719  blks_done = hscan->rs_nblocks;
1720 
1722  blks_done);
1723  }
1724 
1725  table_endscan(scan);
1726 
1727  /* we can now forget our snapshot, if set and registered by us */
1728  if (need_unregister_snapshot)
1729  UnregisterSnapshot(snapshot);
1730 
1732 
1733  FreeExecutorState(estate);
1734 
1735  /* These may have been pointing to the now-gone estate */
1736  indexInfo->ii_ExpressionsState = NIL;
1737  indexInfo->ii_PredicateState = NULL;
1738 
1739  return reltuples;
1740 }
1741 
1742 static void
1744  Relation indexRelation,
1745  IndexInfo *indexInfo,
1746  Snapshot snapshot,
1748 {
1749  TableScanDesc scan;
1750  HeapScanDesc hscan;
1751  HeapTuple heapTuple;
1753  bool isnull[INDEX_MAX_KEYS];
1754  ExprState *predicate;
1755  TupleTableSlot *slot;
1756  EState *estate;
1757  ExprContext *econtext;
1758  BlockNumber root_blkno = InvalidBlockNumber;
1759  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1760  bool in_index[MaxHeapTuplesPerPage];
1761  BlockNumber previous_blkno = InvalidBlockNumber;
1762 
1763  /* state variables for the merge */
1764  ItemPointer indexcursor = NULL;
1765  ItemPointerData decoded;
1766  bool tuplesort_empty = false;
1767 
1768  /*
1769  * sanity checks
1770  */
1771  Assert(OidIsValid(indexRelation->rd_rel->relam));
1772 
1773  /*
1774  * Need an EState for evaluation of index expressions and partial-index
1775  * predicates. Also a slot to hold the current tuple.
1776  */
1777  estate = CreateExecutorState();
1778  econtext = GetPerTupleExprContext(estate);
1779  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1780  &TTSOpsHeapTuple);
1781 
1782  /* Arrange for econtext's scan tuple to be the tuple under test */
1783  econtext->ecxt_scantuple = slot;
1784 
1785  /* Set up execution state for predicate, if any. */
1786  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1787 
1788  /*
1789  * Prepare for scan of the base relation. We need just those tuples
1790  * satisfying the passed-in reference snapshot. We must disable syncscan
1791  * here, because it's critical that we read from block zero forward to
1792  * match the sorted TIDs.
1793  */
1794  scan = table_beginscan_strat(heapRelation, /* relation */
1795  snapshot, /* snapshot */
1796  0, /* number of keys */
1797  NULL, /* scan key */
1798  true, /* buffer access strategy OK */
1799  false); /* syncscan not OK */
1800  hscan = (HeapScanDesc) scan;
1801 
1803  hscan->rs_nblocks);
1804 
1805  /*
1806  * Scan all tuples matching the snapshot.
1807  */
1808  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1809  {
1810  ItemPointer heapcursor = &heapTuple->t_self;
1811  ItemPointerData rootTuple;
1812  OffsetNumber root_offnum;
1813 
1815 
1816  state->htups += 1;
1817 
1818  if ((previous_blkno == InvalidBlockNumber) ||
1819  (hscan->rs_cblock != previous_blkno))
1820  {
1822  hscan->rs_cblock);
1823  previous_blkno = hscan->rs_cblock;
1824  }
1825 
1826  /*
1827  * As commented in table_index_build_scan, we should index heap-only
1828  * tuples under the TIDs of their root tuples; so when we advance onto
1829  * a new heap page, build a map of root item offsets on the page.
1830  *
1831  * This complicates merging against the tuplesort output: we will
1832  * visit the live tuples in order by their offsets, but the root
1833  * offsets that we need to compare against the index contents might be
1834  * ordered differently. So we might have to "look back" within the
1835  * tuplesort output, but only within the current page. We handle that
1836  * by keeping a bool array in_index[] showing all the
1837  * already-passed-over tuplesort output TIDs of the current page. We
1838  * clear that array here, when advancing onto a new heap page.
1839  */
1840  if (hscan->rs_cblock != root_blkno)
1841  {
1842  Page page = BufferGetPage(hscan->rs_cbuf);
1843 
1845  heap_get_root_tuples(page, root_offsets);
1847 
1848  memset(in_index, 0, sizeof(in_index));
1849 
1850  root_blkno = hscan->rs_cblock;
1851  }
1852 
1853  /* Convert actual tuple TID to root TID */
1854  rootTuple = *heapcursor;
1855  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1856 
1857  if (HeapTupleIsHeapOnly(heapTuple))
1858  {
1859  root_offnum = root_offsets[root_offnum - 1];
1860  if (!OffsetNumberIsValid(root_offnum))
1861  ereport(ERROR,
1863  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1864  ItemPointerGetBlockNumber(heapcursor),
1865  ItemPointerGetOffsetNumber(heapcursor),
1866  RelationGetRelationName(heapRelation))));
1867  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1868  }
1869 
1870  /*
1871  * "merge" by skipping through the index tuples until we find or pass
1872  * the current root tuple.
1873  */
1874  while (!tuplesort_empty &&
1875  (!indexcursor ||
1876  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1877  {
1878  Datum ts_val;
1879  bool ts_isnull;
1880 
1881  if (indexcursor)
1882  {
1883  /*
1884  * Remember index items seen earlier on the current heap page
1885  */
1886  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1887  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1888  }
1889 
1890  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1891  false, &ts_val, &ts_isnull,
1892  NULL);
1893  Assert(tuplesort_empty || !ts_isnull);
1894  if (!tuplesort_empty)
1895  {
1896  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1897  indexcursor = &decoded;
1898  }
1899  else
1900  {
1901  /* Be tidy */
1902  indexcursor = NULL;
1903  }
1904  }
1905 
1906  /*
1907  * If the tuplesort has overshot *and* we didn't see a match earlier,
1908  * then this tuple is missing from the index, so insert it.
1909  */
1910  if ((tuplesort_empty ||
1911  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1912  !in_index[root_offnum - 1])
1913  {
1915 
1916  /* Set up for predicate or expression evaluation */
1917  ExecStoreHeapTuple(heapTuple, slot, false);
1918 
1919  /*
1920  * In a partial index, discard tuples that don't satisfy the
1921  * predicate.
1922  */
1923  if (predicate != NULL)
1924  {
1925  if (!ExecQual(predicate, econtext))
1926  continue;
1927  }
1928 
1929  /*
1930  * For the current heap tuple, extract all the attributes we use
1931  * in this index, and note which are null. This also performs
1932  * evaluation of any expressions needed.
1933  */
1934  FormIndexDatum(indexInfo,
1935  slot,
1936  estate,
1937  values,
1938  isnull);
1939 
1940  /*
1941  * You'd think we should go ahead and build the index tuple here,
1942  * but some index AMs want to do further processing on the data
1943  * first. So pass the values[] and isnull[] arrays, instead.
1944  */
1945 
1946  /*
1947  * If the tuple is already committed dead, you might think we
1948  * could suppress uniqueness checking, but this is no longer true
1949  * in the presence of HOT, because the insert is actually a proxy
1950  * for a uniqueness check on the whole HOT-chain. That is, the
1951  * tuple we have here could be dead because it was already
1952  * HOT-updated, and if so the updating transaction will not have
1953  * thought it should insert index entries. The index AM will
1954  * check the whole HOT-chain and correctly detect a conflict if
1955  * there is one.
1956  */
1957 
1958  index_insert(indexRelation,
1959  values,
1960  isnull,
1961  &rootTuple,
1962  heapRelation,
1963  indexInfo->ii_Unique ?
1965  false,
1966  indexInfo);
1967 
1968  state->tups_inserted += 1;
1969  }
1970  }
1971 
1972  table_endscan(scan);
1973 
1975 
1976  FreeExecutorState(estate);
1977 
1978  /* These may have been pointing to the now-gone estate */
1979  indexInfo->ii_ExpressionsState = NIL;
1980  indexInfo->ii_PredicateState = NULL;
1981 }
1982 
1983 /*
1984  * Return the number of blocks that have been read by this scan since
1985  * starting. This is meant for progress reporting rather than be fully
1986  * accurate: in a parallel scan, workers can be concurrently reading blocks
1987  * further ahead than what we report.
1988  */
1989 static BlockNumber
1991 {
1992  ParallelBlockTableScanDesc bpscan = NULL;
1993  BlockNumber startblock;
1994  BlockNumber blocks_done;
1995 
1996  if (hscan->rs_base.rs_parallel != NULL)
1997  {
1999  startblock = bpscan->phs_startblock;
2000  }
2001  else
2002  startblock = hscan->rs_startblock;
2003 
2004  /*
2005  * Might have wrapped around the end of the relation, if startblock was
2006  * not zero.
2007  */
2008  if (hscan->rs_cblock > startblock)
2009  blocks_done = hscan->rs_cblock - startblock;
2010  else
2011  {
2012  BlockNumber nblocks;
2013 
2014  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2015  blocks_done = nblocks - startblock +
2016  hscan->rs_cblock;
2017  }
2018 
2019  return blocks_done;
2020 }
2021 
2022 
2023 /* ------------------------------------------------------------------------
2024  * Miscellaneous callbacks for the heap AM
2025  * ------------------------------------------------------------------------
2026  */
2027 
2028 /*
2029  * Check to see whether the table needs a TOAST table. It does only if
2030  * (1) there are any toastable attributes, and (2) the maximum length
2031  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2032  * create a toast table for something like "f1 varchar(20)".)
2033  */
2034 static bool
2036 {
2037  int32 data_length = 0;
2038  bool maxlength_unknown = false;
2039  bool has_toastable_attrs = false;
2040  TupleDesc tupdesc = rel->rd_att;
2041  int32 tuple_length;
2042  int i;
2043 
2044  for (i = 0; i < tupdesc->natts; i++)
2045  {
2046  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2047 
2048  if (att->attisdropped)
2049  continue;
2050  data_length = att_align_nominal(data_length, att->attalign);
2051  if (att->attlen > 0)
2052  {
2053  /* Fixed-length types are never toastable */
2054  data_length += att->attlen;
2055  }
2056  else
2057  {
2058  int32 maxlen = type_maximum_size(att->atttypid,
2059  att->atttypmod);
2060 
2061  if (maxlen < 0)
2062  maxlength_unknown = true;
2063  else
2064  data_length += maxlen;
2065  if (att->attstorage != TYPSTORAGE_PLAIN)
2066  has_toastable_attrs = true;
2067  }
2068  }
2069  if (!has_toastable_attrs)
2070  return false; /* nothing to toast? */
2071  if (maxlength_unknown)
2072  return true; /* any unlimited-length attrs? */
2073  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2074  BITMAPLEN(tupdesc->natts)) +
2075  MAXALIGN(data_length);
2076  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2077 }
2078 
2079 /*
2080  * TOAST tables for heap relations are just heap relations.
2081  */
2082 static Oid
2084 {
2085  return rel->rd_rel->relam;
2086 }
2087 
2088 
2089 /* ------------------------------------------------------------------------
2090  * Planner related callbacks for the heap AM
2091  * ------------------------------------------------------------------------
2092  */
2093 
2094 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2095  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2096 #define HEAP_USABLE_BYTES_PER_PAGE \
2097  (BLCKSZ - SizeOfPageHeaderData)
2098 
2099 static void
2101  BlockNumber *pages, double *tuples,
2102  double *allvisfrac)
2103 {
2104  table_block_relation_estimate_size(rel, attr_widths, pages,
2105  tuples, allvisfrac,
2108 }
2109 
2110 
2111 /* ------------------------------------------------------------------------
2112  * Executor related callbacks for the heap AM
2113  * ------------------------------------------------------------------------
2114  */
2115 
2116 static bool
2118  BlockNumber *blockno, bool *recheck,
2119  uint64 *lossy_pages, uint64 *exact_pages)
2120 {
2121  HeapScanDesc hscan = (HeapScanDesc) scan;
2122  BlockNumber block;
2123  Buffer buffer;
2124  Snapshot snapshot;
2125  int ntup;
2126  TBMIterateResult *tbmres;
2127 
2128  hscan->rs_cindex = 0;
2129  hscan->rs_ntuples = 0;
2130 
2131  *blockno = InvalidBlockNumber;
2132  *recheck = true;
2133 
2134  do
2135  {
2137 
2138  if (scan->st.bitmap.rs_shared_iterator)
2139  tbmres = tbm_shared_iterate(scan->st.bitmap.rs_shared_iterator);
2140  else
2141  tbmres = tbm_iterate(scan->st.bitmap.rs_iterator);
2142 
2143  if (tbmres == NULL)
2144  return false;
2145 
2146  /*
2147  * Ignore any claimed entries past what we think is the end of the
2148  * relation. It may have been extended after the start of our scan (we
2149  * only hold an AccessShareLock, and it could be inserts from this
2150  * backend). We don't take this optimization in SERIALIZABLE
2151  * isolation though, as we need to examine all invisible tuples
2152  * reachable by the index.
2153  */
2154  } while (!IsolationIsSerializable() &&
2155  tbmres->blockno >= hscan->rs_nblocks);
2156 
2157  /* Got a valid block */
2158  *blockno = tbmres->blockno;
2159  *recheck = tbmres->recheck;
2160 
2161  /*
2162  * We can skip fetching the heap page if we don't need any fields from the
2163  * heap, the bitmap entries don't need rechecking, and all tuples on the
2164  * page are visible to our transaction.
2165  */
2166  if (!(scan->rs_flags & SO_NEED_TUPLES) &&
2167  !tbmres->recheck &&
2168  VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer))
2169  {
2170  /* can't be lossy in the skip_fetch case */
2171  Assert(tbmres->ntuples >= 0);
2172  Assert(hscan->rs_empty_tuples_pending >= 0);
2173 
2174  hscan->rs_empty_tuples_pending += tbmres->ntuples;
2175 
2176  return true;
2177  }
2178 
2179  block = tbmres->blockno;
2180 
2181  /*
2182  * Acquire pin on the target heap page, trading in any pin we held before.
2183  */
2184  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2185  scan->rs_rd,
2186  block);
2187  hscan->rs_cblock = block;
2188  buffer = hscan->rs_cbuf;
2189  snapshot = scan->rs_snapshot;
2190 
2191  ntup = 0;
2192 
2193  /*
2194  * Prune and repair fragmentation for the whole page, if possible.
2195  */
2196  heap_page_prune_opt(scan->rs_rd, buffer);
2197 
2198  /*
2199  * We must hold share lock on the buffer content while examining tuple
2200  * visibility. Afterwards, however, the tuples we have found to be
2201  * visible are guaranteed good as long as we hold the buffer pin.
2202  */
2203  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2204 
2205  /*
2206  * We need two separate strategies for lossy and non-lossy cases.
2207  */
2208  if (tbmres->ntuples >= 0)
2209  {
2210  /*
2211  * Bitmap is non-lossy, so we just look through the offsets listed in
2212  * tbmres; but we have to follow any HOT chain starting at each such
2213  * offset.
2214  */
2215  int curslot;
2216 
2217  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2218  {
2219  OffsetNumber offnum = tbmres->offsets[curslot];
2220  ItemPointerData tid;
2221  HeapTupleData heapTuple;
2222 
2223  ItemPointerSet(&tid, block, offnum);
2224  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2225  &heapTuple, NULL, true))
2226  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2227  }
2228  }
2229  else
2230  {
2231  /*
2232  * Bitmap is lossy, so we must examine each line pointer on the page.
2233  * But we can ignore HOT chains, since we'll check each tuple anyway.
2234  */
2235  Page page = BufferGetPage(buffer);
2236  OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
2237  OffsetNumber offnum;
2238 
2239  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2240  {
2241  ItemId lp;
2242  HeapTupleData loctup;
2243  bool valid;
2244 
2245  lp = PageGetItemId(page, offnum);
2246  if (!ItemIdIsNormal(lp))
2247  continue;
2248  loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2249  loctup.t_len = ItemIdGetLength(lp);
2250  loctup.t_tableOid = scan->rs_rd->rd_id;
2251  ItemPointerSet(&loctup.t_self, block, offnum);
2252  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2253  if (valid)
2254  {
2255  hscan->rs_vistuples[ntup++] = offnum;
2256  PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
2257  HeapTupleHeaderGetXmin(loctup.t_data));
2258  }
2259  HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2260  buffer, snapshot);
2261  }
2262  }
2263 
2264  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2265 
2266  Assert(ntup <= MaxHeapTuplesPerPage);
2267  hscan->rs_ntuples = ntup;
2268 
2269  if (tbmres->ntuples >= 0)
2270  (*exact_pages)++;
2271  else
2272  (*lossy_pages)++;
2273 
2274  /*
2275  * Return true to indicate that a valid block was found and the bitmap is
2276  * not exhausted. If there are no visible tuples on this page,
2277  * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
2278  * return false returning control to this function to advance to the next
2279  * block in the bitmap.
2280  */
2281  return true;
2282 }
2283 
2284 static bool
2286  TupleTableSlot *slot)
2287 {
2288  HeapScanDesc hscan = (HeapScanDesc) scan;
2289  OffsetNumber targoffset;
2290  Page page;
2291  ItemId lp;
2292 
2293  if (hscan->rs_empty_tuples_pending > 0)
2294  {
2295  /*
2296  * If we don't have to fetch the tuple, just return nulls.
2297  */
2298  ExecStoreAllNullTuple(slot);
2299  hscan->rs_empty_tuples_pending--;
2300  return true;
2301  }
2302 
2303  /*
2304  * Out of range? If so, nothing more to look at on this page
2305  */
2306  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2307  return false;
2308 
2309  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2310  page = BufferGetPage(hscan->rs_cbuf);
2311  lp = PageGetItemId(page, targoffset);
2312  Assert(ItemIdIsNormal(lp));
2313 
2314  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2315  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2316  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2317  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2318 
2320 
2321  /*
2322  * Set up the result slot to point to this tuple. Note that the slot
2323  * acquires a pin on the buffer.
2324  */
2326  slot,
2327  hscan->rs_cbuf);
2328 
2329  hscan->rs_cindex++;
2330 
2331  return true;
2332 }
2333 
2334 static bool
2336 {
2337  HeapScanDesc hscan = (HeapScanDesc) scan;
2338  TsmRoutine *tsm = scanstate->tsmroutine;
2339  BlockNumber blockno;
2340 
2341  /* return false immediately if relation is empty */
2342  if (hscan->rs_nblocks == 0)
2343  return false;
2344 
2345  /* release previous scan buffer, if any */
2346  if (BufferIsValid(hscan->rs_cbuf))
2347  {
2348  ReleaseBuffer(hscan->rs_cbuf);
2349  hscan->rs_cbuf = InvalidBuffer;
2350  }
2351 
2352  if (tsm->NextSampleBlock)
2353  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2354  else
2355  {
2356  /* scanning table sequentially */
2357 
2358  if (hscan->rs_cblock == InvalidBlockNumber)
2359  {
2360  Assert(!hscan->rs_inited);
2361  blockno = hscan->rs_startblock;
2362  }
2363  else
2364  {
2365  Assert(hscan->rs_inited);
2366 
2367  blockno = hscan->rs_cblock + 1;
2368 
2369  if (blockno >= hscan->rs_nblocks)
2370  {
2371  /* wrap to beginning of rel, might not have started at 0 */
2372  blockno = 0;
2373  }
2374 
2375  /*
2376  * Report our new scan position for synchronization purposes.
2377  *
2378  * Note: we do this before checking for end of scan so that the
2379  * final state of the position hint is back at the start of the
2380  * rel. That's not strictly necessary, but otherwise when you run
2381  * the same query multiple times the starting position would shift
2382  * a little bit backwards on every invocation, which is confusing.
2383  * We don't guarantee any specific ordering in general, though.
2384  */
2385  if (scan->rs_flags & SO_ALLOW_SYNC)
2386  ss_report_location(scan->rs_rd, blockno);
2387 
2388  if (blockno == hscan->rs_startblock)
2389  {
2390  blockno = InvalidBlockNumber;
2391  }
2392  }
2393  }
2394 
2395  hscan->rs_cblock = blockno;
2396 
2397  if (!BlockNumberIsValid(blockno))
2398  {
2399  hscan->rs_inited = false;
2400  return false;
2401  }
2402 
2403  Assert(hscan->rs_cblock < hscan->rs_nblocks);
2404 
2405  /*
2406  * Be sure to check for interrupts at least once per page. Checks at
2407  * higher code levels won't be able to stop a sample scan that encounters
2408  * many pages' worth of consecutive dead tuples.
2409  */
2411 
2412  /* Read page using selected strategy */
2414  blockno, RBM_NORMAL, hscan->rs_strategy);
2415 
2416  /* in pagemode, prune the page and determine visible tuple offsets */
2417  if (hscan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
2418  heap_prepare_pagescan(scan);
2419 
2420  hscan->rs_inited = true;
2421  return true;
2422 }
2423 
2424 static bool
2426  TupleTableSlot *slot)
2427 {
2428  HeapScanDesc hscan = (HeapScanDesc) scan;
2429  TsmRoutine *tsm = scanstate->tsmroutine;
2430  BlockNumber blockno = hscan->rs_cblock;
2431  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2432 
2433  Page page;
2434  bool all_visible;
2435  OffsetNumber maxoffset;
2436 
2437  /*
2438  * When not using pagemode, we must lock the buffer during tuple
2439  * visibility checks.
2440  */
2441  if (!pagemode)
2443 
2444  page = (Page) BufferGetPage(hscan->rs_cbuf);
2445  all_visible = PageIsAllVisible(page) &&
2447  maxoffset = PageGetMaxOffsetNumber(page);
2448 
2449  for (;;)
2450  {
2451  OffsetNumber tupoffset;
2452 
2454 
2455  /* Ask the tablesample method which tuples to check on this page. */
2456  tupoffset = tsm->NextSampleTuple(scanstate,
2457  blockno,
2458  maxoffset);
2459 
2460  if (OffsetNumberIsValid(tupoffset))
2461  {
2462  ItemId itemid;
2463  bool visible;
2464  HeapTuple tuple = &(hscan->rs_ctup);
2465 
2466  /* Skip invalid tuple pointers. */
2467  itemid = PageGetItemId(page, tupoffset);
2468  if (!ItemIdIsNormal(itemid))
2469  continue;
2470 
2471  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2472  tuple->t_len = ItemIdGetLength(itemid);
2473  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2474 
2475 
2476  if (all_visible)
2477  visible = true;
2478  else
2479  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2480  tuple, tupoffset);
2481 
2482  /* in pagemode, heap_prepare_pagescan did this for us */
2483  if (!pagemode)
2484  HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2485  hscan->rs_cbuf, scan->rs_snapshot);
2486 
2487  /* Try next tuple from same page. */
2488  if (!visible)
2489  continue;
2490 
2491  /* Found visible tuple, return it. */
2492  if (!pagemode)
2494 
2495  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2496 
2497  /* Count successfully-fetched tuples as heap fetches */
2499 
2500  return true;
2501  }
2502  else
2503  {
2504  /*
2505  * If we get here, it means we've exhausted the items on this page
2506  * and it's time to move to the next.
2507  */
2508  if (!pagemode)
2510 
2511  ExecClearTuple(slot);
2512  return false;
2513  }
2514  }
2515 
2516  Assert(0);
2517 }
2518 
2519 
2520 /* ----------------------------------------------------------------------------
2521  * Helper functions for the above.
2522  * ----------------------------------------------------------------------------
2523  */
2524 
2525 /*
2526  * Reconstruct and rewrite the given tuple
2527  *
2528  * We cannot simply copy the tuple as-is, for several reasons:
2529  *
2530  * 1. We'd like to squeeze out the values of any dropped columns, both
2531  * to save space and to ensure we have no corner-case failures. (It's
2532  * possible for example that the new table hasn't got a TOAST table
2533  * and so is unable to store any large values of dropped cols.)
2534  *
2535  * 2. The tuple might not even be legal for the new table; this is
2536  * currently only known to happen as an after-effect of ALTER TABLE
2537  * SET WITHOUT OIDS.
2538  *
2539  * So, we must reconstruct the tuple from component Datums.
2540  */
2541 static void
2543  Relation OldHeap, Relation NewHeap,
2544  Datum *values, bool *isnull, RewriteState rwstate)
2545 {
2546  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2547  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2548  HeapTuple copiedTuple;
2549  int i;
2550 
2551  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2552 
2553  /* Be sure to null out any dropped columns */
2554  for (i = 0; i < newTupDesc->natts; i++)
2555  {
2556  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2557  isnull[i] = true;
2558  }
2559 
2560  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2561 
2562  /* The heap rewrite module does the rest */
2563  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2564 
2565  heap_freetuple(copiedTuple);
2566 }
2567 
2568 /*
2569  * Check visibility of the tuple.
2570  */
2571 static bool
2573  HeapTuple tuple,
2574  OffsetNumber tupoffset)
2575 {
2576  HeapScanDesc hscan = (HeapScanDesc) scan;
2577 
2578  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2579  {
2580  /*
2581  * In pageatatime mode, heap_prepare_pagescan() already did visibility
2582  * checks, so just look at the info it left in rs_vistuples[].
2583  *
2584  * We use a binary search over the known-sorted array. Note: we could
2585  * save some effort if we insisted that NextSampleTuple select tuples
2586  * in increasing order, but it's not clear that there would be enough
2587  * gain to justify the restriction.
2588  */
2589  int start = 0,
2590  end = hscan->rs_ntuples - 1;
2591 
2592  while (start <= end)
2593  {
2594  int mid = (start + end) / 2;
2595  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2596 
2597  if (tupoffset == curoffset)
2598  return true;
2599  else if (tupoffset < curoffset)
2600  end = mid - 1;
2601  else
2602  start = mid + 1;
2603  }
2604 
2605  return false;
2606  }
2607  else
2608  {
2609  /* Otherwise, we have to check the tuple individually. */
2610  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2611  buffer);
2612  }
2613 }
2614 
2615 
2616 /* ------------------------------------------------------------------------
2617  * Definition of the heap table access method.
2618  * ------------------------------------------------------------------------
2619  */
2620 
2622  .type = T_TableAmRoutine,
2623 
2624  .slot_callbacks = heapam_slot_callbacks,
2625 
2626  .scan_begin = heap_beginscan,
2627  .scan_end = heap_endscan,
2628  .scan_rescan = heap_rescan,
2629  .scan_getnextslot = heap_getnextslot,
2630 
2631  .scan_set_tidrange = heap_set_tidrange,
2632  .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
2633 
2634  .parallelscan_estimate = table_block_parallelscan_estimate,
2635  .parallelscan_initialize = table_block_parallelscan_initialize,
2636  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2637 
2638  .index_fetch_begin = heapam_index_fetch_begin,
2639  .index_fetch_reset = heapam_index_fetch_reset,
2640  .index_fetch_end = heapam_index_fetch_end,
2641  .index_fetch_tuple = heapam_index_fetch_tuple,
2642 
2643  .tuple_insert = heapam_tuple_insert,
2644  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2645  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2646  .multi_insert = heap_multi_insert,
2647  .tuple_delete = heapam_tuple_delete,
2648  .tuple_update = heapam_tuple_update,
2649  .tuple_lock = heapam_tuple_lock,
2650 
2651  .tuple_fetch_row_version = heapam_fetch_row_version,
2652  .tuple_get_latest_tid = heap_get_latest_tid,
2653  .tuple_tid_valid = heapam_tuple_tid_valid,
2654  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2655  .index_delete_tuples = heap_index_delete_tuples,
2656 
2657  .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
2658  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2659  .relation_copy_data = heapam_relation_copy_data,
2660  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2661  .relation_vacuum = heap_vacuum_rel,
2662  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2663  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2664  .index_build_range_scan = heapam_index_build_range_scan,
2665  .index_validate_scan = heapam_index_validate_scan,
2666 
2667  .relation_size = table_block_relation_size,
2668  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2669  .relation_toast_am = heapam_relation_toast_am,
2670  .relation_fetch_toast_slice = heap_fetch_toast_slice,
2671 
2672  .relation_estimate_size = heapam_estimate_rel_size,
2673 
2674  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2675  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2676  .scan_sample_next_block = heapam_scan_sample_next_block,
2677  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2678 };
2679 
2680 
2681 const TableAmRoutine *
2683 {
2684  return &heapam_methods;
2685 }
2686 
2687 Datum
2689 {
2691 }
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static Datum values[MAXATTR]
Definition: bootstrap.c:151
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:2594
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4924
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5158
void FlushRelationBuffers(Relation rel)
Definition: bufmgr.c:4492
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:793
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
@ RBM_NORMAL
Definition: bufmgr.h:45
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
#define MAXALIGN(LEN)
Definition: c.h:765
uint8_t uint8
Definition: c.h:483
#define Assert(condition)
Definition: c.h:812
int64_t int64
Definition: c.h:482
TransactionId MultiXactId
Definition: c.h:616
int32_t int32
Definition: c.h:481
uint64_t uint64
Definition: c.h:486
uint32_t uint32
Definition: c.h:485
uint32 CommandId
Definition: c.h:620
uint32 TransactionId
Definition: c.h:606
#define OidIsValid(objectId)
Definition: c.h:729
bool IsSystemRelation(Relation relation)
Definition: catalog.c:73
CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup)
Definition: combocid.c:104
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
ExprState * ExecPrepareQual(List *qual, EState *estate)
Definition: execExpr.c:771
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1341
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
const TupleTableSlotOps TTSOpsBufferHeapTuple
Definition: execTuples.c:87
TupleTableSlot * ExecStoreAllNullTuple(TupleTableSlot *slot)
Definition: execTuples.c:1663
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
TupleTableSlot * ExecStoreHeapTuple(HeapTuple tuple, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1439
const TupleTableSlotOps TTSOpsHeapTuple
Definition: execTuples.c:85
TupleTableSlot * ExecStorePinnedBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1505
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1325
EState * CreateExecutorState(void)
Definition: execUtils.c:88
void FreeExecutorState(EState *estate)
Definition: execUtils.c:191
#define GetPerTupleExprContext(estate)
Definition: executor.h:561
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:424
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int32 type_maximum_size(Oid type_oid, int32 typemod)
Definition: format_type.c:412
@ UNIQUE_CHECK_NO
Definition: genam.h:117
@ UNIQUE_CHECK_YES
Definition: genam.h:118
int maintenance_work_mem
Definition: globals.c:132
return str start
void heap_finish_speculative(Relation relation, ItemPointer tid)
Definition: heapam.c:5936
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:1985
bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
Definition: heapam.c:1502
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2674
bool heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: heapam.c:1292
void heap_endscan(TableScanDesc sscan)
Definition: heapam.c:1201
void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: heapam.c:1143
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3141
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1243
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1622
bool heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: heapam.c:1395
void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
Definition: heapam.c:1322
void heap_abort_speculative(Relation relation, ItemPointer tid)
Definition: heapam.c:6023
TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
Definition: heapam.c:1029
void heap_prepare_pagescan(TableScanDesc sscan)
Definition: heapam.c:485
TransactionId heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
Definition: heapam.c:7956
void heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:2254
TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
Definition: heapam.c:4427
void heap_get_latest_tid(TableScanDesc sscan, ItemPointer tid)
Definition: heapam.c:1774
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:413
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:9083
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:39
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:110
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_LIVE
Definition: heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:130
@ HEAPTUPLE_DEAD
Definition: heapam.h:126
static double heapam_index_build_range_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
#define HEAP_OVERHEAD_BYTES_PER_TUPLE
static void heapam_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
static IndexFetchTableData * heapam_index_fetch_begin(Relation rel)
static const TableAmRoutine heapam_methods
static void heapam_index_validate_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, Snapshot snapshot, ValidateIndexState *state)
static bool heapam_index_fetch_tuple(struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead)
static void heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
static void heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate, uint32 specToken)
static bool heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
static void heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate)
static bool heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
static TM_Result heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
static void heapam_index_fetch_reset(IndexFetchTableData *scan)
static Oid heapam_relation_toast_am(Relation rel)
static TM_Result heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
static bool heapam_relation_needs_toast_table(Relation rel)
static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan)
const TableAmRoutine * GetHeapamTableAmRoutine(void)
static bool heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
static bool heapam_scan_bitmap_next_tuple(TableScanDesc scan, TupleTableSlot *slot)
static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer, HeapTuple tuple, OffsetNumber tupoffset)
static bool heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, TupleTableSlot *slot)
static const TupleTableSlotOps * heapam_slot_callbacks(Relation relation)
static bool heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
#define HEAP_USABLE_BYTES_PER_PAGE
static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd)
static void heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
static void heapam_index_fetch_end(IndexFetchTableData *scan)
static void heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken, bool succeeded)
static void reform_and_rewrite_tuple(HeapTuple tuple, Relation OldHeap, Relation NewHeap, Datum *values, bool *isnull, RewriteState rwstate)
static void heapam_relation_set_new_filelocator(Relation rel, const RelFileLocator *newrlocator, char persistence, TransactionId *freezeXid, MultiXactId *minmulti)
static bool heapam_fetch_row_version(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot)
static void heapam_relation_nontransactional_truncate(Relation rel)
static bool heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
static bool heapam_scan_bitmap_next_block(TableScanDesc scan, BlockNumber *blockno, bool *recheck, uint64 *lossy_pages, uint64 *exact_pages)
Datum heap_tableam_handler(PG_FUNCTION_ARGS)
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: heaptoast.c:626
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1116
void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull)
Definition: heaptuple.c:1345
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1434
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderSetSpeculativeToken(tup, token)
Definition: htup_details.h:439
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
#define BITMAPLEN(NATTS)
Definition: htup_details.h:545
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
void FormIndexDatum(IndexInfo *indexInfo, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
Definition: index.c:2726
static void itemptr_decode(ItemPointer itemptr, int64 encoded)
Definition: index.h:211
bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: indexam.c:675
bool index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: indexam.c:213
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys)
Definition: indexam.c:256
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:378
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:352
int i
Definition: isn.c:72
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static bool ItemPointerIndicatesMovedPartitions(const ItemPointerData *pointer)
Definition: itemptr.h:197
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:656
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:729
@ XLTW_FetchUpdated
Definition: lmgr.h:33
@ XLTW_InsertIndexUnique
Definition: lmgr.h:32
LockWaitPolicy
Definition: lockoptions.h:37
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockWaitError
Definition: lockoptions.h:43
LockTupleMode
Definition: lockoptions.h:50
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2660
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define INDEX_MAX_KEYS
#define NIL
Definition: pg_list.h:68
static char * buf
Definition: pg_test_fsync.c:72
#define ERRCODE_T_R_SERIALIZATION_FAILURE
Definition: pgbench.c:76
static int progress
Definition: pgbench.c:261
#define pgstat_count_heap_fetch(rel)
Definition: pgstat.h:659
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:654
static int64 DatumGetInt64(Datum X)
Definition: postgres.h:385
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2611
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:2005
#define PROGRESS_CLUSTER_INDEX_RELID
Definition: progress.h:60
#define PROGRESS_CLUSTER_HEAP_BLKS_SCANNED
Definition: progress.h:64
#define PROGRESS_CLUSTER_PHASE_SORT_TUPLES
Definition: progress.h:70
#define PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP
Definition: progress.h:68
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:123
#define PROGRESS_CLUSTER_PHASE
Definition: progress.h:59
#define PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED
Definition: progress.h:61
#define PROGRESS_CLUSTER_TOTAL_HEAP_BLKS
Definition: progress.h:63
#define PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
Definition: progress.h:62
#define PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP
Definition: progress.h:69
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:122
#define PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP
Definition: progress.h:71
void heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
Definition: pruneheap.c:1785
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:193
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:605
#define RelationGetRelid(relation)
Definition: rel.h:505
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567
#define RelationGetDescr(relation)
Definition: rel.h:531
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationGetTargetBlock(relation)
Definition: rel.h:601
#define RelationIsPermanent(relation)
Definition: rel.h:617
ForkNumber
Definition: relpath.h:56
@ MAIN_FORKNUM
Definition: relpath.h:58
@ INIT_FORKNUM
Definition: relpath.h:61
#define MAX_FORKNUM
Definition: relpath.h:70
struct ParallelBlockTableScanDescData * ParallelBlockTableScanDesc
Definition: relscan.h:108
void end_heap_rewrite(RewriteState state)
Definition: rewriteheap.c:297
bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
Definition: rewriteheap.c:543
RewriteState begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId cutoff_multi)
Definition: rewriteheap.c:234
void rewrite_heap_tuple(RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple)
Definition: rewriteheap.c:341
@ ForwardScanDirection
Definition: sdir.h:28
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:414
void smgrclose(SMgrRelation reln)
Definition: smgr.c:323
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:401
TransactionId RecentXmin
Definition: snapmgr.c:99
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:216
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:836
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:794
#define SnapshotAny
Definition: snapmgr.h:33
#define InitDirtySnapshot(snapshotdata)
Definition: snapmgr.h:40
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
void RelationCopyStorage(SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence)
Definition: storage.c:465
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:121
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186
void RelationDropStorage(Relation rel)
Definition: storage.c:206
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:288
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:266
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:258
Buffer rs_vmbuffer
Definition: heapam.h:102
BufferAccessStrategy rs_strategy
Definition: heapam.h:71
bool rs_inited
Definition: heapam.h:65
Buffer rs_cbuf
Definition: heapam.h:68
BlockNumber rs_startblock
Definition: heapam.h:60
HeapTupleData rs_ctup
Definition: heapam.h:73
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]
Definition: heapam.h:108
BlockNumber rs_nblocks
Definition: heapam.h:59
int rs_empty_tuples_pending
Definition: heapam.h:103
BlockNumber rs_cblock
Definition: heapam.h:67
TableScanDescData rs_base
Definition: heapam.h:56
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
ItemPointerData t_ctid
Definition: htup_details.h:161
Buffer xs_cbuf
Definition: heapam.h:119
IndexFetchTableData xs_base
Definition: heapam.h:117
bool ii_Unique
Definition: execnodes.h:199
bool ii_BrokenHotChain
Definition: execnodes.h:205
ExprState * ii_PredicateState
Definition: execnodes.h:192
Oid * ii_ExclusionOps
Definition: execnodes.h:193
bool ii_Concurrent
Definition: execnodes.h:204
List * ii_ExpressionsState
Definition: execnodes.h:190
List * ii_Predicate
Definition: execnodes.h:191
TupleDesc rd_att
Definition: rel.h:112
Oid rd_id
Definition: rel.h:113
Form_pg_class rd_rel
Definition: rel.h:111
struct TsmRoutine * tsmroutine
Definition: execnodes.h:1600
TransactionId xmin
Definition: snapshot.h:157
TransactionId xmax
Definition: snapshot.h:158
bool takenDuringRecovery
Definition: snapshot.h:184
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: tidbitmap.h:46
BlockNumber blockno
Definition: tidbitmap.h:42
bool traversed
Definition: tableam.h:152
TransactionId xmax
Definition: tableam.h:150
CommandId cmax
Definition: tableam.h:151
ItemPointerData ctid
Definition: tableam.h:149
NodeTag type
Definition: tableam.h:292
Relation rs_rd
Definition: relscan.h:38
union TableScanDescData::@48 st
struct TableScanDescData::@48::@49 bitmap
uint32 rs_flags
Definition: relscan.h:70
struct SnapshotData * rs_snapshot
Definition: relscan.h:39
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:72
NextSampleTuple_function NextSampleTuple
Definition: tsmapi.h:74
NextSampleBlock_function NextSampleBlock
Definition: tsmapi.h:73
Oid tts_tableOid
Definition: tuptable.h:130
ItemPointerData tts_tid
Definition: tuptable.h:129
Definition: regguts.h:323
void ss_report_location(Relation rel, BlockNumber location)
Definition: syncscan.c:289
Size table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:388
void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:406
uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.c:616
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:91
Size table_block_parallelscan_estimate(Relation rel)
Definition: tableam.c:382
void table_block_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac, Size overhead_bytes_per_tuple, Size usable_bytes_per_page)
Definition: tableam.c:653
@ SO_NEED_TUPLES
Definition: tableam.h:71
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
@ SO_ALLOW_SYNC
Definition: tableam.h:59
static TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:912
TU_UpdateIndexes
Definition: tableam.h:117
@ TU_Summarizing
Definition: tableam.h:125
@ TU_All
Definition: tableam.h:122
@ TU_None
Definition: tableam.h:119
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1028
TM_Result
Definition: tableam.h:79
@ TM_Ok
Definition: tableam.h:84
@ TM_Deleted
Definition: tableam.h:99
@ TM_WouldBlock
Definition: tableam.h:109
@ TM_Updated
Definition: tableam.h:96
@ TM_SelfModified
Definition: tableam.h:90
static TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync)
Definition: tableam.h:936
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION
Definition: tableam.h:267
void(* IndexBuildCallback)(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: tableam.h:271
static bool table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1064
#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS
Definition: tableam.h:265
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46
TBMIterateResult * tbm_shared_iterate(TBMSharedIterator *iterator)
Definition: tidbitmap.c:1052
TBMIterateResult * tbm_iterate(TBMIterator *iterator)
Definition: tidbitmap.c:971
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1363
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:951
#define TUPLESORT_NONE
Definition: tuplesort.h:93
HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
Tuplesortstate * tuplesort_begin_cluster(TupleDesc tupDesc, Relation indexRel, int workMem, SortCoordinate coordinate, int sortopt)
void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy, Datum *val, bool *isNull, Datum *abbrev)
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:129
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
#define TTS_IS_BUFFERTUPLE(slot)
Definition: tuptable.h:237
void heap_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:293
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:940
#define IsolationIsSerializable()
Definition: xact.h:52