PostgreSQL Source Code  git master
heapam_handler.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This files wires up the lower level heapam.c et al routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/heaptoast.h"
25 #include "access/multixact.h"
26 #include "access/rewriteheap.h"
27 #include "access/syncscan.h"
28 #include "access/tableam.h"
29 #include "access/tsmapi.h"
30 #include "access/visibilitymap.h"
31 #include "access/xact.h"
32 #include "catalog/catalog.h"
33 #include "catalog/index.h"
34 #include "catalog/storage.h"
35 #include "catalog/storage_xlog.h"
36 #include "commands/progress.h"
37 #include "executor/executor.h"
38 #include "miscadmin.h"
39 #include "pgstat.h"
40 #include "storage/bufmgr.h"
41 #include "storage/bufpage.h"
42 #include "storage/lmgr.h"
43 #include "storage/predicate.h"
44 #include "storage/procarray.h"
45 #include "storage/smgr.h"
46 #include "utils/builtins.h"
47 #include "utils/rel.h"
48 
49 static void reform_and_rewrite_tuple(HeapTuple tuple,
50  Relation OldHeap, Relation NewHeap,
51  Datum *values, bool *isnull, RewriteState rwstate);
52 
53 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
54  HeapTuple tuple,
55  OffsetNumber tupoffset);
56 
58 
60 
61 
62 /* ------------------------------------------------------------------------
63  * Slot related callbacks for heap AM
64  * ------------------------------------------------------------------------
65  */
66 
67 static const TupleTableSlotOps *
69 {
70  return &TTSOpsBufferHeapTuple;
71 }
72 
73 
74 /* ------------------------------------------------------------------------
75  * Index Scan Callbacks for heap AM
76  * ------------------------------------------------------------------------
77  */
78 
79 static IndexFetchTableData *
81 {
83 
84  hscan->xs_base.rel = rel;
85  hscan->xs_cbuf = InvalidBuffer;
86 
87  return &hscan->xs_base;
88 }
89 
90 static void
92 {
93  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
94 
95  if (BufferIsValid(hscan->xs_cbuf))
96  {
97  ReleaseBuffer(hscan->xs_cbuf);
98  hscan->xs_cbuf = InvalidBuffer;
99  }
100 }
101 
102 static void
104 {
105  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
106 
108 
109  pfree(hscan);
110 }
111 
112 static bool
114  ItemPointer tid,
115  Snapshot snapshot,
116  TupleTableSlot *slot,
117  bool *call_again, bool *all_dead)
118 {
119  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
121  bool got_heap_tuple;
122 
123  Assert(TTS_IS_BUFFERTUPLE(slot));
124 
125  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
126  if (!*call_again)
127  {
128  /* Switch to correct buffer if we don't have it already */
129  Buffer prev_buf = hscan->xs_cbuf;
130 
131  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
132  hscan->xs_base.rel,
134 
135  /*
136  * Prune page, but only if we weren't already on this page
137  */
138  if (prev_buf != hscan->xs_cbuf)
139  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
140  }
141 
142  /* Obtain share-lock on the buffer so we can examine visibility */
144  got_heap_tuple = heap_hot_search_buffer(tid,
145  hscan->xs_base.rel,
146  hscan->xs_cbuf,
147  snapshot,
148  &bslot->base.tupdata,
149  all_dead,
150  !*call_again);
151  bslot->base.tupdata.t_self = *tid;
153 
154  if (got_heap_tuple)
155  {
156  /*
157  * Only in a non-MVCC snapshot can more than one member of the HOT
158  * chain be visible.
159  */
160  *call_again = !IsMVCCSnapshot(snapshot);
161 
162  slot->tts_tableOid = RelationGetRelid(scan->rel);
163  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
164  }
165  else
166  {
167  /* We've reached the end of the HOT chain. */
168  *call_again = false;
169  }
170 
171  return got_heap_tuple;
172 }
173 
174 
175 /* ------------------------------------------------------------------------
176  * Callbacks for non-modifying operations on individual tuples for heap AM
177  * ------------------------------------------------------------------------
178  */
179 
180 static bool
182  ItemPointer tid,
183  Snapshot snapshot,
184  TupleTableSlot *slot)
185 {
187  Buffer buffer;
188 
189  Assert(TTS_IS_BUFFERTUPLE(slot));
190 
191  bslot->base.tupdata.t_self = *tid;
192  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
193  {
194  /* store in slot, transferring existing pin */
195  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
196  slot->tts_tableOid = RelationGetRelid(relation);
197 
198  return true;
199  }
200 
201  return false;
202 }
203 
204 static bool
206 {
207  HeapScanDesc hscan = (HeapScanDesc) scan;
208 
209  return ItemPointerIsValid(tid) &&
211 }
212 
213 static bool
215  Snapshot snapshot)
216 {
218  bool res;
219 
220  Assert(TTS_IS_BUFFERTUPLE(slot));
221  Assert(BufferIsValid(bslot->buffer));
222 
223  /*
224  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
225  * Caller should be holding pin, but not lock.
226  */
228  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
229  bslot->buffer);
231 
232  return res;
233 }
234 
235 
236 /* ----------------------------------------------------------------------------
237  * Functions for manipulations of physical tuples for heap AM.
238  * ----------------------------------------------------------------------------
239  */
240 
241 static void
243  int options, BulkInsertState bistate)
244 {
245  bool shouldFree = true;
246  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
247 
248  /* Update the tuple with table oid */
249  slot->tts_tableOid = RelationGetRelid(relation);
250  tuple->t_tableOid = slot->tts_tableOid;
251 
252  /* Perform the insertion, and copy the resulting ItemPointer */
253  heap_insert(relation, tuple, cid, options, bistate);
254  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
255 
256  if (shouldFree)
257  pfree(tuple);
258 }
259 
260 static void
262  CommandId cid, int options,
263  BulkInsertState bistate, uint32 specToken)
264 {
265  bool shouldFree = true;
266  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
267 
268  /* Update the tuple with table oid */
269  slot->tts_tableOid = RelationGetRelid(relation);
270  tuple->t_tableOid = slot->tts_tableOid;
271 
272  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
274 
275  /* Perform the insertion, and copy the resulting ItemPointer */
276  heap_insert(relation, tuple, cid, options, bistate);
277  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
278 
279  if (shouldFree)
280  pfree(tuple);
281 }
282 
283 static void
285  uint32 specToken, bool succeeded)
286 {
287  bool shouldFree = true;
288  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
289 
290  /* adjust the tuple's state accordingly */
291  if (succeeded)
292  heap_finish_speculative(relation, &slot->tts_tid);
293  else
294  heap_abort_speculative(relation, &slot->tts_tid);
295 
296  if (shouldFree)
297  pfree(tuple);
298 }
299 
300 static TM_Result
302  Snapshot snapshot, Snapshot crosscheck, bool wait,
303  TM_FailureData *tmfd, bool changingPart)
304 {
305  /*
306  * Currently Deleting of index tuples are handled at vacuum, in case if
307  * the storage itself is cleaning the dead tuples by itself, it is the
308  * time to call the index tuple deletion also.
309  */
310  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
311 }
312 
313 
314 static TM_Result
316  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
317  bool wait, TM_FailureData *tmfd,
318  LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
319 {
320  bool shouldFree = true;
321  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
322  TM_Result result;
323 
324  /* Update the tuple with table oid */
325  slot->tts_tableOid = RelationGetRelid(relation);
326  tuple->t_tableOid = slot->tts_tableOid;
327 
328  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
329  tmfd, lockmode, update_indexes);
330  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
331 
332  /*
333  * Decide whether new index entries are needed for the tuple
334  *
335  * Note: heap_update returns the tid (location) of the new tuple in the
336  * t_self field.
337  *
338  * If the update is not HOT, we must update all indexes. If the update is
339  * HOT, it could be that we updated summarized columns, so we either
340  * update only summarized indexes, or none at all.
341  */
342  if (result != TM_Ok)
343  {
344  Assert(*update_indexes == TU_None);
345  *update_indexes = TU_None;
346  }
347  else if (!HeapTupleIsHeapOnly(tuple))
348  Assert(*update_indexes == TU_All);
349  else
350  Assert((*update_indexes == TU_Summarizing) ||
351  (*update_indexes == TU_None));
352 
353  if (shouldFree)
354  pfree(tuple);
355 
356  return result;
357 }
358 
359 static TM_Result
362  LockWaitPolicy wait_policy, uint8 flags,
363  TM_FailureData *tmfd)
364 {
366  TM_Result result;
367  Buffer buffer;
368  HeapTuple tuple = &bslot->base.tupdata;
369  bool follow_updates;
370 
371  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
372  tmfd->traversed = false;
373 
374  Assert(TTS_IS_BUFFERTUPLE(slot));
375 
376 tuple_lock_retry:
377  tuple->t_self = *tid;
378  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
379  follow_updates, &buffer, tmfd);
380 
381  if (result == TM_Updated &&
383  {
384  /* Should not encounter speculative tuple on recheck */
386 
387  ReleaseBuffer(buffer);
388 
389  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
390  {
391  SnapshotData SnapshotDirty;
392  TransactionId priorXmax;
393 
394  /* it was updated, so look at the updated version */
395  *tid = tmfd->ctid;
396  /* updated row should have xmin matching this xmax */
397  priorXmax = tmfd->xmax;
398 
399  /* signal that a tuple later in the chain is getting locked */
400  tmfd->traversed = true;
401 
402  /*
403  * fetch target tuple
404  *
405  * Loop here to deal with updated or busy tuples
406  */
407  InitDirtySnapshot(SnapshotDirty);
408  for (;;)
409  {
411  ereport(ERROR,
413  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
414 
415  tuple->t_self = *tid;
416  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
417  {
418  /*
419  * If xmin isn't what we're expecting, the slot must have
420  * been recycled and reused for an unrelated tuple. This
421  * implies that the latest version of the row was deleted,
422  * so we need do nothing. (Should be safe to examine xmin
423  * without getting buffer's content lock. We assume
424  * reading a TransactionId to be atomic, and Xmin never
425  * changes in an existing tuple, except to invalid or
426  * frozen, and neither of those can match priorXmax.)
427  */
429  priorXmax))
430  {
431  ReleaseBuffer(buffer);
432  return TM_Deleted;
433  }
434 
435  /* otherwise xmin should not be dirty... */
436  if (TransactionIdIsValid(SnapshotDirty.xmin))
437  ereport(ERROR,
439  errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
440  SnapshotDirty.xmin,
443  RelationGetRelationName(relation))));
444 
445  /*
446  * If tuple is being updated by other transaction then we
447  * have to wait for its commit/abort, or die trying.
448  */
449  if (TransactionIdIsValid(SnapshotDirty.xmax))
450  {
451  ReleaseBuffer(buffer);
452  switch (wait_policy)
453  {
454  case LockWaitBlock:
455  XactLockTableWait(SnapshotDirty.xmax,
456  relation, &tuple->t_self,
458  break;
459  case LockWaitSkip:
460  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
461  /* skip instead of waiting */
462  return TM_WouldBlock;
463  break;
464  case LockWaitError:
465  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
466  ereport(ERROR,
467  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
468  errmsg("could not obtain lock on row in relation \"%s\"",
469  RelationGetRelationName(relation))));
470  break;
471  }
472  continue; /* loop back to repeat heap_fetch */
473  }
474 
475  /*
476  * If tuple was inserted by our own transaction, we have
477  * to check cmin against cid: cmin >= current CID means
478  * our command cannot see the tuple, so we should ignore
479  * it. Otherwise heap_lock_tuple() will throw an error,
480  * and so would any later attempt to update or delete the
481  * tuple. (We need not check cmax because
482  * HeapTupleSatisfiesDirty will consider a tuple deleted
483  * by our transaction dead, regardless of cmax.) We just
484  * checked that priorXmax == xmin, so we can test that
485  * variable instead of doing HeapTupleHeaderGetXmin again.
486  */
487  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
488  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
489  {
490  tmfd->xmax = priorXmax;
491 
492  /*
493  * Cmin is the problematic value, so store that. See
494  * above.
495  */
496  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
497  ReleaseBuffer(buffer);
498  return TM_SelfModified;
499  }
500 
501  /*
502  * This is a live tuple, so try to lock it again.
503  */
504  ReleaseBuffer(buffer);
505  goto tuple_lock_retry;
506  }
507 
508  /*
509  * If the referenced slot was actually empty, the latest
510  * version of the row must have been deleted, so we need do
511  * nothing.
512  */
513  if (tuple->t_data == NULL)
514  {
515  Assert(!BufferIsValid(buffer));
516  return TM_Deleted;
517  }
518 
519  /*
520  * As above, if xmin isn't what we're expecting, do nothing.
521  */
523  priorXmax))
524  {
525  ReleaseBuffer(buffer);
526  return TM_Deleted;
527  }
528 
529  /*
530  * If we get here, the tuple was found but failed
531  * SnapshotDirty. Assuming the xmin is either a committed xact
532  * or our own xact (as it certainly should be if we're trying
533  * to modify the tuple), this must mean that the row was
534  * updated or deleted by either a committed xact or our own
535  * xact. If it was deleted, we can ignore it; if it was
536  * updated then chain up to the next version and repeat the
537  * whole process.
538  *
539  * As above, it should be safe to examine xmax and t_ctid
540  * without the buffer content lock, because they can't be
541  * changing. We'd better hold a buffer pin though.
542  */
543  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
544  {
545  /* deleted, so forget about it */
546  ReleaseBuffer(buffer);
547  return TM_Deleted;
548  }
549 
550  /* updated, so look at the updated row */
551  *tid = tuple->t_data->t_ctid;
552  /* updated row should have xmin matching this xmax */
553  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
554  ReleaseBuffer(buffer);
555  /* loop back to fetch next in chain */
556  }
557  }
558  else
559  {
560  /* tuple was deleted, so give up */
561  return TM_Deleted;
562  }
563  }
564 
565  slot->tts_tableOid = RelationGetRelid(relation);
566  tuple->t_tableOid = slot->tts_tableOid;
567 
568  /* store in slot, transferring existing pin */
569  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
570 
571  return result;
572 }
573 
574 
575 /* ------------------------------------------------------------------------
576  * DDL related callbacks for heap AM.
577  * ------------------------------------------------------------------------
578  */
579 
580 static void
582  const RelFileLocator *newrlocator,
583  char persistence,
584  TransactionId *freezeXid,
585  MultiXactId *minmulti)
586 {
587  SMgrRelation srel;
588 
589  /*
590  * Initialize to the minimum XID that could put tuples in the table. We
591  * know that no xacts older than RecentXmin are still running, so that
592  * will do.
593  */
594  *freezeXid = RecentXmin;
595 
596  /*
597  * Similarly, initialize the minimum Multixact to the first value that
598  * could possibly be stored in tuples in the table. Running transactions
599  * could reuse values from their local cache, so we are careful to
600  * consider all currently running multis.
601  *
602  * XXX this could be refined further, but is it worth the hassle?
603  */
604  *minmulti = GetOldestMultiXactId();
605 
606  srel = RelationCreateStorage(*newrlocator, persistence, true);
607 
608  /*
609  * If required, set up an init fork for an unlogged table so that it can
610  * be correctly reinitialized on restart. Recovery may remove it while
611  * replaying, for example, an XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
612  * record. Therefore, logging is necessary even if wal_level=minimal.
613  */
614  if (persistence == RELPERSISTENCE_UNLOGGED)
615  {
616  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
617  rel->rd_rel->relkind == RELKIND_MATVIEW ||
618  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
619  smgrcreate(srel, INIT_FORKNUM, false);
620  log_smgrcreate(newrlocator, INIT_FORKNUM);
621  }
622 
623  smgrclose(srel);
624 }
625 
626 static void
628 {
629  RelationTruncate(rel, 0);
630 }
631 
632 static void
634 {
635  SMgrRelation dstrel;
636 
637  /*
638  * Since we copy the file directly without looking at the shared buffers,
639  * we'd better first flush out any pages of the source relation that are
640  * in shared buffers. We assume no new changes will be made while we are
641  * holding exclusive lock on the rel.
642  */
644 
645  /*
646  * Create and copy all forks of the relation, and schedule unlinking of
647  * old physical files.
648  *
649  * NOTE: any conflict in relfilenumber value will be caught in
650  * RelationCreateStorage().
651  */
652  dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
653 
654  /* copy main fork */
656  rel->rd_rel->relpersistence);
657 
658  /* copy those extra forks that exist */
659  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
660  forkNum <= MAX_FORKNUM; forkNum++)
661  {
662  if (smgrexists(RelationGetSmgr(rel), forkNum))
663  {
664  smgrcreate(dstrel, forkNum, false);
665 
666  /*
667  * WAL log creation if the relation is persistent, or this is the
668  * init fork of an unlogged relation.
669  */
670  if (RelationIsPermanent(rel) ||
671  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
672  forkNum == INIT_FORKNUM))
673  log_smgrcreate(newrlocator, forkNum);
674  RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
675  rel->rd_rel->relpersistence);
676  }
677  }
678 
679 
680  /* drop old relation, and close new one */
681  RelationDropStorage(rel);
682  smgrclose(dstrel);
683 }
684 
685 static void
687  Relation OldIndex, bool use_sort,
688  TransactionId OldestXmin,
689  TransactionId *xid_cutoff,
690  MultiXactId *multi_cutoff,
691  double *num_tuples,
692  double *tups_vacuumed,
693  double *tups_recently_dead)
694 {
695  RewriteState rwstate;
696  IndexScanDesc indexScan;
697  TableScanDesc tableScan;
698  HeapScanDesc heapScan;
699  bool is_system_catalog;
700  Tuplesortstate *tuplesort;
701  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
702  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
703  TupleTableSlot *slot;
704  int natts;
705  Datum *values;
706  bool *isnull;
708  BlockNumber prev_cblock = InvalidBlockNumber;
709 
710  /* Remember if it's a system catalog */
711  is_system_catalog = IsSystemRelation(OldHeap);
712 
713  /*
714  * Valid smgr_targblock implies something already wrote to the relation.
715  * This may be harmless, but this function hasn't planned for it.
716  */
718 
719  /* Preallocate values/isnull arrays */
720  natts = newTupDesc->natts;
721  values = (Datum *) palloc(natts * sizeof(Datum));
722  isnull = (bool *) palloc(natts * sizeof(bool));
723 
724  /* Initialize the rewrite operation */
725  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
726  *multi_cutoff);
727 
728 
729  /* Set up sorting if wanted */
730  if (use_sort)
731  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
733  NULL, TUPLESORT_NONE);
734  else
735  tuplesort = NULL;
736 
737  /*
738  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
739  * that still need to be copied, we scan with SnapshotAny and use
740  * HeapTupleSatisfiesVacuum for the visibility test.
741  */
742  if (OldIndex != NULL && !use_sort)
743  {
744  const int ci_index[] = {
747  };
748  int64 ci_val[2];
749 
750  /* Set phase and OIDOldIndex to columns */
752  ci_val[1] = RelationGetRelid(OldIndex);
753  pgstat_progress_update_multi_param(2, ci_index, ci_val);
754 
755  tableScan = NULL;
756  heapScan = NULL;
757  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
758  index_rescan(indexScan, NULL, 0, NULL, 0);
759  }
760  else
761  {
762  /* In scan-and-sort mode and also VACUUM FULL, set phase */
765 
766  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
767  heapScan = (HeapScanDesc) tableScan;
768  indexScan = NULL;
769 
770  /* Set total heap blocks */
772  heapScan->rs_nblocks);
773  }
774 
775  slot = table_slot_create(OldHeap, NULL);
776  hslot = (BufferHeapTupleTableSlot *) slot;
777 
778  /*
779  * Scan through the OldHeap, either in OldIndex order or sequentially;
780  * copy each tuple into the NewHeap, or transiently to the tuplesort
781  * module. Note that we don't bother sorting dead tuples (they won't get
782  * to the new table anyway).
783  */
784  for (;;)
785  {
786  HeapTuple tuple;
787  Buffer buf;
788  bool isdead;
789 
791 
792  if (indexScan != NULL)
793  {
794  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
795  break;
796 
797  /* Since we used no scan keys, should never need to recheck */
798  if (indexScan->xs_recheck)
799  elog(ERROR, "CLUSTER does not support lossy index conditions");
800  }
801  else
802  {
803  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
804  {
805  /*
806  * If the last pages of the scan were empty, we would go to
807  * the next phase while heap_blks_scanned != heap_blks_total.
808  * Instead, to ensure that heap_blks_scanned is equivalent to
809  * heap_blks_total after the table scan phase, this parameter
810  * is manually updated to the correct value when the table
811  * scan finishes.
812  */
814  heapScan->rs_nblocks);
815  break;
816  }
817 
818  /*
819  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
820  * scanned
821  *
822  * Note that heapScan may start at an offset and wrap around, i.e.
823  * rs_startblock may be >0, and rs_cblock may end with a number
824  * below rs_startblock. To prevent showing this wraparound to the
825  * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
826  */
827  if (prev_cblock != heapScan->rs_cblock)
828  {
830  (heapScan->rs_cblock +
831  heapScan->rs_nblocks -
832  heapScan->rs_startblock
833  ) % heapScan->rs_nblocks + 1);
834  prev_cblock = heapScan->rs_cblock;
835  }
836  }
837 
838  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
839  buf = hslot->buffer;
840 
842 
843  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
844  {
845  case HEAPTUPLE_DEAD:
846  /* Definitely dead */
847  isdead = true;
848  break;
850  *tups_recently_dead += 1;
851  /* fall through */
852  case HEAPTUPLE_LIVE:
853  /* Live or recently dead, must copy it */
854  isdead = false;
855  break;
857 
858  /*
859  * Since we hold exclusive lock on the relation, normally the
860  * only way to see this is if it was inserted earlier in our
861  * own transaction. However, it can happen in system
862  * catalogs, since we tend to release write lock before commit
863  * there. Give a warning if neither case applies; but in any
864  * case we had better copy it.
865  */
866  if (!is_system_catalog &&
868  elog(WARNING, "concurrent insert in progress within table \"%s\"",
869  RelationGetRelationName(OldHeap));
870  /* treat as live */
871  isdead = false;
872  break;
874 
875  /*
876  * Similar situation to INSERT_IN_PROGRESS case.
877  */
878  if (!is_system_catalog &&
880  elog(WARNING, "concurrent delete in progress within table \"%s\"",
881  RelationGetRelationName(OldHeap));
882  /* treat as recently dead */
883  *tups_recently_dead += 1;
884  isdead = false;
885  break;
886  default:
887  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
888  isdead = false; /* keep compiler quiet */
889  break;
890  }
891 
893 
894  if (isdead)
895  {
896  *tups_vacuumed += 1;
897  /* heap rewrite module still needs to see it... */
898  if (rewrite_heap_dead_tuple(rwstate, tuple))
899  {
900  /* A previous recently-dead tuple is now known dead */
901  *tups_vacuumed += 1;
902  *tups_recently_dead -= 1;
903  }
904  continue;
905  }
906 
907  *num_tuples += 1;
908  if (tuplesort != NULL)
909  {
910  tuplesort_putheaptuple(tuplesort, tuple);
911 
912  /*
913  * In scan-and-sort mode, report increase in number of tuples
914  * scanned
915  */
917  *num_tuples);
918  }
919  else
920  {
921  const int ct_index[] = {
924  };
925  int64 ct_val[2];
926 
927  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
928  values, isnull, rwstate);
929 
930  /*
931  * In indexscan mode and also VACUUM FULL, report increase in
932  * number of tuples scanned and written
933  */
934  ct_val[0] = *num_tuples;
935  ct_val[1] = *num_tuples;
936  pgstat_progress_update_multi_param(2, ct_index, ct_val);
937  }
938  }
939 
940  if (indexScan != NULL)
941  index_endscan(indexScan);
942  if (tableScan != NULL)
943  table_endscan(tableScan);
944  if (slot)
946 
947  /*
948  * In scan-and-sort mode, complete the sort, then read out all live tuples
949  * from the tuplestore and write them to the new relation.
950  */
951  if (tuplesort != NULL)
952  {
953  double n_tuples = 0;
954 
955  /* Report that we are now sorting tuples */
958 
959  tuplesort_performsort(tuplesort);
960 
961  /* Report that we are now writing new heap */
964 
965  for (;;)
966  {
967  HeapTuple tuple;
968 
970 
971  tuple = tuplesort_getheaptuple(tuplesort, true);
972  if (tuple == NULL)
973  break;
974 
975  n_tuples += 1;
977  OldHeap, NewHeap,
978  values, isnull,
979  rwstate);
980  /* Report n_tuples */
982  n_tuples);
983  }
984 
985  tuplesort_end(tuplesort);
986  }
987 
988  /* Write out any remaining tuples, and fsync if needed */
989  end_heap_rewrite(rwstate);
990 
991  /* Clean up */
992  pfree(values);
993  pfree(isnull);
994 }
995 
996 /*
997  * Prepare to analyze the next block in the read stream. Returns false if
998  * the stream is exhausted and true otherwise. The scan must have been started
999  * with SO_TYPE_ANALYZE option.
1000  *
1001  * This routine holds a buffer pin and lock on the heap page. They are held
1002  * until heapam_scan_analyze_next_tuple() returns false. That is until all the
1003  * items of the heap page are analyzed.
1004  */
1005 static bool
1007 {
1008  HeapScanDesc hscan = (HeapScanDesc) scan;
1009 
1010  /*
1011  * We must maintain a pin on the target page's buffer to ensure that
1012  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
1013  * under us. It comes from the stream already pinned. We also choose to
1014  * hold sharelock on the buffer throughout --- we could release and
1015  * re-acquire sharelock for each tuple, but since we aren't doing much
1016  * work per tuple, the extra lock traffic is probably better avoided.
1017  */
1018  hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
1019  if (!BufferIsValid(hscan->rs_cbuf))
1020  return false;
1021 
1023 
1024  hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
1025  hscan->rs_cindex = FirstOffsetNumber;
1026  return true;
1027 }
1028 
1029 static bool
1031  double *liverows, double *deadrows,
1032  TupleTableSlot *slot)
1033 {
1034  HeapScanDesc hscan = (HeapScanDesc) scan;
1035  Page targpage;
1036  OffsetNumber maxoffset;
1037  BufferHeapTupleTableSlot *hslot;
1038 
1039  Assert(TTS_IS_BUFFERTUPLE(slot));
1040 
1041  hslot = (BufferHeapTupleTableSlot *) slot;
1042  targpage = BufferGetPage(hscan->rs_cbuf);
1043  maxoffset = PageGetMaxOffsetNumber(targpage);
1044 
1045  /* Inner loop over all tuples on the selected page */
1046  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1047  {
1048  ItemId itemid;
1049  HeapTuple targtuple = &hslot->base.tupdata;
1050  bool sample_it = false;
1051 
1052  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1053 
1054  /*
1055  * We ignore unused and redirect line pointers. DEAD line pointers
1056  * should be counted as dead, because we need vacuum to run to get rid
1057  * of them. Note that this rule agrees with the way that
1058  * heap_page_prune_and_freeze() counts things.
1059  */
1060  if (!ItemIdIsNormal(itemid))
1061  {
1062  if (ItemIdIsDead(itemid))
1063  *deadrows += 1;
1064  continue;
1065  }
1066 
1067  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1068 
1069  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1070  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1071  targtuple->t_len = ItemIdGetLength(itemid);
1072 
1073  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1074  hscan->rs_cbuf))
1075  {
1076  case HEAPTUPLE_LIVE:
1077  sample_it = true;
1078  *liverows += 1;
1079  break;
1080 
1081  case HEAPTUPLE_DEAD:
1083  /* Count dead and recently-dead rows */
1084  *deadrows += 1;
1085  break;
1086 
1088 
1089  /*
1090  * Insert-in-progress rows are not counted. We assume that
1091  * when the inserting transaction commits or aborts, it will
1092  * send a stats message to increment the proper count. This
1093  * works right only if that transaction ends after we finish
1094  * analyzing the table; if things happen in the other order,
1095  * its stats update will be overwritten by ours. However, the
1096  * error will be large only if the other transaction runs long
1097  * enough to insert many tuples, so assuming it will finish
1098  * after us is the safer option.
1099  *
1100  * A special case is that the inserting transaction might be
1101  * our own. In this case we should count and sample the row,
1102  * to accommodate users who load a table and analyze it in one
1103  * transaction. (pgstat_report_analyze has to adjust the
1104  * numbers we report to the cumulative stats system to make
1105  * this come out right.)
1106  */
1108  {
1109  sample_it = true;
1110  *liverows += 1;
1111  }
1112  break;
1113 
1115 
1116  /*
1117  * We count and sample delete-in-progress rows the same as
1118  * live ones, so that the stats counters come out right if the
1119  * deleting transaction commits after us, per the same
1120  * reasoning given above.
1121  *
1122  * If the delete was done by our own transaction, however, we
1123  * must count the row as dead to make pgstat_report_analyze's
1124  * stats adjustments come out right. (Note: this works out
1125  * properly when the row was both inserted and deleted in our
1126  * xact.)
1127  *
1128  * The net effect of these choices is that we act as though an
1129  * IN_PROGRESS transaction hasn't happened yet, except if it
1130  * is our own transaction, which we assume has happened.
1131  *
1132  * This approach ensures that we behave sanely if we see both
1133  * the pre-image and post-image rows for a row being updated
1134  * by a concurrent transaction: we will sample the pre-image
1135  * but not the post-image. We also get sane results if the
1136  * concurrent transaction never commits.
1137  */
1139  *deadrows += 1;
1140  else
1141  {
1142  sample_it = true;
1143  *liverows += 1;
1144  }
1145  break;
1146 
1147  default:
1148  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1149  break;
1150  }
1151 
1152  if (sample_it)
1153  {
1154  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1155  hscan->rs_cindex++;
1156 
1157  /* note that we leave the buffer locked here! */
1158  return true;
1159  }
1160  }
1161 
1162  /* Now release the lock and pin on the page */
1163  UnlockReleaseBuffer(hscan->rs_cbuf);
1164  hscan->rs_cbuf = InvalidBuffer;
1165 
1166  /* also prevent old slot contents from having pin on page */
1167  ExecClearTuple(slot);
1168 
1169  return false;
1170 }
1171 
/*
 * Heap-side scan driving an index build over a block range (presumably
 * heapam_index_build_range_scan -- TODO confirm: the function-name line of
 * this listing was dropped during extraction).  Scans the base relation,
 * decides per tuple whether it must be indexed and/or unique-checked, and
 * hands qualifying tuples to the index AM's build callback.  Returns the
 * number of tuples counted as live (reltuples estimate).
 *
 * NOTE(review): this is a Doxygen-style listing and several short
 * hyperlinked lines appear to have been dropped throughout (buffer
 * lock/unlock calls, progress-report calls, CHECK_FOR_INTERRUPTS, switch
 * case labels).  The most consequential gaps are flagged inline below;
 * verify all of them against the original source file.
 */
1172 static double
/* NOTE(review): listing line 1173 (the function name and first parameter,
 * presumably "heapam_index_build_range_scan(Relation heapRelation,") is
 * missing here -- confirm against the original. */
1174  Relation indexRelation,
1175  IndexInfo *indexInfo,
1176  bool allow_sync,
1177  bool anyvisible,
1178  bool progress,
1179  BlockNumber start_blockno,
1180  BlockNumber numblocks,
/* NOTE(review): listing line 1181 is missing; since "callback(...)" is
 * invoked near the bottom, it was presumably the IndexBuildCallback
 * parameter -- confirm. */
1182  void *callback_state,
1183  TableScanDesc scan)
1184 {
1185  HeapScanDesc hscan;
1186  bool is_system_catalog;
1187  bool checking_uniqueness;
1188  HeapTuple heapTuple;
/* NOTE(review): listing line 1189 is missing; FormIndexDatum() below fills
 * a "values" array, so presumably "Datum values[INDEX_MAX_KEYS];" -- confirm. */
1190  bool isnull[INDEX_MAX_KEYS];
1191  double reltuples;
1192  ExprState *predicate;
1193  TupleTableSlot *slot;
1194  EState *estate;
1195  ExprContext *econtext;
1196  Snapshot snapshot;
1197  bool need_unregister_snapshot = false;
1198  TransactionId OldestXmin;
1199  BlockNumber previous_blkno = InvalidBlockNumber;
1200  BlockNumber root_blkno = InvalidBlockNumber;
1201  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1202 
1203  /*
1204  * sanity checks
1205  */
1206  Assert(OidIsValid(indexRelation->rd_rel->relam));
1207 
1208  /* Remember if it's a system catalog */
1209  is_system_catalog = IsSystemRelation(heapRelation);
1210 
1211  /* See whether we're verifying uniqueness/exclusion properties */
1212  checking_uniqueness = (indexInfo->ii_Unique ||
1213  indexInfo->ii_ExclusionOps != NULL);
1214 
1215  /*
1216  * "Any visible" mode is not compatible with uniqueness checks; make sure
1217  * only one of those is requested.
1218  */
1219  Assert(!(anyvisible && checking_uniqueness));
1220 
1221  /*
1222  * Need an EState for evaluation of index expressions and partial-index
1223  * predicates. Also a slot to hold the current tuple.
1224  */
1225  estate = CreateExecutorState();
1226  econtext = GetPerTupleExprContext(estate);
1227  slot = table_slot_create(heapRelation, NULL);
1228 
1229  /* Arrange for econtext's scan tuple to be the tuple under test */
1230  econtext->ecxt_scantuple = slot;
1231 
1232  /* Set up execution state for predicate, if any. */
1233  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1234 
1235  /*
1236  * Prepare for scan of the base relation. In a normal index build, we use
1237  * SnapshotAny because we must retrieve all tuples and do our own time
1238  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1239  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1240  * and index whatever's live according to that.
1241  */
1242  OldestXmin = InvalidTransactionId;
1243 
1244  /* okay to ignore lazy VACUUMs here */
1245  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1246  OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1247 
1248  if (!scan)
1249  {
1250  /*
1251  * Serial index build.
1252  *
1253  * Must begin our own heap scan in this case. We may also need to
1254  * register a snapshot whose lifetime is under our direct control.
1255  */
1256  if (!TransactionIdIsValid(OldestXmin))
1257  {
/* NOTE(review): listing line 1258 is missing; the comment above and the
 * UnregisterSnapshot() at the bottom suggest it registered an MVCC
 * snapshot here (e.g. snapshot = RegisterSnapshot(...)) -- confirm. */
1259  need_unregister_snapshot = true;
1260  }
1261  else
1262  snapshot = SnapshotAny;
1263 
1264  scan = table_beginscan_strat(heapRelation, /* relation */
1265  snapshot, /* snapshot */
1266  0, /* number of keys */
1267  NULL, /* scan key */
1268  true, /* buffer access strategy OK */
1269  allow_sync); /* syncscan OK? */
1270  }
1271  else
1272  {
1273  /*
1274  * Parallel index build.
1275  *
1276  * Parallel case never registers/unregisters own snapshot. Snapshot
1277  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1278  * snapshot, based on same criteria as serial case.
1279  */
1281  Assert(allow_sync);
1282  snapshot = scan->rs_snapshot;
1283  }
1284 
1285  hscan = (HeapScanDesc) scan;
1286 
1287  /*
1288  * Must have called GetOldestNonRemovableTransactionId() if using
1289  * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1290  * worth checking this for parallel builds, since ambuild routines that
1291  * support parallel builds must work these details out for themselves.)
1292  */
1293  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1294  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1295  !TransactionIdIsValid(OldestXmin));
1296  Assert(snapshot == SnapshotAny || !anyvisible);
1297 
1298  /* Publish number of blocks to scan */
1299  if (progress)
1300  {
1301  BlockNumber nblocks;
1302 
1303  if (hscan->rs_base.rs_parallel != NULL)
1304  {
/* NOTE(review): listing lines 1305-1308 are partially missing; presumably a
 * ParallelBlockTableScanDesc local was declared and assigned from
 * rs_base.rs_parallel here -- confirm. */
1306 
1308  nblocks = pbscan->phs_nblocks;
1309  }
1310  else
1311  nblocks = hscan->rs_nblocks;
1312 
/* NOTE(review): listing line 1313 is missing; presumably the
 * progress-report call whose second argument follows -- confirm. */
1314  nblocks);
1315  }
1316 
1317  /* set our scan endpoints */
1318  if (!allow_sync)
1319  heap_setscanlimits(scan, start_blockno, numblocks);
1320  else
1321  {
1322  /* syncscan can only be requested on whole relation */
1323  Assert(start_blockno == 0);
1324  Assert(numblocks == InvalidBlockNumber);
1325  }
1326 
1327  reltuples = 0;
1328 
1329  /*
1330  * Scan all tuples in the base relation.
1331  */
1332  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1333  {
1334  bool tupleIsAlive;
1335 
1337 
1338  /* Report scan progress, if asked to. */
1339  if (progress)
1340  {
1341  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1342 
1343  if (blocks_done != previous_blkno)
1344  {
1346  blocks_done);
1347  previous_blkno = blocks_done;
1348  }
1349  }
1350 
1351  /*
1352  * When dealing with a HOT-chain of updated tuples, we want to index
1353  * the values of the live tuple (if any), but index it under the TID
1354  * of the chain's root tuple. This approach is necessary to preserve
1355  * the HOT-chain structure in the heap. So we need to be able to find
1356  * the root item offset for every tuple that's in a HOT-chain. When
1357  * first reaching a new page of the relation, call
1358  * heap_get_root_tuples() to build a map of root item offsets on the
1359  * page.
1360  *
1361  * It might look unsafe to use this information across buffer
1362  * lock/unlock. However, we hold ShareLock on the table so no
1363  * ordinary insert/update/delete should occur; and we hold pin on the
1364  * buffer continuously while visiting the page, so no pruning
1365  * operation can occur either.
1366  *
1367  * In cases with only ShareUpdateExclusiveLock on the table, it's
1368  * possible for some HOT tuples to appear that we didn't know about
1369  * when we first read the page. To handle that case, we re-obtain the
1370  * list of root offsets when a HOT tuple points to a root item that we
1371  * don't know about.
1372  *
1373  * Also, although our opinions about tuple liveness could change while
1374  * we scan the page (due to concurrent transaction commits/aborts),
1375  * the chain root locations won't, so this info doesn't need to be
1376  * rebuilt after waiting for another transaction.
1377  *
1378  * Note the implied assumption that there is no more than one live
1379  * tuple per HOT-chain --- else we could create more than one index
1380  * entry pointing to the same root tuple.
1381  */
1382  if (hscan->rs_cblock != root_blkno)
1383  {
1384  Page page = BufferGetPage(hscan->rs_cbuf);
1385 
/* NOTE(review): listing lines 1386 and 1388 are missing; presumably the
 * buffer lock/unlock bracketing heap_get_root_tuples() -- confirm. */
1387  heap_get_root_tuples(page, root_offsets);
1389 
1390  root_blkno = hscan->rs_cblock;
1391  }
1392 
1393  if (snapshot == SnapshotAny)
1394  {
1395  /* do our own time qual check */
1396  bool indexIt;
1397  TransactionId xwait;
1398 
1399  recheck:
1400 
1401  /*
1402  * We could possibly get away with not locking the buffer here,
1403  * since caller should hold ShareLock on the relation, but let's
1404  * be conservative about it. (This remark is still correct even
1405  * with HOT-pruning: our pin on the buffer prevents pruning.)
1406  */
/* NOTE(review): listing line 1407 is missing; per the comment above it
 * presumably acquired a share lock on the buffer -- confirm. */
1408 
1409  /*
1410  * The criteria for counting a tuple as live in this block need to
1411  * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1412  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1413  * reltuples values, e.g. when there are many recently-dead
1414  * tuples.
1415  */
1416  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1417  hscan->rs_cbuf))
1418  {
1419  case HEAPTUPLE_DEAD:
1420  /* Definitely dead, we can ignore it */
1421  indexIt = false;
1422  tupleIsAlive = false;
1423  break;
1424  case HEAPTUPLE_LIVE:
1425  /* Normal case, index and unique-check it */
1426  indexIt = true;
1427  tupleIsAlive = true;
1428  /* Count it as live, too */
1429  reltuples += 1;
1430  break;
/* NOTE(review): listing line 1431 is missing; from the comment below this
 * is presumably "case HEAPTUPLE_RECENTLY_DEAD:" -- confirm. */
1432 
1433  /*
1434  * If tuple is recently deleted then we must index it
1435  * anyway to preserve MVCC semantics. (Pre-existing
1436  * transactions could try to use the index after we finish
1437  * building it, and may need to see such tuples.)
1438  *
1439  * However, if it was HOT-updated then we must only index
1440  * the live tuple at the end of the HOT-chain. Since this
1441  * breaks semantics for pre-existing snapshots, mark the
1442  * index as unusable for them.
1443  *
1444  * We don't count recently-dead tuples in reltuples, even
1445  * if we index them; see heapam_scan_analyze_next_tuple().
1446  */
1447  if (HeapTupleIsHotUpdated(heapTuple))
1448  {
1449  indexIt = false;
1450  /* mark the index as unsafe for old snapshots */
1451  indexInfo->ii_BrokenHotChain = true;
1452  }
1453  else
1454  indexIt = true;
1455  /* In any case, exclude the tuple from unique-checking */
1456  tupleIsAlive = false;
1457  break;
/* NOTE(review): listing line 1458 is missing; from the handling below this
 * is presumably "case HEAPTUPLE_INSERT_IN_PROGRESS:" -- confirm. */
1459 
1460  /*
1461  * In "anyvisible" mode, this tuple is visible and we
1462  * don't need any further checks.
1463  */
1464  if (anyvisible)
1465  {
1466  indexIt = true;
1467  tupleIsAlive = true;
1468  reltuples += 1;
1469  break;
1470  }
1471 
1472  /*
1473  * Since caller should hold ShareLock or better, normally
1474  * the only way to see this is if it was inserted earlier
1475  * in our own transaction. However, it can happen in
1476  * system catalogs, since we tend to release write lock
1477  * before commit there. Give a warning if neither case
1478  * applies.
1479  */
1480  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
/* NOTE(review): listing line 1481 is missing; presumably a check whether
 * xwait is our own (current) transaction -- confirm. */
1482  {
1483  if (!is_system_catalog)
1484  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1485  RelationGetRelationName(heapRelation));
1486 
1487  /*
1488  * If we are performing uniqueness checks, indexing
1489  * such a tuple could lead to a bogus uniqueness
1490  * failure. In that case we wait for the inserting
1491  * transaction to finish and check again.
1492  */
1493  if (checking_uniqueness)
1494  {
1495  /*
1496  * Must drop the lock on the buffer before we wait
1497  */
/* NOTE(review): listing lines 1498 and 1501-1502 are missing; presumably
 * the buffer unlock and the remaining XactLockTableWait() arguments --
 * confirm. */
1499  XactLockTableWait(xwait, heapRelation,
1500  &heapTuple->t_self,
1503  goto recheck;
1504  }
1505  }
1506  else
1507  {
1508  /*
1509  * For consistency with
1510  * heapam_scan_analyze_next_tuple(), count
1511  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1512  * when inserted by our own transaction.
1513  */
1514  reltuples += 1;
1515  }
1516 
1517  /*
1518  * We must index such tuples, since if the index build
1519  * commits then they're good.
1520  */
1521  indexIt = true;
1522  tupleIsAlive = true;
1523  break;
/* NOTE(review): listing line 1524 is missing; from the handling below this
 * is presumably "case HEAPTUPLE_DELETE_IN_PROGRESS:" -- confirm. */
1525 
1526  /*
1527  * As with INSERT_IN_PROGRESS case, this is unexpected
1528  * unless it's our own deletion or a system catalog; but
1529  * in anyvisible mode, this tuple is visible.
1530  */
1531  if (anyvisible)
1532  {
1533  indexIt = true;
1534  tupleIsAlive = false;
1535  reltuples += 1;
1536  break;
1537  }
1538 
1539  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
/* NOTE(review): listing line 1540 is missing; presumably a check whether
 * xwait is our own (current) transaction -- confirm. */
1541  {
1542  if (!is_system_catalog)
1543  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1544  RelationGetRelationName(heapRelation));
1545 
1546  /*
1547  * If we are performing uniqueness checks, assuming
1548  * the tuple is dead could lead to missing a
1549  * uniqueness violation. In that case we wait for the
1550  * deleting transaction to finish and check again.
1551  *
1552  * Also, if it's a HOT-updated tuple, we should not
1553  * index it but rather the live tuple at the end of
1554  * the HOT-chain. However, the deleting transaction
1555  * could abort, possibly leaving this tuple as live
1556  * after all, in which case it has to be indexed. The
1557  * only way to know what to do is to wait for the
1558  * deleting transaction to finish and check again.
1559  */
1560  if (checking_uniqueness ||
1561  HeapTupleIsHotUpdated(heapTuple))
1562  {
1563  /*
1564  * Must drop the lock on the buffer before we wait
1565  */
/* NOTE(review): listing lines 1566 and 1569-1570 are missing; presumably
 * the buffer unlock and the remaining XactLockTableWait() arguments --
 * confirm. */
1567  XactLockTableWait(xwait, heapRelation,
1568  &heapTuple->t_self,
1571  goto recheck;
1572  }
1573 
1574  /*
1575  * Otherwise index it but don't check for uniqueness,
1576  * the same as a RECENTLY_DEAD tuple.
1577  */
1578  indexIt = true;
1579 
1580  /*
1581  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1582  * if they were not deleted by the current
1583  * transaction. That's what
1584  * heapam_scan_analyze_next_tuple() does, and we want
1585  * the behavior to be consistent.
1586  */
1587  reltuples += 1;
1588  }
1589  else if (HeapTupleIsHotUpdated(heapTuple))
1590  {
1591  /*
1592  * It's a HOT-updated tuple deleted by our own xact.
1593  * We can assume the deletion will commit (else the
1594  * index contents don't matter), so treat the same as
1595  * RECENTLY_DEAD HOT-updated tuples.
1596  */
1597  indexIt = false;
1598  /* mark the index as unsafe for old snapshots */
1599  indexInfo->ii_BrokenHotChain = true;
1600  }
1601  else
1602  {
1603  /*
1604  * It's a regular tuple deleted by our own xact. Index
1605  * it, but don't check for uniqueness nor count in
1606  * reltuples, the same as a RECENTLY_DEAD tuple.
1607  */
1608  indexIt = true;
1609  }
1610  /* In any case, exclude the tuple from unique-checking */
1611  tupleIsAlive = false;
1612  break;
1613  default:
1614  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1615  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1616  break;
1617  }
1618 
/* NOTE(review): listing line 1619 is missing; presumably the buffer unlock
 * matching the lock taken before the switch -- confirm. */
1620 
1621  if (!indexIt)
1622  continue;
1623  }
1624  else
1625  {
1626  /* heap_getnext did the time qual check */
1627  tupleIsAlive = true;
1628  reltuples += 1;
1629  }
1630 
/* NOTE(review): listing line 1631 is missing; presumably a per-tuple
 * memory-context reset before expression evaluation -- confirm. */
1632 
1633  /* Set up for predicate or expression evaluation */
1634  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1635 
1636  /*
1637  * In a partial index, discard tuples that don't satisfy the
1638  * predicate.
1639  */
1640  if (predicate != NULL)
1641  {
1642  if (!ExecQual(predicate, econtext))
1643  continue;
1644  }
1645 
1646  /*
1647  * For the current heap tuple, extract all the attributes we use in
1648  * this index, and note which are null. This also performs evaluation
1649  * of any expressions needed.
1650  */
1651  FormIndexDatum(indexInfo,
1652  slot,
1653  estate,
1654  values,
1655  isnull);
1656 
1657  /*
1658  * You'd think we should go ahead and build the index tuple here, but
1659  * some index AMs want to do further processing on the data first. So
1660  * pass the values[] and isnull[] arrays, instead.
1661  */
1662 
1663  if (HeapTupleIsHeapOnly(heapTuple))
1664  {
1665  /*
1666  * For a heap-only tuple, pretend its TID is that of the root. See
1667  * src/backend/access/heap/README.HOT for discussion.
1668  */
1669  ItemPointerData tid;
1670  OffsetNumber offnum;
1671 
1672  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1673 
1674  /*
1675  * If a HOT tuple points to a root that we don't know about,
1676  * obtain root items afresh. If that still fails, report it as
1677  * corruption.
1678  */
1679  if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1680  {
1681  Page page = BufferGetPage(hscan->rs_cbuf);
1682 
/* NOTE(review): listing lines 1683 and 1685 are missing; presumably the
 * buffer lock/unlock around heap_get_root_tuples() -- confirm. */
1684  heap_get_root_tuples(page, root_offsets);
1686  }
1687 
1688  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1689  ereport(ERROR,
/* NOTE(review): listing line 1690 is missing; presumably the errcode()
 * argument (data corruption) -- confirm. */
1691  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1692  ItemPointerGetBlockNumber(&heapTuple->t_self),
1693  offnum,
1694  RelationGetRelationName(heapRelation))));
1695 
1696  ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1697  root_offsets[offnum - 1]);
1698 
1699  /* Call the AM's callback routine to process the tuple */
1700  callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1701  callback_state);
1702  }
1703  else
1704  {
1705  /* Call the AM's callback routine to process the tuple */
1706  callback(indexRelation, &heapTuple->t_self, values, isnull,
1707  tupleIsAlive, callback_state);
1708  }
1709  }
1710 
1711  /* Report scan progress one last time. */
1712  if (progress)
1713  {
1714  BlockNumber blks_done;
1715 
1716  if (hscan->rs_base.rs_parallel != NULL)
1717  {
/* NOTE(review): listing lines 1718 and 1720 are missing; presumably the
 * ParallelBlockTableScanDesc declaration/assignment -- confirm. */
1719 
1721  blks_done = pbscan->phs_nblocks;
1722  }
1723  else
1724  blks_done = hscan->rs_nblocks;
1725 
/* NOTE(review): listing line 1726 is missing; presumably the final
 * progress-report call whose second argument follows -- confirm. */
1727  blks_done);
1728  }
1729 
1730  table_endscan(scan);
1731 
1732  /* we can now forget our snapshot, if set and registered by us */
1733  if (need_unregister_snapshot)
1734  UnregisterSnapshot(snapshot);
1735 
/* NOTE(review): listing line 1736 is missing; presumably the slot
 * teardown (drop of the tuple table slot) -- confirm. */
1737 
1738  FreeExecutorState(estate);
1739 
1740  /* These may have been pointing to the now-gone estate */
1741  indexInfo->ii_ExpressionsState = NIL;
1742  indexInfo->ii_PredicateState = NULL;
1743 
1744  return reltuples;
1745 }
1746 
/*
 * Second-phase validation scan for CREATE INDEX CONCURRENTLY (presumably
 * heapam_index_validate_scan -- TODO confirm: the function-name line of
 * this listing was dropped during extraction).  Merges a forward heap scan
 * against the sorted TIDs already present in the index (delivered via a
 * tuplesort) and inserts index entries for any heap tuples found missing.
 *
 * NOTE(review): several hyperlinked lines appear dropped from this listing
 * (progress-report calls, CHECK_FOR_INTERRUPTS, buffer lock/unlock,
 * per-tuple context resets); the most consequential gaps are flagged inline.
 */
1747 static void
/* NOTE(review): listing line 1748 (function name and first parameter,
 * presumably "heapam_index_validate_scan(Relation heapRelation,") is
 * missing -- confirm. */
1749  Relation indexRelation,
1750  IndexInfo *indexInfo,
1751  Snapshot snapshot,
/* NOTE(review): listing line 1752 is missing; since "state->htups",
 * "state->tuplesort" and "state->tups_inserted" are used below, this was
 * presumably the validation-state parameter -- confirm. */
1753 {
1754  TableScanDesc scan;
1755  HeapScanDesc hscan;
1756  HeapTuple heapTuple;
/* NOTE(review): listing line 1757 is missing; FormIndexDatum() below fills
 * a "values" array, so presumably "Datum values[INDEX_MAX_KEYS];" -- confirm. */
1758  bool isnull[INDEX_MAX_KEYS];
1759  ExprState *predicate;
1760  TupleTableSlot *slot;
1761  EState *estate;
1762  ExprContext *econtext;
1763  BlockNumber root_blkno = InvalidBlockNumber;
1764  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1765  bool in_index[MaxHeapTuplesPerPage];
1766  BlockNumber previous_blkno = InvalidBlockNumber;
1767 
1768  /* state variables for the merge */
1769  ItemPointer indexcursor = NULL;
1770  ItemPointerData decoded;
1771  bool tuplesort_empty = false;
1772 
1773  /*
1774  * sanity checks
1775  */
1776  Assert(OidIsValid(indexRelation->rd_rel->relam));
1777 
1778  /*
1779  * Need an EState for evaluation of index expressions and partial-index
1780  * predicates. Also a slot to hold the current tuple.
1781  */
1782  estate = CreateExecutorState();
1783  econtext = GetPerTupleExprContext(estate);
1784  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1785  &TTSOpsHeapTuple);
1786 
1787  /* Arrange for econtext's scan tuple to be the tuple under test */
1788  econtext->ecxt_scantuple = slot;
1789 
1790  /* Set up execution state for predicate, if any. */
1791  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1792 
1793  /*
1794  * Prepare for scan of the base relation. We need just those tuples
1795  * satisfying the passed-in reference snapshot. We must disable syncscan
1796  * here, because it's critical that we read from block zero forward to
1797  * match the sorted TIDs.
1798  */
1799  scan = table_beginscan_strat(heapRelation, /* relation */
1800  snapshot, /* snapshot */
1801  0, /* number of keys */
1802  NULL, /* scan key */
1803  true, /* buffer access strategy OK */
1804  false); /* syncscan not OK */
1805  hscan = (HeapScanDesc) scan;
1806 
/* NOTE(review): listing line 1807 is missing; presumably the
 * progress-report call (total blocks) whose second argument follows --
 * confirm. */
1808  hscan->rs_nblocks);
1809 
1810  /*
1811  * Scan all tuples matching the snapshot.
1812  */
1813  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1814  {
1815  ItemPointer heapcursor = &heapTuple->t_self;
1816  ItemPointerData rootTuple;
1817  OffsetNumber root_offnum;
1818 
1820 
1821  state->htups += 1;
1822 
1823  if ((previous_blkno == InvalidBlockNumber) ||
1824  (hscan->rs_cblock != previous_blkno))
1825  {
/* NOTE(review): listing line 1826 is missing; presumably the per-block
 * progress-report call whose second argument follows -- confirm. */
1827  hscan->rs_cblock);
1828  previous_blkno = hscan->rs_cblock;
1829  }
1830 
1831  /*
1832  * As commented in table_index_build_scan, we should index heap-only
1833  * tuples under the TIDs of their root tuples; so when we advance onto
1834  * a new heap page, build a map of root item offsets on the page.
1835  *
1836  * This complicates merging against the tuplesort output: we will
1837  * visit the live tuples in order by their offsets, but the root
1838  * offsets that we need to compare against the index contents might be
1839  * ordered differently. So we might have to "look back" within the
1840  * tuplesort output, but only within the current page. We handle that
1841  * by keeping a bool array in_index[] showing all the
1842  * already-passed-over tuplesort output TIDs of the current page. We
1843  * clear that array here, when advancing onto a new heap page.
1844  */
1845  if (hscan->rs_cblock != root_blkno)
1846  {
1847  Page page = BufferGetPage(hscan->rs_cbuf);
1848 
/* NOTE(review): listing lines 1849 and 1851 are missing; presumably the
 * buffer lock/unlock around heap_get_root_tuples() -- confirm. */
1850  heap_get_root_tuples(page, root_offsets);
1852 
1853  memset(in_index, 0, sizeof(in_index));
1854 
1855  root_blkno = hscan->rs_cblock;
1856  }
1857 
1858  /* Convert actual tuple TID to root TID */
1859  rootTuple = *heapcursor;
1860  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1861 
1862  if (HeapTupleIsHeapOnly(heapTuple))
1863  {
1864  root_offnum = root_offsets[root_offnum - 1];
1865  if (!OffsetNumberIsValid(root_offnum))
1866  ereport(ERROR,
/* NOTE(review): listing line 1867 is missing; presumably the errcode()
 * argument (data corruption) -- confirm. */
1868  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1869  ItemPointerGetBlockNumber(heapcursor),
1870  ItemPointerGetOffsetNumber(heapcursor),
1871  RelationGetRelationName(heapRelation))));
1872  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1873  }
1874 
1875  /*
1876  * "merge" by skipping through the index tuples until we find or pass
1877  * the current root tuple.
1878  */
1879  while (!tuplesort_empty &&
1880  (!indexcursor ||
1881  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1882  {
1883  Datum ts_val;
1884  bool ts_isnull;
1885 
1886  if (indexcursor)
1887  {
1888  /*
1889  * Remember index items seen earlier on the current heap page
1890  */
1891  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1892  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1893  }
1894 
1895  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1896  false, &ts_val, &ts_isnull,
1897  NULL);
1898  Assert(tuplesort_empty || !ts_isnull);
1899  if (!tuplesort_empty)
1900  {
1901  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1902  indexcursor = &decoded;
1903  }
1904  else
1905  {
1906  /* Be tidy */
1907  indexcursor = NULL;
1908  }
1909  }
1910 
1911  /*
1912  * If the tuplesort has overshot *and* we didn't see a match earlier,
1913  * then this tuple is missing from the index, so insert it.
1914  */
1915  if ((tuplesort_empty ||
1916  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1917  !in_index[root_offnum - 1])
1918  {
/* NOTE(review): listing line 1919 is missing; presumably a per-tuple
 * memory-context reset before expression evaluation -- confirm. */
1920 
1921  /* Set up for predicate or expression evaluation */
1922  ExecStoreHeapTuple(heapTuple, slot, false);
1923 
1924  /*
1925  * In a partial index, discard tuples that don't satisfy the
1926  * predicate.
1927  */
1928  if (predicate != NULL)
1929  {
1930  if (!ExecQual(predicate, econtext))
1931  continue;
1932  }
1933 
1934  /*
1935  * For the current heap tuple, extract all the attributes we use
1936  * in this index, and note which are null. This also performs
1937  * evaluation of any expressions needed.
1938  */
1939  FormIndexDatum(indexInfo,
1940  slot,
1941  estate,
1942  values,
1943  isnull);
1944 
1945  /*
1946  * You'd think we should go ahead and build the index tuple here,
1947  * but some index AMs want to do further processing on the data
1948  * first. So pass the values[] and isnull[] arrays, instead.
1949  */
1950 
1951  /*
1952  * If the tuple is already committed dead, you might think we
1953  * could suppress uniqueness checking, but this is no longer true
1954  * in the presence of HOT, because the insert is actually a proxy
1955  * for a uniqueness check on the whole HOT-chain. That is, the
1956  * tuple we have here could be dead because it was already
1957  * HOT-updated, and if so the updating transaction will not have
1958  * thought it should insert index entries. The index AM will
1959  * check the whole HOT-chain and correctly detect a conflict if
1960  * there is one.
1961  */
1962 
1963  index_insert(indexRelation,
1964  values,
1965  isnull,
1966  &rootTuple,
1967  heapRelation,
1968  indexInfo->ii_Unique ?
/* NOTE(review): listing line 1969 is missing; the ternary needs its
 * true/false arms here, presumably the unique-check-mode constants --
 * confirm. */
1970  false,
1971  indexInfo);
1972 
1973  state->tups_inserted += 1;
1974  }
1975  }
1976 
1977  table_endscan(scan);
1978 
/* NOTE(review): listing line 1979 is missing; presumably the slot
 * teardown (drop of the tuple table slot) -- confirm. */
1980 
1981  FreeExecutorState(estate);
1982 
1983  /* These may have been pointing to the now-gone estate */
1984  indexInfo->ii_ExpressionsState = NIL;
1985  indexInfo->ii_PredicateState = NULL;
1986 }
1987 
1988 /*
1989  * Return the number of blocks that have been read by this scan since
1990  * starting. This is meant for progress reporting rather than be fully
1991  * accurate: in a parallel scan, workers can be concurrently reading blocks
1992  * further ahead than what we report.
1993  */
1994 static BlockNumber
/* NOTE(review): listing line 1995 (the function name and parameter,
 * presumably "heapam_scan_get_blocks_done(HeapScanDesc hscan)" given the
 * call site earlier in the file) is missing -- confirm. */
1996 {
1997  ParallelBlockTableScanDesc bpscan = NULL;
1998  BlockNumber startblock;
1999  BlockNumber blocks_done;
2000 
2001  if (hscan->rs_base.rs_parallel != NULL)
2002  {
/* NOTE(review): listing line 2003 is missing; bpscan is dereferenced on
 * the next line and tested against NULL below, so presumably it was
 * assigned here from hscan->rs_base.rs_parallel -- confirm. */
2004  startblock = bpscan->phs_startblock;
2005  }
2006  else
2007  startblock = hscan->rs_startblock;
2008 
2009  /*
2010  * Might have wrapped around the end of the relation, if startblock was
2011  * not zero.
2012  */
2013  if (hscan->rs_cblock > startblock)
2014  blocks_done = hscan->rs_cblock - startblock;
2015  else
2016  {
2017  BlockNumber nblocks;
2018 
2019  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2020  blocks_done = nblocks - startblock +
2021  hscan->rs_cblock;
2022  }
2023 
2024  return blocks_done;
2025 }
2026 
2027 
2028 /* ------------------------------------------------------------------------
2029  * Miscellaneous callbacks for the heap AM
2030  * ------------------------------------------------------------------------
2031  */
2032 
2033 /*
2034  * Check to see whether the table needs a TOAST table. It does only if
2035  * (1) there are any toastable attributes, and (2) the maximum length
2036  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2037  * create a toast table for something like "f1 varchar(20)".)
2038  */
2039 static bool
/* NOTE(review): listing line 2040 (the function name and parameter,
 * presumably "heapam_relation_needs_toast_table(Relation rel)" given the
 * use of "rel" below) is missing -- confirm. */
2041 {
2042  int32 data_length = 0;
2043  bool maxlength_unknown = false;
2044  bool has_toastable_attrs = false;
2045  TupleDesc tupdesc = rel->rd_att;
2046  int32 tuple_length;
2047  int i;
2048 
/* Walk all attributes, accumulating an upper bound on the data length and
 * noting whether any attribute is toastable at all. */
2049  for (i = 0; i < tupdesc->natts; i++)
2050  {
2051  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2052 
2053  if (att->attisdropped)
2054  continue;
2055  data_length = att_align_nominal(data_length, att->attalign);
2056  if (att->attlen > 0)
2057  {
2058  /* Fixed-length types are never toastable */
2059  data_length += att->attlen;
2060  }
2061  else
2062  {
2063  int32 maxlen = type_maximum_size(att->atttypid,
2064  att->atttypmod);
2065 
2066  if (maxlen < 0)
2067  maxlength_unknown = true;
2068  else
2069  data_length += maxlen;
2070  if (att->attstorage != TYPSTORAGE_PLAIN)
2071  has_toastable_attrs = true;
2072  }
2073  }
2074  if (!has_toastable_attrs)
2075  return false; /* nothing to toast? */
2076  if (maxlength_unknown)
2077  return true; /* any unlimited-length attrs? */
/* Worst-case tuple size: aligned header plus null bitmap plus aligned data. */
2078  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2079  BITMAPLEN(tupdesc->natts)) +
2080  MAXALIGN(data_length);
2081  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2082 }
2083 
2084 /*
2085  * TOAST tables for heap relations are just heap relations.
2086  */
2087 static Oid
/* NOTE(review): listing line 2088 (the function name and parameter,
 * presumably "heapam_relation_toast_am(Relation rel)" given the use of
 * "rel" below) is missing -- confirm. */
2089 {
/* Reuse the parent relation's access-method OID for its TOAST table. */
2090  return rel->rd_rel->relam;
2091 }
2092 
2093 
2094 /* ------------------------------------------------------------------------
2095  * Planner related callbacks for the heap AM
2096  * ------------------------------------------------------------------------
2097  */
2098 
/*
 * Per-tuple overhead (max-aligned tuple header plus a line pointer) and
 * usable payload bytes per heap page; consumed by the planner size
 * estimation callback below.
 */
2099 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2100  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2101 #define HEAP_USABLE_BYTES_PER_PAGE \
2102  (BLCKSZ - SizeOfPageHeaderData)
2103 
/*
 * Planner size-estimation callback: delegates to the generic block-based
 * estimator using the heap-specific per-tuple/per-page constants defined
 * just above.
 */
2104 static void
/* NOTE(review): listing line 2105 (the function name and first parameters)
 * is missing -- confirm against the original. */
2106  BlockNumber *pages, double *tuples,
2107  double *allvisfrac)
2108 {
2109  table_block_relation_estimate_size(rel, attr_widths, pages,
2110  tuples, allvisfrac,
/* NOTE(review): listing lines 2111-2112 are missing; presumably the
 * HEAP_OVERHEAD_BYTES_PER_TUPLE and HEAP_USABLE_BYTES_PER_PAGE arguments
 * closing this call -- confirm. */
2113 }
2114 
2115 
2116 /* ------------------------------------------------------------------------
2117  * Executor related callbacks for the heap AM
2118  * ------------------------------------------------------------------------
2119  */
2120 
/*
 * Bitmap heap scan: advance to the next block named by the bitmap iterator
 * result (presumably heapam_scan_bitmap_next_block -- TODO confirm: the
 * function-name line of this listing was dropped during extraction).
 * Pins the page, optionally skips the fetch entirely when the page is
 * all-visible and no columns are needed, and collects the offsets of all
 * visible tuples into rs_vistuples[].  Returns true if the page yielded
 * anything for the caller to process.
 */
2121 static bool
/* NOTE(review): listing line 2122 (function name and first parameter,
 * presumably "heapam_scan_bitmap_next_block(TableScanDesc scan,") is
 * missing -- confirm. */
2123  TBMIterateResult *tbmres)
2124 {
2125  HeapScanDesc hscan = (HeapScanDesc) scan;
2126  BlockNumber block = tbmres->blockno;
2127  Buffer buffer;
2128  Snapshot snapshot;
2129  int ntup;
2130 
2131  hscan->rs_cindex = 0;
2132  hscan->rs_ntuples = 0;
2133 
2134  /*
2135  * We can skip fetching the heap page if we don't need any fields from the
2136  * heap, the bitmap entries don't need rechecking, and all tuples on the
2137  * page are visible to our transaction.
2138  */
2139  if (!(scan->rs_flags & SO_NEED_TUPLES) &&
2140  !tbmres->recheck &&
2141  VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer))
2142  {
2143  /* can't be lossy in the skip_fetch case */
2144  Assert(tbmres->ntuples >= 0);
2145  Assert(hscan->rs_empty_tuples_pending >= 0);
2146 
2147  hscan->rs_empty_tuples_pending += tbmres->ntuples;
2148 
2149  return true;
2150  }
2151 
2152  /*
2153  * Ignore any claimed entries past what we think is the end of the
2154  * relation. It may have been extended after the start of our scan (we
2155  * only hold an AccessShareLock, and it could be inserts from this
2156  * backend). We don't take this optimization in SERIALIZABLE isolation
2157  * though, as we need to examine all invisible tuples reachable by the
2158  * index.
2159  */
2160  if (!IsolationIsSerializable() && block >= hscan->rs_nblocks)
2161  return false;
2162 
2163  /*
2164  * Acquire pin on the target heap page, trading in any pin we held before.
2165  */
2166  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2167  scan->rs_rd,
2168  block);
2169  hscan->rs_cblock = block;
2170  buffer = hscan->rs_cbuf;
2171  snapshot = scan->rs_snapshot;
2172 
2173  ntup = 0;
2174 
2175  /*
2176  * Prune and repair fragmentation for the whole page, if possible.
2177  */
2178  heap_page_prune_opt(scan->rs_rd, buffer);
2179 
2180  /*
2181  * We must hold share lock on the buffer content while examining tuple
2182  * visibility. Afterwards, however, the tuples we have found to be
2183  * visible are guaranteed good as long as we hold the buffer pin.
2184  */
2185  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2186 
2187  /*
2188  * We need two separate strategies for lossy and non-lossy cases.
2189  */
2190  if (tbmres->ntuples >= 0)
2191  {
2192  /*
2193  * Bitmap is non-lossy, so we just look through the offsets listed in
2194  * tbmres; but we have to follow any HOT chain starting at each such
2195  * offset.
2196  */
2197  int curslot;
2198 
2199  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2200  {
2201  OffsetNumber offnum = tbmres->offsets[curslot];
2202  ItemPointerData tid;
2203  HeapTupleData heapTuple;
2204 
2205  ItemPointerSet(&tid, block, offnum);
2206  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2207  &heapTuple, NULL, true))
2208  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2209  }
2210  }
2211  else
2212  {
2213  /*
2214  * Bitmap is lossy, so we must examine each line pointer on the page.
2215  * But we can ignore HOT chains, since we'll check each tuple anyway.
2216  */
2217  Page page = BufferGetPage(buffer);
2218  OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
2219  OffsetNumber offnum;
2220 
2221  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2222  {
2223  ItemId lp;
2224  HeapTupleData loctup;
2225  bool valid;
2226 
2227  lp = PageGetItemId(page, offnum);
2228  if (!ItemIdIsNormal(lp))
2229  continue;
2230  loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2231  loctup.t_len = ItemIdGetLength(lp);
2232  loctup.t_tableOid = scan->rs_rd->rd_id;
2233  ItemPointerSet(&loctup.t_self, block, offnum);
2234  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2235  if (valid)
2236  {
2237  hscan->rs_vistuples[ntup++] = offnum;
2238  PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
2239  HeapTupleHeaderGetXmin(loctup.t_data));
2240  }
2241  HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2242  buffer, snapshot);
2243  }
2244  }
2245 
2246  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2247 
2248  Assert(ntup <= MaxHeapTuplesPerPage);
2249  hscan->rs_ntuples = ntup;
2250 
2251  return ntup > 0;
2252 }
2253 
2254 static bool
2256  TBMIterateResult *tbmres,
2257  TupleTableSlot *slot)
2258 {
2259  HeapScanDesc hscan = (HeapScanDesc) scan;
2260  OffsetNumber targoffset;
2261  Page page;
2262  ItemId lp;
2263 
2264  if (hscan->rs_empty_tuples_pending > 0)
2265  {
2266  /*
2267  * If we don't have to fetch the tuple, just return nulls.
2268  */
2269  ExecStoreAllNullTuple(slot);
2270  hscan->rs_empty_tuples_pending--;
2271  return true;
2272  }
2273 
2274  /*
2275  * Out of range? If so, nothing more to look at on this page
2276  */
2277  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2278  return false;
2279 
2280  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2281  page = BufferGetPage(hscan->rs_cbuf);
2282  lp = PageGetItemId(page, targoffset);
2283  Assert(ItemIdIsNormal(lp));
2284 
2285  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2286  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2287  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2288  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2289 
2291 
2292  /*
2293  * Set up the result slot to point to this tuple. Note that the slot
2294  * acquires a pin on the buffer.
2295  */
2297  slot,
2298  hscan->rs_cbuf);
2299 
2300  hscan->rs_cindex++;
2301 
2302  return true;
2303 }
2304 
2305 static bool
2307 {
2308  HeapScanDesc hscan = (HeapScanDesc) scan;
2309  TsmRoutine *tsm = scanstate->tsmroutine;
2310  BlockNumber blockno;
2311 
2312  /* return false immediately if relation is empty */
2313  if (hscan->rs_nblocks == 0)
2314  return false;
2315 
2316  /* release previous scan buffer, if any */
2317  if (BufferIsValid(hscan->rs_cbuf))
2318  {
2319  ReleaseBuffer(hscan->rs_cbuf);
2320  hscan->rs_cbuf = InvalidBuffer;
2321  }
2322 
2323  if (tsm->NextSampleBlock)
2324  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2325  else
2326  {
2327  /* scanning table sequentially */
2328 
2329  if (hscan->rs_cblock == InvalidBlockNumber)
2330  {
2331  Assert(!hscan->rs_inited);
2332  blockno = hscan->rs_startblock;
2333  }
2334  else
2335  {
2336  Assert(hscan->rs_inited);
2337 
2338  blockno = hscan->rs_cblock + 1;
2339 
2340  if (blockno >= hscan->rs_nblocks)
2341  {
2342  /* wrap to beginning of rel, might not have started at 0 */
2343  blockno = 0;
2344  }
2345 
2346  /*
2347  * Report our new scan position for synchronization purposes.
2348  *
2349  * Note: we do this before checking for end of scan so that the
2350  * final state of the position hint is back at the start of the
2351  * rel. That's not strictly necessary, but otherwise when you run
2352  * the same query multiple times the starting position would shift
2353  * a little bit backwards on every invocation, which is confusing.
2354  * We don't guarantee any specific ordering in general, though.
2355  */
2356  if (scan->rs_flags & SO_ALLOW_SYNC)
2357  ss_report_location(scan->rs_rd, blockno);
2358 
2359  if (blockno == hscan->rs_startblock)
2360  {
2361  blockno = InvalidBlockNumber;
2362  }
2363  }
2364  }
2365 
2366  hscan->rs_cblock = blockno;
2367 
2368  if (!BlockNumberIsValid(blockno))
2369  {
2370  hscan->rs_inited = false;
2371  return false;
2372  }
2373 
2374  Assert(hscan->rs_cblock < hscan->rs_nblocks);
2375 
2376  /*
2377  * Be sure to check for interrupts at least once per page. Checks at
2378  * higher code levels won't be able to stop a sample scan that encounters
2379  * many pages' worth of consecutive dead tuples.
2380  */
2382 
2383  /* Read page using selected strategy */
2385  blockno, RBM_NORMAL, hscan->rs_strategy);
2386 
2387  /* in pagemode, prune the page and determine visible tuple offsets */
2388  if (hscan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
2389  heap_prepare_pagescan(scan);
2390 
2391  hscan->rs_inited = true;
2392  return true;
2393 }
2394 
2395 static bool
2397  TupleTableSlot *slot)
2398 {
2399  HeapScanDesc hscan = (HeapScanDesc) scan;
2400  TsmRoutine *tsm = scanstate->tsmroutine;
2401  BlockNumber blockno = hscan->rs_cblock;
2402  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2403 
2404  Page page;
2405  bool all_visible;
2406  OffsetNumber maxoffset;
2407 
2408  /*
2409  * When not using pagemode, we must lock the buffer during tuple
2410  * visibility checks.
2411  */
2412  if (!pagemode)
2414 
2415  page = (Page) BufferGetPage(hscan->rs_cbuf);
2416  all_visible = PageIsAllVisible(page) &&
2418  maxoffset = PageGetMaxOffsetNumber(page);
2419 
2420  for (;;)
2421  {
2422  OffsetNumber tupoffset;
2423 
2425 
2426  /* Ask the tablesample method which tuples to check on this page. */
2427  tupoffset = tsm->NextSampleTuple(scanstate,
2428  blockno,
2429  maxoffset);
2430 
2431  if (OffsetNumberIsValid(tupoffset))
2432  {
2433  ItemId itemid;
2434  bool visible;
2435  HeapTuple tuple = &(hscan->rs_ctup);
2436 
2437  /* Skip invalid tuple pointers. */
2438  itemid = PageGetItemId(page, tupoffset);
2439  if (!ItemIdIsNormal(itemid))
2440  continue;
2441 
2442  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2443  tuple->t_len = ItemIdGetLength(itemid);
2444  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2445 
2446 
2447  if (all_visible)
2448  visible = true;
2449  else
2450  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2451  tuple, tupoffset);
2452 
2453  /* in pagemode, heap_prepare_pagescan did this for us */
2454  if (!pagemode)
2455  HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2456  hscan->rs_cbuf, scan->rs_snapshot);
2457 
2458  /* Try next tuple from same page. */
2459  if (!visible)
2460  continue;
2461 
2462  /* Found visible tuple, return it. */
2463  if (!pagemode)
2465 
2466  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2467 
2468  /* Count successfully-fetched tuples as heap fetches */
2470 
2471  return true;
2472  }
2473  else
2474  {
2475  /*
2476  * If we get here, it means we've exhausted the items on this page
2477  * and it's time to move to the next.
2478  */
2479  if (!pagemode)
2481 
2482  ExecClearTuple(slot);
2483  return false;
2484  }
2485  }
2486 
2487  Assert(0);
2488 }
2489 
2490 
2491 /* ----------------------------------------------------------------------------
2492  * Helper functions for the above.
2493  * ----------------------------------------------------------------------------
2494  */
2495 
2496 /*
2497  * Reconstruct and rewrite the given tuple
2498  *
2499  * We cannot simply copy the tuple as-is, for several reasons:
2500  *
2501  * 1. We'd like to squeeze out the values of any dropped columns, both
2502  * to save space and to ensure we have no corner-case failures. (It's
2503  * possible for example that the new table hasn't got a TOAST table
2504  * and so is unable to store any large values of dropped cols.)
2505  *
2506  * 2. The tuple might not even be legal for the new table; this is
2507  * currently only known to happen as an after-effect of ALTER TABLE
2508  * SET WITHOUT OIDS.
2509  *
2510  * So, we must reconstruct the tuple from component Datums.
2511  */
2512 static void
2514  Relation OldHeap, Relation NewHeap,
2515  Datum *values, bool *isnull, RewriteState rwstate)
2516 {
2517  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2518  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2519  HeapTuple copiedTuple;
2520  int i;
2521 
2522  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2523 
2524  /* Be sure to null out any dropped columns */
2525  for (i = 0; i < newTupDesc->natts; i++)
2526  {
2527  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2528  isnull[i] = true;
2529  }
2530 
2531  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2532 
2533  /* The heap rewrite module does the rest */
2534  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2535 
2536  heap_freetuple(copiedTuple);
2537 }
2538 
2539 /*
2540  * Check visibility of the tuple.
2541  */
2542 static bool
2544  HeapTuple tuple,
2545  OffsetNumber tupoffset)
2546 {
2547  HeapScanDesc hscan = (HeapScanDesc) scan;
2548 
2549  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2550  {
2551  /*
2552  * In pageatatime mode, heap_prepare_pagescan() already did visibility
2553  * checks, so just look at the info it left in rs_vistuples[].
2554  *
2555  * We use a binary search over the known-sorted array. Note: we could
2556  * save some effort if we insisted that NextSampleTuple select tuples
2557  * in increasing order, but it's not clear that there would be enough
2558  * gain to justify the restriction.
2559  */
2560  int start = 0,
2561  end = hscan->rs_ntuples - 1;
2562 
2563  while (start <= end)
2564  {
2565  int mid = (start + end) / 2;
2566  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2567 
2568  if (tupoffset == curoffset)
2569  return true;
2570  else if (tupoffset < curoffset)
2571  end = mid - 1;
2572  else
2573  start = mid + 1;
2574  }
2575 
2576  return false;
2577  }
2578  else
2579  {
2580  /* Otherwise, we have to check the tuple individually. */
2581  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2582  buffer);
2583  }
2584 }
2585 
2586 
2587 /* ------------------------------------------------------------------------
2588  * Definition of the heap table access method.
2589  * ------------------------------------------------------------------------
2590  */
2591 
2592 static const TableAmRoutine heapam_methods = {
2593  .type = T_TableAmRoutine,
2594 
2595  .slot_callbacks = heapam_slot_callbacks,
2596 
2597  .scan_begin = heap_beginscan,
2598  .scan_end = heap_endscan,
2599  .scan_rescan = heap_rescan,
2600  .scan_getnextslot = heap_getnextslot,
2601 
2602  .scan_set_tidrange = heap_set_tidrange,
2603  .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
2604 
2605  .parallelscan_estimate = table_block_parallelscan_estimate,
2606  .parallelscan_initialize = table_block_parallelscan_initialize,
2607  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2608 
2609  .index_fetch_begin = heapam_index_fetch_begin,
2610  .index_fetch_reset = heapam_index_fetch_reset,
2611  .index_fetch_end = heapam_index_fetch_end,
2612  .index_fetch_tuple = heapam_index_fetch_tuple,
2613 
2614  .tuple_insert = heapam_tuple_insert,
2615  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2616  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2617  .multi_insert = heap_multi_insert,
2618  .tuple_delete = heapam_tuple_delete,
2619  .tuple_update = heapam_tuple_update,
2620  .tuple_lock = heapam_tuple_lock,
2621 
2622  .tuple_fetch_row_version = heapam_fetch_row_version,
2623  .tuple_get_latest_tid = heap_get_latest_tid,
2624  .tuple_tid_valid = heapam_tuple_tid_valid,
2625  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2626  .index_delete_tuples = heap_index_delete_tuples,
2627 
2628  .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
2629  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2630  .relation_copy_data = heapam_relation_copy_data,
2631  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2632  .relation_vacuum = heap_vacuum_rel,
2633  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2634  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2635  .index_build_range_scan = heapam_index_build_range_scan,
2636  .index_validate_scan = heapam_index_validate_scan,
2637 
2638  .relation_size = table_block_relation_size,
2639  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2640  .relation_toast_am = heapam_relation_toast_am,
2641  .relation_fetch_toast_slice = heap_fetch_toast_slice,
2642 
2643  .relation_estimate_size = heapam_estimate_rel_size,
2644 
2645  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2646  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2647  .scan_sample_next_block = heapam_scan_sample_next_block,
2648  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2649 };
2650 
2651 
2652 const TableAmRoutine *
2654 {
2655  return &heapam_methods;
2656 }
2657 
2658 Datum
2660 {
2662 }
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
uint32 BlockNumber
Definition: block.h:31
#define InvalidBlockNumber
Definition: block.h:33
static bool BlockNumberIsValid(BlockNumber blockNumber)
Definition: block.h:71
static Datum values[MAXATTR]
Definition: bootstrap.c:152
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3667
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:2537
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4850
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4867
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5085
void FlushRelationBuffers(Relation rel)
Definition: bufmgr.c:4435
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:792
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:193
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:194
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:404
@ RBM_NORMAL
Definition: bufmgr.h:45
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:355
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
unsigned int uint32
Definition: c.h:506
#define MAXALIGN(LEN)
Definition: c.h:811
signed int int32
Definition: c.h:494
#define Assert(condition)
Definition: c.h:858
TransactionId MultiXactId
Definition: c.h:662
unsigned char uint8
Definition: c.h:504
uint32 CommandId
Definition: c.h:666
uint32 TransactionId
Definition: c.h:652
#define OidIsValid(objectId)
Definition: c.h:775
bool IsSystemRelation(Relation relation)
Definition: catalog.c:73
CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup)
Definition: combocid.c:104
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errcode(int sqlerrcode)
Definition: elog.c:857
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define WARNING
Definition: elog.h:36
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
ExprState * ExecPrepareQual(List *qual, EState *estate)
Definition: execExpr.c:767
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1341
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
const TupleTableSlotOps TTSOpsBufferHeapTuple
Definition: execTuples.c:87
TupleTableSlot * ExecStoreAllNullTuple(TupleTableSlot *slot)
Definition: execTuples.c:1663
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
TupleTableSlot * ExecStoreHeapTuple(HeapTuple tuple, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1439
const TupleTableSlotOps TTSOpsHeapTuple
Definition: execTuples.c:85
TupleTableSlot * ExecStorePinnedBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1505
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1325
EState * CreateExecutorState(void)
Definition: execUtils.c:88
void FreeExecutorState(EState *estate)
Definition: execUtils.c:189
#define GetPerTupleExprContext(estate)
Definition: executor.h:550
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:413
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int32 type_maximum_size(Oid type_oid, int32 typemod)
Definition: format_type.c:412
@ UNIQUE_CHECK_NO
Definition: genam.h:117
@ UNIQUE_CHECK_YES
Definition: genam.h:118
int maintenance_work_mem
Definition: globals.c:130
return str start
void heap_finish_speculative(Relation relation, ItemPointer tid)
Definition: heapam.c:5818
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:1993
bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
Definition: heapam.c:1510
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2682
bool heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: heapam.c:1300
void heap_endscan(TableScanDesc sscan)
Definition: heapam.c:1209
void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: heapam.c:1151
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3149
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1251
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1630
bool heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: heapam.c:1403
void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
Definition: heapam.c:1330
void heap_abort_speculative(Relation relation, ItemPointer tid)
Definition: heapam.c:5905
TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
Definition: heapam.c:1037
void heap_prepare_pagescan(TableScanDesc sscan)
Definition: heapam.c:493
TransactionId heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
Definition: heapam.c:7634
void heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:2262
TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
Definition: heapam.c:4309
void heap_get_latest_tid(TableScanDesc sscan, ItemPointer tid)
Definition: heapam.c:1782
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:421
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:10059
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:38
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:109
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:127
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:128
@ HEAPTUPLE_LIVE
Definition: heapam.h:126
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_DEAD
Definition: heapam.h:125
static double heapam_index_build_range_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
#define HEAP_OVERHEAD_BYTES_PER_TUPLE
static void heapam_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
static IndexFetchTableData * heapam_index_fetch_begin(Relation rel)
static const TableAmRoutine heapam_methods
static void heapam_index_validate_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, Snapshot snapshot, ValidateIndexState *state)
static bool heapam_index_fetch_tuple(struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead)
static void heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
static void heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate, uint32 specToken)
static bool heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
static void heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate)
static bool heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
static TM_Result heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
static void heapam_index_fetch_reset(IndexFetchTableData *scan)
static Oid heapam_relation_toast_am(Relation rel)
static TM_Result heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
static bool heapam_relation_needs_toast_table(Relation rel)
static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan)
static bool heapam_scan_bitmap_next_block(TableScanDesc scan, TBMIterateResult *tbmres)
const TableAmRoutine * GetHeapamTableAmRoutine(void)
static bool heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer, HeapTuple tuple, OffsetNumber tupoffset)
static bool heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, TupleTableSlot *slot)
static const TupleTableSlotOps * heapam_slot_callbacks(Relation relation)
static bool heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
#define HEAP_USABLE_BYTES_PER_PAGE
static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd)
static void heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
static void heapam_index_fetch_end(IndexFetchTableData *scan)
static void heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken, bool succeeded)
static void reform_and_rewrite_tuple(HeapTuple tuple, Relation OldHeap, Relation NewHeap, Datum *values, bool *isnull, RewriteState rwstate)
static void heapam_relation_set_new_filelocator(Relation rel, const RelFileLocator *newrlocator, char persistence, TransactionId *freezeXid, MultiXactId *minmulti)
static bool heapam_fetch_row_version(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot)
static bool heapam_scan_bitmap_next_tuple(TableScanDesc scan, TBMIterateResult *tbmres, TupleTableSlot *slot)
static void heapam_relation_nontransactional_truncate(Relation rel)
static bool heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
Datum heap_tableam_handler(PG_FUNCTION_ARGS)
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: heaptoast.c:626
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1116
void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull)
Definition: heaptuple.c:1345
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1434
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HeapTupleHeaderSetSpeculativeToken(tup, token)
Definition: htup_details.h:439
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
#define BITMAPLEN(NATTS)
Definition: htup_details.h:545
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
void FormIndexDatum(IndexInfo *indexInfo, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
Definition: index.c:2701
static void itemptr_decode(ItemPointer itemptr, int64 encoded)
Definition: index.h:210
bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: indexam.c:673
bool index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique, bool indexUnchanged, IndexInfo *indexInfo)
Definition: indexam.c:213
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys)
Definition: indexam.c:256
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:378
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:352
int i
Definition: isn.c:73
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static bool ItemPointerIndicatesMovedPartitions(const ItemPointerData *pointer)
Definition: itemptr.h:197
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:667
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:740
@ XLTW_FetchUpdated
Definition: lmgr.h:33
@ XLTW_InsertIndexUnique
Definition: lmgr.h:32
LockWaitPolicy
Definition: lockoptions.h:37
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockWaitError
Definition: lockoptions.h:43
LockTupleMode
Definition: lockoptions.h:50
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:383
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc0(Size size)
Definition: mcxt.c:1346
void * palloc(Size size)
Definition: mcxt.c:1316
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2611
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define INDEX_MAX_KEYS
#define NIL
Definition: pg_list.h:68
static char * buf
Definition: pg_test_fsync.c:73
#define ERRCODE_T_R_SERIALIZATION_FAILURE
Definition: pgbench.c:76
int progress
Definition: pgbench.c:261
#define pgstat_count_heap_fetch(rel)
Definition: pgstat.h:620
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:615
static int64 DatumGetInt64(Datum X)
Definition: postgres.h:385
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2606
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1993
#define PROGRESS_CLUSTER_INDEX_RELID
Definition: progress.h:59
#define PROGRESS_CLUSTER_HEAP_BLKS_SCANNED
Definition: progress.h:63
#define PROGRESS_CLUSTER_PHASE_SORT_TUPLES
Definition: progress.h:69
#define PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP
Definition: progress.h:67
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:122
#define PROGRESS_CLUSTER_PHASE
Definition: progress.h:58
#define PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED
Definition: progress.h:60
#define PROGRESS_CLUSTER_TOTAL_HEAP_BLKS
Definition: progress.h:62
#define PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
Definition: progress.h:61
#define PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP
Definition: progress.h:68
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:121
#define PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP
Definition: progress.h:70
void heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
Definition: pruneheap.c:1764
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:193
Buffer read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
Definition: read_stream.c:572
#define RelationGetRelid(relation)
Definition: rel.h:505
static SMgrRelation RelationGetSmgr(Relation rel)
Definition: rel.h:567
#define RelationGetDescr(relation)
Definition: rel.h:531
#define RelationGetRelationName(relation)
Definition: rel.h:539
#define RelationGetTargetBlock(relation)
Definition: rel.h:601
#define RelationIsPermanent(relation)
Definition: rel.h:617
ForkNumber
Definition: relpath.h:48
@ MAIN_FORKNUM
Definition: relpath.h:50
@ INIT_FORKNUM
Definition: relpath.h:53
#define MAX_FORKNUM
Definition: relpath.h:62
struct ParallelBlockTableScanDescData * ParallelBlockTableScanDesc
Definition: relscan.h:85
void end_heap_rewrite(RewriteState state)
Definition: rewriteheap.c:297
bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
Definition: rewriteheap.c:543
RewriteState begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId cutoff_multi)
Definition: rewriteheap.c:234
void rewrite_heap_tuple(RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple)
Definition: rewriteheap.c:341
@ ForwardScanDirection
Definition: sdir.h:28
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:411
void smgrclose(SMgrRelation reln)
Definition: smgr.c:320
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:398
TransactionId RecentXmin
Definition: snapmgr.c:99
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:216
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:836
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:794
#define SnapshotAny
Definition: snapmgr.h:33
#define InitDirtySnapshot(snapshotdata)
Definition: snapmgr.h:40
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
void RelationCopyStorage(SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence)
Definition: storage.c:452
SMgrRelation RelationCreateStorage(RelFileLocator rlocator, char relpersistence, bool register_delete)
Definition: storage.c:121
void log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
Definition: storage.c:186
void RelationDropStorage(Relation rel)
Definition: storage.c:206
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:288
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:263
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:255
Buffer rs_vmbuffer
Definition: heapam.h:101
BufferAccessStrategy rs_strategy
Definition: heapam.h:70
bool rs_inited
Definition: heapam.h:64
Buffer rs_cbuf
Definition: heapam.h:67
BlockNumber rs_startblock
Definition: heapam.h:59
HeapTupleData rs_ctup
Definition: heapam.h:72
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]
Definition: heapam.h:107
BlockNumber rs_nblocks
Definition: heapam.h:58
int rs_empty_tuples_pending
Definition: heapam.h:102
BlockNumber rs_cblock
Definition: heapam.h:66
TableScanDescData rs_base
Definition: heapam.h:55
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
ItemPointerData t_ctid
Definition: htup_details.h:161
Buffer xs_cbuf
Definition: heapam.h:118
IndexFetchTableData xs_base
Definition: heapam.h:116
bool ii_Unique
Definition: execnodes.h:197
bool ii_BrokenHotChain
Definition: execnodes.h:203
ExprState * ii_PredicateState
Definition: execnodes.h:190
Oid * ii_ExclusionOps
Definition: execnodes.h:191
bool ii_Concurrent
Definition: execnodes.h:202
List * ii_ExpressionsState
Definition: execnodes.h:188
List * ii_Predicate
Definition: execnodes.h:189
TupleDesc rd_att
Definition: rel.h:112
Oid rd_id
Definition: rel.h:113
Form_pg_class rd_rel
Definition: rel.h:111
struct TsmRoutine * tsmroutine
Definition: execnodes.h:1590
TransactionId xmin
Definition: snapshot.h:157
TransactionId xmax
Definition: snapshot.h:158
bool takenDuringRecovery
Definition: snapshot.h:184
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: tidbitmap.h:46
BlockNumber blockno
Definition: tidbitmap.h:42
bool traversed
Definition: tableam.h:153
TransactionId xmax
Definition: tableam.h:151
CommandId cmax
Definition: tableam.h:152
ItemPointerData ctid
Definition: tableam.h:150
NodeTag type
Definition: tableam.h:293
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
struct SnapshotData * rs_snapshot
Definition: relscan.h:35
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:49
NextSampleTuple_function NextSampleTuple
Definition: tsmapi.h:74
NextSampleBlock_function NextSampleBlock
Definition: tsmapi.h:73
Oid tts_tableOid
Definition: tuptable.h:130
ItemPointerData tts_tid
Definition: tuptable.h:129
Definition: regguts.h:323
void ss_report_location(Relation rel, BlockNumber location)
Definition: syncscan.c:289
Size table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:388
void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:406
uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.c:616
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:91
Size table_block_parallelscan_estimate(Relation rel)
Definition: tableam.c:382
void table_block_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac, Size overhead_bytes_per_tuple, Size usable_bytes_per_page)
Definition: tableam.c:653
@ SO_NEED_TUPLES
Definition: tableam.h:72
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:62
@ SO_ALLOW_SYNC
Definition: tableam.h:60
static TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:918
TU_UpdateIndexes
Definition: tableam.h:118
@ TU_Summarizing
Definition: tableam.h:126
@ TU_All
Definition: tableam.h:123
@ TU_None
Definition: tableam.h:120
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:1029
TM_Result
Definition: tableam.h:80
@ TM_Ok
Definition: tableam.h:85
@ TM_Deleted
Definition: tableam.h:100
@ TM_WouldBlock
Definition: tableam.h:110
@ TM_Updated
Definition: tableam.h:97
@ TM_SelfModified
Definition: tableam.h:91
static TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync)
Definition: tableam.h:942
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION
Definition: tableam.h:268
void(* IndexBuildCallback)(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: tableam.h:272
static bool table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:1065
#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS
Definition: tableam.h:266
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:46
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:1385
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:971
#define TUPLESORT_NONE
Definition: tuplesort.h:93
HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
Tuplesortstate * tuplesort_begin_cluster(TupleDesc tupDesc, Relation indexRel, int workMem, SortCoordinate coordinate, int sortopt)
void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy, Datum *val, bool *isNull, Datum *abbrev)
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:129
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454
#define TTS_IS_BUFFERTUPLE(slot)
Definition: tuptable.h:237
void heap_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:295
#define VM_ALL_VISIBLE(r, b, v)
Definition: visibilitymap.h:24
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:938
#define IsolationIsSerializable()
Definition: xact.h:52