1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This file wires up the lower-level heapam.c et al. routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/heaptoast.h"
25 #include "access/multixact.h"
26 #include "access/rewriteheap.h"
27 #include "access/syncscan.h"
28 #include "access/tableam.h"
29 #include "access/tsmapi.h"
30 #include "access/xact.h"
31 #include "catalog/catalog.h"
32 #include "catalog/index.h"
33 #include "catalog/storage.h"
34 #include "catalog/storage_xlog.h"
35 #include "commands/progress.h"
36 #include "executor/executor.h"
37 #include "miscadmin.h"
38 #include "pgstat.h"
39 #include "storage/bufmgr.h"
40 #include "storage/bufpage.h"
41 #include "storage/lmgr.h"
42 #include "storage/predicate.h"
43 #include "storage/procarray.h"
44 #include "storage/smgr.h"
45 #include "utils/builtins.h"
46 #include "utils/rel.h"
47 
48 static void reform_and_rewrite_tuple(HeapTuple tuple,
49  Relation OldHeap, Relation NewHeap,
50  Datum *values, bool *isnull, RewriteState rwstate);
51 
52 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
53  HeapTuple tuple,
54  OffsetNumber tupoffset);
55 
56 static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
57 
58 static const TableAmRoutine heapam_methods;
59 
60 
61 /* ------------------------------------------------------------------------
62  * Slot related callbacks for heap AM
63  * ------------------------------------------------------------------------
64  */
65 
66 static const TupleTableSlotOps *
67 heapam_slot_callbacks(Relation relation)
68 {
69  return &TTSOpsBufferHeapTuple;
70 }
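/*
 * Descriptive note (an illustration, not part of the upstream file): because
 * the heap AM reports TTSOpsBufferHeapTuple here, slots created via
 * table_slot_create() for a heap relation can hold a tuple that still lives
 * in a shared buffer, keeping only a buffer pin rather than copying the
 * tuple.  The fetch callbacks below rely on this by storing tuples with
 * ExecStoreBufferHeapTuple()/ExecStorePinnedBufferHeapTuple().
 */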
71 
72 
73 /* ------------------------------------------------------------------------
74  * Index Scan Callbacks for heap AM
75  * ------------------------------------------------------------------------
76  */
77 
78 static IndexFetchTableData *
79 heapam_index_fetch_begin(Relation rel)
80 {
81  IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
82 
83  hscan->xs_base.rel = rel;
84  hscan->xs_cbuf = InvalidBuffer;
85 
86  return &hscan->xs_base;
87 }
88 
89 static void
90 heapam_index_fetch_reset(IndexFetchTableData *scan)
91 {
92  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
93 
94  if (BufferIsValid(hscan->xs_cbuf))
95  {
96  ReleaseBuffer(hscan->xs_cbuf);
97  hscan->xs_cbuf = InvalidBuffer;
98  }
99 }
100 
101 static void
102 heapam_index_fetch_end(IndexFetchTableData *scan)
103 {
104  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
105 
106  heapam_index_fetch_reset(scan);
107 
108  pfree(hscan);
109 }
110 
111 static bool
112 heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
113  ItemPointer tid,
114  Snapshot snapshot,
115  TupleTableSlot *slot,
116  bool *call_again, bool *all_dead)
117 {
118  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
119  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
120  bool got_heap_tuple;
121 
122  Assert(TTS_IS_BUFFERTUPLE(slot));
123 
124  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
125  if (!*call_again)
126  {
127  /* Switch to correct buffer if we don't have it already */
128  Buffer prev_buf = hscan->xs_cbuf;
129 
130  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
131  hscan->xs_base.rel,
132  ItemPointerGetBlockNumber(tid));
133 
134  /*
135  * Prune page, but only if we weren't already on this page
136  */
137  if (prev_buf != hscan->xs_cbuf)
138  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
139  }
140 
141  /* Obtain share-lock on the buffer so we can examine visibility */
142  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
143  got_heap_tuple = heap_hot_search_buffer(tid,
144  hscan->xs_base.rel,
145  hscan->xs_cbuf,
146  snapshot,
147  &bslot->base.tupdata,
148  all_dead,
149  !*call_again);
150  bslot->base.tupdata.t_self = *tid;
151  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
152 
153  if (got_heap_tuple)
154  {
155  /*
156  * Only in a non-MVCC snapshot can more than one member of the HOT
157  * chain be visible.
158  */
159  *call_again = !IsMVCCSnapshot(snapshot);
160 
161  slot->tts_tableOid = RelationGetRelid(scan->rel);
162  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
163  }
164  else
165  {
166  /* We've reached the end of the HOT chain. */
167  *call_again = false;
168  }
169 
170  return got_heap_tuple;
171 }
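/*
 * Descriptive note (an illustration, not part of the upstream file): an index
 * AM reaches this callback through index_fetch_heap(), which loops while
 * *call_again remains true so that, under a non-MVCC snapshot, every visible
 * member of a HOT chain is returned.  With an MVCC snapshot at most one chain
 * member can be visible, so *call_again is cleared after the first match.
 */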
172 
173 
174 /* ------------------------------------------------------------------------
175  * Callbacks for non-modifying operations on individual tuples for heap AM
176  * ------------------------------------------------------------------------
177  */
178 
179 static bool
180 heapam_fetch_row_version(Relation relation,
181  ItemPointer tid,
182  Snapshot snapshot,
183  TupleTableSlot *slot)
184 {
185  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
186  Buffer buffer;
187 
188  Assert(TTS_IS_BUFFERTUPLE(slot));
189 
190  bslot->base.tupdata.t_self = *tid;
191  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
192  {
193  /* store in slot, transferring existing pin */
194  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
195  slot->tts_tableOid = RelationGetRelid(relation);
196 
197  return true;
198  }
199 
200  return false;
201 }
202 
203 static bool
204 heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
205 {
206  HeapScanDesc hscan = (HeapScanDesc) scan;
207 
208  return ItemPointerIsValid(tid) &&
209  ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
210 }
211 
212 static bool
213 heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
214  Snapshot snapshot)
215 {
216  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
217  bool res;
218 
219  Assert(TTS_IS_BUFFERTUPLE(slot));
220  Assert(BufferIsValid(bslot->buffer));
221 
222  /*
223  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
224  * Caller should be holding pin, but not lock.
225  */
226  LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
227  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
228  bslot->buffer);
229  LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
230 
231  return res;
232 }
233 
234 
235 /* ----------------------------------------------------------------------------
236  * Functions for manipulations of physical tuples for heap AM.
237  * ----------------------------------------------------------------------------
238  */
239 
240 static void
241 heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
242  int options, BulkInsertState bistate)
243 {
244  bool shouldFree = true;
245  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
246 
247  /* Update the tuple with table oid */
248  slot->tts_tableOid = RelationGetRelid(relation);
249  tuple->t_tableOid = slot->tts_tableOid;
250 
251  /* Perform the insertion, and copy the resulting ItemPointer */
252  heap_insert(relation, tuple, cid, options, bistate);
253  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
254 
255  if (shouldFree)
256  pfree(tuple);
257 }
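/*
 * Descriptive note (an illustration, not part of the upstream file): callers
 * do not invoke this function directly; it is reached through the tableam
 * wrapper table_tuple_insert(rel, slot, cid, options, bistate).  On return,
 * the slot's tts_tid has been set to the TID chosen by heap_insert(), which
 * the executor then uses when inserting index entries.
 */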
258 
259 static void
260 heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
261  CommandId cid, int options,
262  BulkInsertState bistate, uint32 specToken)
263 {
264  bool shouldFree = true;
265  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
266 
267  /* Update the tuple with table oid */
268  slot->tts_tableOid = RelationGetRelid(relation);
269  tuple->t_tableOid = slot->tts_tableOid;
270 
271  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
272  options |= HEAP_INSERT_SPECULATIVE;
273 
274  /* Perform the insertion, and copy the resulting ItemPointer */
275  heap_insert(relation, tuple, cid, options, bistate);
276  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
277 
278  if (shouldFree)
279  pfree(tuple);
280 }
281 
282 static void
283 heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
284  uint32 specToken, bool succeeded)
285 {
286  bool shouldFree = true;
287  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
288 
289  /* adjust the tuple's state accordingly */
290  if (succeeded)
291  heap_finish_speculative(relation, &slot->tts_tid);
292  else
293  heap_abort_speculative(relation, &slot->tts_tid);
294 
295  if (shouldFree)
296  pfree(tuple);
297 }
298 
299 static TM_Result
300 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
301  Snapshot snapshot, Snapshot crosscheck, bool wait,
302  TM_FailureData *tmfd, bool changingPart)
303 {
304  /*
305  * Currently, deletion of index tuples is handled at VACUUM time. If the
306  * storage were ever to clean up dead tuples by itself, that would also be
307  * the time to remove the corresponding index tuples.
308  */
309  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
310 }
311 
312 
313 static TM_Result
314 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
315  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
316  bool wait, TM_FailureData *tmfd,
317  LockTupleMode *lockmode, bool *update_indexes)
318 {
319  bool shouldFree = true;
320  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
321  TM_Result result;
322 
323  /* Update the tuple with table oid */
324  slot->tts_tableOid = RelationGetRelid(relation);
325  tuple->t_tableOid = slot->tts_tableOid;
326 
327  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
328  tmfd, lockmode);
329  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
330 
331  /*
332  * Decide whether new index entries are needed for the tuple
333  *
334  * Note: heap_update returns the tid (location) of the new tuple in the
335  * t_self field.
336  *
337  * If it's a HOT update, we mustn't insert new index entries.
338  */
339  *update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);
340 
341  if (shouldFree)
342  pfree(tuple);
343 
344  return result;
345 }
346 
347 static TM_Result
348 heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
349  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
350  LockWaitPolicy wait_policy, uint8 flags,
351  TM_FailureData *tmfd)
352 {
353  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
354  TM_Result result;
355  Buffer buffer;
356  HeapTuple tuple = &bslot->base.tupdata;
357  bool follow_updates;
358 
359  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
360  tmfd->traversed = false;
361 
362  Assert(TTS_IS_BUFFERTUPLE(slot));
363 
364 tuple_lock_retry:
365  tuple->t_self = *tid;
366  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
367  follow_updates, &buffer, tmfd);
368 
369  if (result == TM_Updated &&
370  (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
371  {
372  /* Should not encounter speculative tuple on recheck */
373  Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
374 
375  ReleaseBuffer(buffer);
376 
377  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
378  {
379  SnapshotData SnapshotDirty;
380  TransactionId priorXmax;
381 
382  /* it was updated, so look at the updated version */
383  *tid = tmfd->ctid;
384  /* updated row should have xmin matching this xmax */
385  priorXmax = tmfd->xmax;
386 
387  /* signal that a tuple later in the chain is getting locked */
388  tmfd->traversed = true;
389 
390  /*
391  * fetch target tuple
392  *
393  * Loop here to deal with updated or busy tuples
394  */
395  InitDirtySnapshot(SnapshotDirty);
396  for (;;)
397  {
398  if (ItemPointerIndicatesMovedPartitions(tid))
399  ereport(ERROR,
400  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
401  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
402 
403  tuple->t_self = *tid;
404  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
405  {
406  /*
407  * If xmin isn't what we're expecting, the slot must have
408  * been recycled and reused for an unrelated tuple. This
409  * implies that the latest version of the row was deleted,
410  * so we need do nothing. (Should be safe to examine xmin
411  * without getting buffer's content lock. We assume
412  * reading a TransactionId to be atomic, and Xmin never
413  * changes in an existing tuple, except to invalid or
414  * frozen, and neither of those can match priorXmax.)
415  */
416  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
417  priorXmax))
418  {
419  ReleaseBuffer(buffer);
420  return TM_Deleted;
421  }
422 
423  /* otherwise xmin should not be dirty... */
424  if (TransactionIdIsValid(SnapshotDirty.xmin))
425  ereport(ERROR,
426  (errcode(ERRCODE_DATA_CORRUPTED),
427  errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
428  SnapshotDirty.xmin,
429  ItemPointerGetBlockNumber(&tuple->t_self),
430  ItemPointerGetOffsetNumber(&tuple->t_self),
431  RelationGetRelationName(relation))));
432 
433  /*
434  * If tuple is being updated by other transaction then we
435  * have to wait for its commit/abort, or die trying.
436  */
437  if (TransactionIdIsValid(SnapshotDirty.xmax))
438  {
439  ReleaseBuffer(buffer);
440  switch (wait_policy)
441  {
442  case LockWaitBlock:
443  XactLockTableWait(SnapshotDirty.xmax,
444  relation, &tuple->t_self,
445  XLTW_FetchUpdated);
446  break;
447  case LockWaitSkip:
448  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
449  /* skip instead of waiting */
450  return TM_WouldBlock;
451  break;
452  case LockWaitError:
453  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
454  ereport(ERROR,
455  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
456  errmsg("could not obtain lock on row in relation \"%s\"",
457  RelationGetRelationName(relation))));
458  break;
459  }
460  continue; /* loop back to repeat heap_fetch */
461  }
462 
463  /*
464  * If tuple was inserted by our own transaction, we have
465  * to check cmin against cid: cmin >= current CID means
466  * our command cannot see the tuple, so we should ignore
467  * it. Otherwise heap_lock_tuple() will throw an error,
468  * and so would any later attempt to update or delete the
469  * tuple. (We need not check cmax because
470  * HeapTupleSatisfiesDirty will consider a tuple deleted
471  * by our transaction dead, regardless of cmax.) We just
472  * checked that priorXmax == xmin, so we can test that
473  * variable instead of doing HeapTupleHeaderGetXmin again.
474  */
475  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
476  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
477  {
478  tmfd->xmax = priorXmax;
479 
480  /*
481  * Cmin is the problematic value, so store that. See
482  * above.
483  */
484  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
485  ReleaseBuffer(buffer);
486  return TM_SelfModified;
487  }
488 
489  /*
490  * This is a live tuple, so try to lock it again.
491  */
492  ReleaseBuffer(buffer);
493  goto tuple_lock_retry;
494  }
495 
496  /*
497  * If the referenced slot was actually empty, the latest
498  * version of the row must have been deleted, so we need do
499  * nothing.
500  */
501  if (tuple->t_data == NULL)
502  {
503  Assert(!BufferIsValid(buffer));
504  return TM_Deleted;
505  }
506 
507  /*
508  * As above, if xmin isn't what we're expecting, do nothing.
509  */
510  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
511  priorXmax))
512  {
513  ReleaseBuffer(buffer);
514  return TM_Deleted;
515  }
516 
517  /*
518  * If we get here, the tuple was found but failed
519  * SnapshotDirty. Assuming the xmin is either a committed xact
520  * or our own xact (as it certainly should be if we're trying
521  * to modify the tuple), this must mean that the row was
522  * updated or deleted by either a committed xact or our own
523  * xact. If it was deleted, we can ignore it; if it was
524  * updated then chain up to the next version and repeat the
525  * whole process.
526  *
527  * As above, it should be safe to examine xmax and t_ctid
528  * without the buffer content lock, because they can't be
529  * changing. We'd better hold a buffer pin though.
530  */
531  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
532  {
533  /* deleted, so forget about it */
534  ReleaseBuffer(buffer);
535  return TM_Deleted;
536  }
537 
538  /* updated, so look at the updated row */
539  *tid = tuple->t_data->t_ctid;
540  /* updated row should have xmin matching this xmax */
541  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
542  ReleaseBuffer(buffer);
543  /* loop back to fetch next in chain */
544  }
545  }
546  else
547  {
548  /* tuple was deleted, so give up */
549  return TM_Deleted;
550  }
551  }
552 
553  slot->tts_tableOid = RelationGetRelid(relation);
554  tuple->t_tableOid = slot->tts_tableOid;
555 
556  /* store in slot, transferring existing pin */
557  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
558 
559  return result;
560 }
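/*
 * Descriptive note (an illustration, not part of the upstream file): when the
 * caller passes TUPLE_LOCK_FLAG_FIND_LAST_VERSION and heap_lock_tuple()
 * reports TM_Updated, the code above follows the ctid chain under a dirty
 * snapshot, waits out any in-progress updater according to wait_policy, marks
 * tmfd->traversed, and jumps back to tuple_lock_retry to lock the newest
 * visible version.  EvalPlanQual-style rechecks rely on this behavior.
 */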
561 
562 
563 /* ------------------------------------------------------------------------
564  * DDL related callbacks for heap AM.
565  * ------------------------------------------------------------------------
566  */
567 
568 static void
569 heapam_relation_set_new_filelocator(Relation rel,
570  const RelFileLocator *newrlocator,
571  char persistence,
572  TransactionId *freezeXid,
573  MultiXactId *minmulti)
574 {
575  SMgrRelation srel;
576 
577  /*
578  * Initialize to the minimum XID that could put tuples in the table. We
579  * know that no xacts older than RecentXmin are still running, so that
580  * will do.
581  */
582  *freezeXid = RecentXmin;
583 
584  /*
585  * Similarly, initialize the minimum Multixact to the first value that
586  * could possibly be stored in tuples in the table. Running transactions
587  * could reuse values from their local cache, so we are careful to
588  * consider all currently running multis.
589  *
590  * XXX this could be refined further, but is it worth the hassle?
591  */
592  *minmulti = GetOldestMultiXactId();
593 
594  srel = RelationCreateStorage(*newrlocator, persistence, true);
595 
596  /*
597  * If required, set up an init fork for an unlogged table so that it can
598  * be correctly reinitialized on restart. An immediate sync is required
599  * even if the page has been logged, because the write did not go through
600  * shared_buffers and therefore a concurrent checkpoint may have moved the
601  * redo pointer past our xlog record. Recovery may as well remove it
602  * while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
603  * record. Therefore, logging is necessary even if wal_level=minimal.
604  */
605  if (persistence == RELPERSISTENCE_UNLOGGED)
606  {
607  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
608  rel->rd_rel->relkind == RELKIND_MATVIEW ||
609  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
610  smgrcreate(srel, INIT_FORKNUM, false);
611  log_smgrcreate(newrlocator, INIT_FORKNUM);
612  smgrimmedsync(srel, INIT_FORKNUM);
613  }
614 
615  smgrclose(srel);
616 }
617 
618 static void
619 heapam_relation_nontransactional_truncate(Relation rel)
620 {
621  RelationTruncate(rel, 0);
622 }
623 
624 static void
625 heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
626 {
627  SMgrRelation dstrel;
628 
629  dstrel = smgropen(*newrlocator, rel->rd_backend);
630 
631  /*
632  * Since we copy the file directly without looking at the shared buffers,
633  * we'd better first flush out any pages of the source relation that are
634  * in shared buffers. We assume no new changes will be made while we are
635  * holding exclusive lock on the rel.
636  */
637  FlushRelationBuffers(rel);
638 
639  /*
640  * Create and copy all forks of the relation, and schedule unlinking of
641  * old physical files.
642  *
643  * NOTE: any conflict in relfilenumber value will be caught in
644  * RelationCreateStorage().
645  */
646  RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
647 
648  /* copy main fork */
649  RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
650  rel->rd_rel->relpersistence);
651 
652  /* copy those extra forks that exist */
653  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
654  forkNum <= MAX_FORKNUM; forkNum++)
655  {
656  if (smgrexists(RelationGetSmgr(rel), forkNum))
657  {
658  smgrcreate(dstrel, forkNum, false);
659 
660  /*
661  * WAL log creation if the relation is persistent, or this is the
662  * init fork of an unlogged relation.
663  */
664  if (RelationIsPermanent(rel) ||
665  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
666  forkNum == INIT_FORKNUM))
667  log_smgrcreate(newrlocator, forkNum);
668  RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
669  rel->rd_rel->relpersistence);
670  }
671  }
672 
673 
674  /* drop old relation, and close new one */
675  RelationDropStorage(rel);
676  smgrclose(dstrel);
677 }
678 
679 static void
680 heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
681  Relation OldIndex, bool use_sort,
682  TransactionId OldestXmin,
683  TransactionId *xid_cutoff,
684  MultiXactId *multi_cutoff,
685  double *num_tuples,
686  double *tups_vacuumed,
687  double *tups_recently_dead)
688 {
689  RewriteState rwstate;
690  IndexScanDesc indexScan;
691  TableScanDesc tableScan;
692  HeapScanDesc heapScan;
693  bool is_system_catalog;
694  Tuplesortstate *tuplesort;
695  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
696  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
697  TupleTableSlot *slot;
698  int natts;
699  Datum *values;
700  bool *isnull;
701  BufferHeapTupleTableSlot *hslot;
702  BlockNumber prev_cblock = InvalidBlockNumber;
703 
704  /* Remember if it's a system catalog */
705  is_system_catalog = IsSystemRelation(OldHeap);
706 
707  /*
708  * Valid smgr_targblock implies something already wrote to the relation.
709  * This may be harmless, but this function hasn't planned for it.
710  */
711  Assert(RelationGetTargetBlock(OldHeap) == InvalidBlockNumber);
712 
713  /* Preallocate values/isnull arrays */
714  natts = newTupDesc->natts;
715  values = (Datum *) palloc(natts * sizeof(Datum));
716  isnull = (bool *) palloc(natts * sizeof(bool));
717 
718  /* Initialize the rewrite operation */
719  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
720  *multi_cutoff);
721 
722 
723  /* Set up sorting if wanted */
724  if (use_sort)
725  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
726  maintenance_work_mem,
727  NULL, TUPLESORT_NONE);
728  else
729  tuplesort = NULL;
730 
731  /*
732  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
733  * that still need to be copied, we scan with SnapshotAny and use
734  * HeapTupleSatisfiesVacuum for the visibility test.
735  */
736  if (OldIndex != NULL && !use_sort)
737  {
738  const int ci_index[] = {
739  PROGRESS_CLUSTER_PHASE,
740  PROGRESS_CLUSTER_INDEX_RELID
741  };
742  int64 ci_val[2];
743 
744  /* Set phase and OIDOldIndex to columns */
745  ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
746  ci_val[1] = RelationGetRelid(OldIndex);
747  pgstat_progress_update_multi_param(2, ci_index, ci_val);
748 
749  tableScan = NULL;
750  heapScan = NULL;
751  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
752  index_rescan(indexScan, NULL, 0, NULL, 0);
753  }
754  else
755  {
756  /* In scan-and-sort mode and also VACUUM FULL, set phase */
757  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
758  PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
759 
760  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
761  heapScan = (HeapScanDesc) tableScan;
762  indexScan = NULL;
763 
764  /* Set total heap blocks */
765  pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
766  heapScan->rs_nblocks);
767  }
768 
769  slot = table_slot_create(OldHeap, NULL);
770  hslot = (BufferHeapTupleTableSlot *) slot;
771 
772  /*
773  * Scan through the OldHeap, either in OldIndex order or sequentially;
774  * copy each tuple into the NewHeap, or transiently to the tuplesort
775  * module. Note that we don't bother sorting dead tuples (they won't get
776  * to the new table anyway).
777  */
778  for (;;)
779  {
780  HeapTuple tuple;
781  Buffer buf;
782  bool isdead;
783 
784  CHECK_FOR_INTERRUPTS();
785 
786  if (indexScan != NULL)
787  {
788  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
789  break;
790 
791  /* Since we used no scan keys, should never need to recheck */
792  if (indexScan->xs_recheck)
793  elog(ERROR, "CLUSTER does not support lossy index conditions");
794  }
795  else
796  {
797  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
798  {
799  /*
800  * If the last pages of the scan were empty, we would go to
801  * the next phase while heap_blks_scanned != heap_blks_total.
802  * Instead, to ensure that heap_blks_scanned is equivalent to
803  * total_heap_blks after the table scan phase, this parameter
804  * is manually updated to the correct value when the table
805  * scan finishes.
806  */
807  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
808  heapScan->rs_nblocks);
809  break;
810  }
811 
812  /*
813  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
814  * scanned
815  *
816  * Note that heapScan may start at an offset and wrap around, i.e.
817  * rs_startblock may be >0, and rs_cblock may end with a number
818  * below rs_startblock. To prevent showing this wraparound to the
819  * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
820  */
821  if (prev_cblock != heapScan->rs_cblock)
822  {
823  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
824  (heapScan->rs_cblock +
825  heapScan->rs_nblocks -
826  heapScan->rs_startblock
827  ) % heapScan->rs_nblocks + 1);
828  prev_cblock = heapScan->rs_cblock;
829  }
830  }
831 
832  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
833  buf = hslot->buffer;
834 
835  LockBuffer(buf, BUFFER_LOCK_SHARE);
836 
837  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
838  {
839  case HEAPTUPLE_DEAD:
840  /* Definitely dead */
841  isdead = true;
842  break;
843  case HEAPTUPLE_RECENTLY_DEAD:
844  *tups_recently_dead += 1;
845  /* fall through */
846  case HEAPTUPLE_LIVE:
847  /* Live or recently dead, must copy it */
848  isdead = false;
849  break;
850  case HEAPTUPLE_INSERT_IN_PROGRESS:
851 
852  /*
853  * Since we hold exclusive lock on the relation, normally the
854  * only way to see this is if it was inserted earlier in our
855  * own transaction. However, it can happen in system
856  * catalogs, since we tend to release write lock before commit
857  * there. Give a warning if neither case applies; but in any
858  * case we had better copy it.
859  */
860  if (!is_system_catalog &&
861  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
862  elog(WARNING, "concurrent insert in progress within table \"%s\"",
863  RelationGetRelationName(OldHeap));
864  /* treat as live */
865  isdead = false;
866  break;
867  case HEAPTUPLE_DELETE_IN_PROGRESS:
868 
869  /*
870  * Similar situation to INSERT_IN_PROGRESS case.
871  */
872  if (!is_system_catalog &&
873  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
874  elog(WARNING, "concurrent delete in progress within table \"%s\"",
875  RelationGetRelationName(OldHeap));
876  /* treat as recently dead */
877  *tups_recently_dead += 1;
878  isdead = false;
879  break;
880  default:
881  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
882  isdead = false; /* keep compiler quiet */
883  break;
884  }
885 
886  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
887 
888  if (isdead)
889  {
890  *tups_vacuumed += 1;
891  /* heap rewrite module still needs to see it... */
892  if (rewrite_heap_dead_tuple(rwstate, tuple))
893  {
894  /* A previous recently-dead tuple is now known dead */
895  *tups_vacuumed += 1;
896  *tups_recently_dead -= 1;
897  }
898  continue;
899  }
900 
901  *num_tuples += 1;
902  if (tuplesort != NULL)
903  {
904  tuplesort_putheaptuple(tuplesort, tuple);
905 
906  /*
907  * In scan-and-sort mode, report increase in number of tuples
908  * scanned
909  */
910  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
911  *num_tuples);
912  }
913  else
914  {
915  const int ct_index[] = {
916  PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
917  PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
918  };
919  int64 ct_val[2];
920 
921  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
922  values, isnull, rwstate);
923 
924  /*
925  * In indexscan mode and also VACUUM FULL, report increase in
926  * number of tuples scanned and written
927  */
928  ct_val[0] = *num_tuples;
929  ct_val[1] = *num_tuples;
930  pgstat_progress_update_multi_param(2, ct_index, ct_val);
931  }
932  }
933 
934  if (indexScan != NULL)
935  index_endscan(indexScan);
936  if (tableScan != NULL)
937  table_endscan(tableScan);
938  if (slot)
939  ExecDropSingleTupleTableSlot(slot);
940 
941  /*
942  * In scan-and-sort mode, complete the sort, then read out all live tuples
943  * from the tuplestore and write them to the new relation.
944  */
945  if (tuplesort != NULL)
946  {
947  double n_tuples = 0;
948 
949  /* Report that we are now sorting tuples */
950  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
951  PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
952 
953  tuplesort_performsort(tuplesort);
954 
955  /* Report that we are now writing new heap */
956  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
957  PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
958 
959  for (;;)
960  {
961  HeapTuple tuple;
962 
963  CHECK_FOR_INTERRUPTS();
964 
965  tuple = tuplesort_getheaptuple(tuplesort, true);
966  if (tuple == NULL)
967  break;
968 
969  n_tuples += 1;
970  reform_and_rewrite_tuple(tuple,
971  OldHeap, NewHeap,
972  values, isnull,
973  rwstate);
974  /* Report n_tuples */
975  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
976  n_tuples);
977  }
978 
979  tuplesort_end(tuplesort);
980  }
981 
982  /* Write out any remaining tuples, and fsync if needed */
983  end_heap_rewrite(rwstate);
984 
985  /* Clean up */
986  pfree(values);
987  pfree(isnull);
988 }
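/*
 * Descriptive note (an illustration, not part of the upstream file): the
 * function above has three paths.  With an index and use_sort == false it
 * copies tuples in index order; with use_sort == true it feeds a sequential
 * scan into a tuplesort and writes the sorted stream out at the end;
 * otherwise (plain VACUUM FULL) it copies tuples in physical order.  All
 * three funnel through reform_and_rewrite_tuple(), so dropped columns are
 * nulled out and the rewrite module handles freezing and TOAST.
 */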
989 
990 static bool
991 heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
992  BufferAccessStrategy bstrategy)
993 {
994  HeapScanDesc hscan = (HeapScanDesc) scan;
995 
996  /*
997  * We must maintain a pin on the target page's buffer to ensure that
998  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
999  * under us. Hence, pin the page until we are done looking at it. We
1000  * also choose to hold sharelock on the buffer throughout --- we could
1001  * release and re-acquire sharelock for each tuple, but since we aren't
1002  * doing much work per tuple, the extra lock traffic is probably better
1003  * avoided.
1004  */
1005  hscan->rs_cblock = blockno;
1006  hscan->rs_cindex = FirstOffsetNumber;
1007  hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
1008  blockno, RBM_NORMAL, bstrategy);
1009  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1010 
1011  /* in heap all blocks can contain tuples, so always return true */
1012  return true;
1013 }
1014 
1015 static bool
1016 heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1017  double *liverows, double *deadrows,
1018  TupleTableSlot *slot)
1019 {
1020  HeapScanDesc hscan = (HeapScanDesc) scan;
1021  Page targpage;
1022  OffsetNumber maxoffset;
1023  BufferHeapTupleTableSlot *hslot;
1024 
1025  Assert(TTS_IS_BUFFERTUPLE(slot));
1026 
1027  hslot = (BufferHeapTupleTableSlot *) slot;
1028  targpage = BufferGetPage(hscan->rs_cbuf);
1029  maxoffset = PageGetMaxOffsetNumber(targpage);
1030 
1031  /* Inner loop over all tuples on the selected page */
1032  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1033  {
1034  ItemId itemid;
1035  HeapTuple targtuple = &hslot->base.tupdata;
1036  bool sample_it = false;
1037 
1038  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1039 
1040  /*
1041  * We ignore unused and redirect line pointers. DEAD line pointers
1042  * should be counted as dead, because we need vacuum to run to get rid
1043  * of them. Note that this rule agrees with the way that
1044  * heap_page_prune() counts things.
1045  */
1046  if (!ItemIdIsNormal(itemid))
1047  {
1048  if (ItemIdIsDead(itemid))
1049  *deadrows += 1;
1050  continue;
1051  }
1052 
1053  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1054 
1055  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1056  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1057  targtuple->t_len = ItemIdGetLength(itemid);
1058 
1059  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1060  hscan->rs_cbuf))
1061  {
1062  case HEAPTUPLE_LIVE:
1063  sample_it = true;
1064  *liverows += 1;
1065  break;
1066 
1067  case HEAPTUPLE_DEAD:
1068  case HEAPTUPLE_RECENTLY_DEAD:
1069  /* Count dead and recently-dead rows */
1070  *deadrows += 1;
1071  break;
1072 
1073  case HEAPTUPLE_INSERT_IN_PROGRESS:
1074 
1075  /*
1076  * Insert-in-progress rows are not counted. We assume that
1077  * when the inserting transaction commits or aborts, it will
1078  * send a stats message to increment the proper count. This
1079  * works right only if that transaction ends after we finish
1080  * analyzing the table; if things happen in the other order,
1081  * its stats update will be overwritten by ours. However, the
1082  * error will be large only if the other transaction runs long
1083  * enough to insert many tuples, so assuming it will finish
1084  * after us is the safer option.
1085  *
1086  * A special case is that the inserting transaction might be
1087  * our own. In this case we should count and sample the row,
1088  * to accommodate users who load a table and analyze it in one
1089  * transaction. (pgstat_report_analyze has to adjust the
1090  * numbers we report to the cumulative stats system to make
1091  * this come out right.)
1092  */
1093  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
1094  {
1095  sample_it = true;
1096  *liverows += 1;
1097  }
1098  break;
1099 
1100  case HEAPTUPLE_DELETE_IN_PROGRESS:
1101 
1102  /*
1103  * We count and sample delete-in-progress rows the same as
1104  * live ones, so that the stats counters come out right if the
1105  * deleting transaction commits after us, per the same
1106  * reasoning given above.
1107  *
1108  * If the delete was done by our own transaction, however, we
1109  * must count the row as dead to make pgstat_report_analyze's
1110  * stats adjustments come out right. (Note: this works out
1111  * properly when the row was both inserted and deleted in our
1112  * xact.)
1113  *
1114  * The net effect of these choices is that we act as though an
1115  * IN_PROGRESS transaction hasn't happened yet, except if it
1116  * is our own transaction, which we assume has happened.
1117  *
1118  * This approach ensures that we behave sanely if we see both
1119  * the pre-image and post-image rows for a row being updated
1120  * by a concurrent transaction: we will sample the pre-image
1121  * but not the post-image. We also get sane results if the
1122  * concurrent transaction never commits.
1123  */
1124  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
1125  *deadrows += 1;
1126  else
1127  {
1128  sample_it = true;
1129  *liverows += 1;
1130  }
1131  break;
1132 
1133  default:
1134  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1135  break;
1136  }
1137 
1138  if (sample_it)
1139  {
1140  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1141  hscan->rs_cindex++;
1142 
1143  /* note that we leave the buffer locked here! */
1144  return true;
1145  }
1146  }
1147 
1148  /* Now release the lock and pin on the page */
1149  UnlockReleaseBuffer(hscan->rs_cbuf);
1150  hscan->rs_cbuf = InvalidBuffer;
1151 
1152  /* also prevent old slot contents from having pin on page */
1153  ExecClearTuple(slot);
1154 
1155  return false;
1156 }
1157 
1158 static double
1159 heapam_index_build_range_scan(Relation heapRelation,
1160  Relation indexRelation,
1161  IndexInfo *indexInfo,
1162  bool allow_sync,
1163  bool anyvisible,
1164  bool progress,
1165  BlockNumber start_blockno,
1166  BlockNumber numblocks,
1167  IndexBuildCallback callback,
1168  void *callback_state,
1169  TableScanDesc scan)
1170 {
1171  HeapScanDesc hscan;
1172  bool is_system_catalog;
1173  bool checking_uniqueness;
1174  HeapTuple heapTuple;
1175  Datum values[INDEX_MAX_KEYS];
1176  bool isnull[INDEX_MAX_KEYS];
1177  double reltuples;
1178  ExprState *predicate;
1179  TupleTableSlot *slot;
1180  EState *estate;
1181  ExprContext *econtext;
1182  Snapshot snapshot;
1183  bool need_unregister_snapshot = false;
1184  TransactionId OldestXmin;
1185  BlockNumber previous_blkno = InvalidBlockNumber;
1186  BlockNumber root_blkno = InvalidBlockNumber;
1187  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1188 
1189  /*
1190  * sanity checks
1191  */
1192  Assert(OidIsValid(indexRelation->rd_rel->relam));
1193 
1194  /* Remember if it's a system catalog */
1195  is_system_catalog = IsSystemRelation(heapRelation);
1196 
1197  /* See whether we're verifying uniqueness/exclusion properties */
1198  checking_uniqueness = (indexInfo->ii_Unique ||
1199  indexInfo->ii_ExclusionOps != NULL);
1200 
1201  /*
1202  * "Any visible" mode is not compatible with uniqueness checks; make sure
1203  * only one of those is requested.
1204  */
1205  Assert(!(anyvisible && checking_uniqueness));
1206 
1207  /*
1208  * Need an EState for evaluation of index expressions and partial-index
1209  * predicates. Also a slot to hold the current tuple.
1210  */
1211  estate = CreateExecutorState();
1212  econtext = GetPerTupleExprContext(estate);
1213  slot = table_slot_create(heapRelation, NULL);
1214 
1215  /* Arrange for econtext's scan tuple to be the tuple under test */
1216  econtext->ecxt_scantuple = slot;
1217 
1218  /* Set up execution state for predicate, if any. */
1219  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1220 
1221  /*
1222  * Prepare for scan of the base relation. In a normal index build, we use
1223  * SnapshotAny because we must retrieve all tuples and do our own time
1224  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1225  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1226  * and index whatever's live according to that.
1227  */
1228  OldestXmin = InvalidTransactionId;
1229 
1230  /* okay to ignore lazy VACUUMs here */
1231  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1232  OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1233 
1234  if (!scan)
1235  {
1236  /*
1237  * Serial index build.
1238  *
1239  * Must begin our own heap scan in this case. We may also need to
1240  * register a snapshot whose lifetime is under our direct control.
1241  */
1242  if (!TransactionIdIsValid(OldestXmin))
1243  {
1244  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1245  need_unregister_snapshot = true;
1246  }
1247  else
1248  snapshot = SnapshotAny;
1249 
1250  scan = table_beginscan_strat(heapRelation, /* relation */
1251  snapshot, /* snapshot */
1252  0, /* number of keys */
1253  NULL, /* scan key */
1254  true, /* buffer access strategy OK */
1255  allow_sync); /* syncscan OK? */
1256  }
1257  else
1258  {
1259  /*
1260  * Parallel index build.
1261  *
1262  * Parallel case never registers/unregisters own snapshot. Snapshot
1263  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1264  * snapshot, based on same criteria as serial case.
1265  */
1266  Assert(!IsBootstrapProcessingMode());
1267  Assert(allow_sync);
1268  snapshot = scan->rs_snapshot;
1269  }
1270 
1271  hscan = (HeapScanDesc) scan;
1272 
1273  /*
1274  * Must have called GetOldestNonRemovableTransactionId() if using
1275  * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1276  * worth checking this for parallel builds, since ambuild routines that
1277  * support parallel builds must work these details out for themselves.)
1278  */
1279  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1280  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1281  !TransactionIdIsValid(OldestXmin));
1282  Assert(snapshot == SnapshotAny || !anyvisible);
1283 
1284  /* Publish number of blocks to scan */
1285  if (progress)
1286  {
1287  BlockNumber nblocks;
1288 
1289  if (hscan->rs_base.rs_parallel != NULL)
1290  {
1291  ParallelBlockTableScanDesc pbscan;
1292 
1293  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1294  nblocks = pbscan->phs_nblocks;
1295  }
1296  else
1297  nblocks = hscan->rs_nblocks;
1298 
1299  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1300  nblocks);
1301  }
1302 
1303  /* set our scan endpoints */
1304  if (!allow_sync)
1305  heap_setscanlimits(scan, start_blockno, numblocks);
1306  else
1307  {
1308  /* syncscan can only be requested on whole relation */
1309  Assert(start_blockno == 0);
1310  Assert(numblocks == InvalidBlockNumber);
1311  }
1312 
1313  reltuples = 0;
1314 
1315  /*
1316  * Scan all tuples in the base relation.
1317  */
1318  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1319  {
1320  bool tupleIsAlive;
1321 
1322  CHECK_FOR_INTERRUPTS();
1323 
1324  /* Report scan progress, if asked to. */
1325  if (progress)
1326  {
1327  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1328 
1329  if (blocks_done != previous_blkno)
1330  {
1331  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1332  blocks_done);
1333  previous_blkno = blocks_done;
1334  }
1335  }
1336 
1337  /*
1338  * When dealing with a HOT-chain of updated tuples, we want to index
1339  * the values of the live tuple (if any), but index it under the TID
1340  * of the chain's root tuple. This approach is necessary to preserve
1341  * the HOT-chain structure in the heap. So we need to be able to find
1342  * the root item offset for every tuple that's in a HOT-chain. When
1343  * first reaching a new page of the relation, call
1344  * heap_get_root_tuples() to build a map of root item offsets on the
1345  * page.
1346  *
1347  * It might look unsafe to use this information across buffer
1348  * lock/unlock. However, we hold ShareLock on the table so no
1349  * ordinary insert/update/delete should occur; and we hold pin on the
1350  * buffer continuously while visiting the page, so no pruning
1351  * operation can occur either.
1352  *
1353  * In cases with only ShareUpdateExclusiveLock on the table, it's
1354  * possible for some HOT tuples to appear that we didn't know about
1355  * when we first read the page. To handle that case, we re-obtain the
1356  * list of root offsets when a HOT tuple points to a root item that we
1357  * don't know about.
1358  *
1359  * Also, although our opinions about tuple liveness could change while
1360  * we scan the page (due to concurrent transaction commits/aborts),
1361  * the chain root locations won't, so this info doesn't need to be
1362  * rebuilt after waiting for another transaction.
1363  *
1364  * Note the implied assumption that there is no more than one live
1365  * tuple per HOT-chain --- else we could create more than one index
1366  * entry pointing to the same root tuple.
1367  */
1368  if (hscan->rs_cblock != root_blkno)
1369  {
1370  Page page = BufferGetPage(hscan->rs_cbuf);
1371 
1372  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1373  heap_get_root_tuples(page, root_offsets);
1374  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1375 
1376  root_blkno = hscan->rs_cblock;
1377  }
1378 
1379  if (snapshot == SnapshotAny)
1380  {
1381  /* do our own time qual check */
1382  bool indexIt;
1383  TransactionId xwait;
1384 
1385  recheck:
1386 
1387  /*
1388  * We could possibly get away with not locking the buffer here,
1389  * since caller should hold ShareLock on the relation, but let's
1390  * be conservative about it. (This remark is still correct even
1391  * with HOT-pruning: our pin on the buffer prevents pruning.)
1392  */
1393  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1394 
1395  /*
1396  * The criteria for counting a tuple as live in this block need to
1397  * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1398  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1399  * reltuples values, e.g. when there are many recently-dead
1400  * tuples.
1401  */
1402  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1403  hscan->rs_cbuf))
1404  {
1405  case HEAPTUPLE_DEAD:
1406  /* Definitely dead, we can ignore it */
1407  indexIt = false;
1408  tupleIsAlive = false;
1409  break;
1410  case HEAPTUPLE_LIVE:
1411  /* Normal case, index and unique-check it */
1412  indexIt = true;
1413  tupleIsAlive = true;
1414  /* Count it as live, too */
1415  reltuples += 1;
1416  break;
1417  case HEAPTUPLE_RECENTLY_DEAD:
1418 
1419  /*
1420  * If tuple is recently deleted then we must index it
1421  * anyway to preserve MVCC semantics. (Pre-existing
1422  * transactions could try to use the index after we finish
1423  * building it, and may need to see such tuples.)
1424  *
1425  * However, if it was HOT-updated then we must only index
1426  * the live tuple at the end of the HOT-chain. Since this
1427  * breaks semantics for pre-existing snapshots, mark the
1428  * index as unusable for them.
1429  *
1430  * We don't count recently-dead tuples in reltuples, even
1431  * if we index them; see heapam_scan_analyze_next_tuple().
1432  */
1433  if (HeapTupleIsHotUpdated(heapTuple))
1434  {
1435  indexIt = false;
1436  /* mark the index as unsafe for old snapshots */
1437  indexInfo->ii_BrokenHotChain = true;
1438  }
1439  else
1440  indexIt = true;
1441  /* In any case, exclude the tuple from unique-checking */
1442  tupleIsAlive = false;
1443  break;
1444  case HEAPTUPLE_INSERT_IN_PROGRESS:
1445 
1446  /*
1447  * In "anyvisible" mode, this tuple is visible and we
1448  * don't need any further checks.
1449  */
1450  if (anyvisible)
1451  {
1452  indexIt = true;
1453  tupleIsAlive = true;
1454  reltuples += 1;
1455  break;
1456  }
1457 
1458  /*
1459  * Since caller should hold ShareLock or better, normally
1460  * the only way to see this is if it was inserted earlier
1461  * in our own transaction. However, it can happen in
1462  * system catalogs, since we tend to release write lock
1463  * before commit there. Give a warning if neither case
1464  * applies.
1465  */
1466  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1467  if (!TransactionIdIsCurrentTransactionId(xwait))
1468  {
1469  if (!is_system_catalog)
1470  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1471  RelationGetRelationName(heapRelation));
1472 
1473  /*
1474  * If we are performing uniqueness checks, indexing
1475  * such a tuple could lead to a bogus uniqueness
1476  * failure. In that case we wait for the inserting
1477  * transaction to finish and check again.
1478  */
1479  if (checking_uniqueness)
1480  {
1481  /*
1482  * Must drop the lock on the buffer before we wait
1483  */
1484  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1485  XactLockTableWait(xwait, heapRelation,
1486  &heapTuple->t_self,
1487  XLTW_InsertIndexUnique);
1488  CHECK_FOR_INTERRUPTS();
1489  goto recheck;
1490  }
1491  }
1492  else
1493  {
1494  /*
1495  * For consistency with
1496  * heapam_scan_analyze_next_tuple(), count
1497  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1498  * when inserted by our own transaction.
1499  */
1500  reltuples += 1;
1501  }
1502 
1503  /*
1504  * We must index such tuples, since if the index build
1505  * commits then they're good.
1506  */
1507  indexIt = true;
1508  tupleIsAlive = true;
1509  break;
1510  case HEAPTUPLE_DELETE_IN_PROGRESS:
1511 
1512  /*
1513  * As with INSERT_IN_PROGRESS case, this is unexpected
1514  * unless it's our own deletion or a system catalog; but
1515  * in anyvisible mode, this tuple is visible.
1516  */
1517  if (anyvisible)
1518  {
1519  indexIt = true;
1520  tupleIsAlive = false;
1521  reltuples += 1;
1522  break;
1523  }
1524 
1525  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1526  if (!TransactionIdIsCurrentTransactionId(xwait))
1527  {
1528  if (!is_system_catalog)
1529  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1530  RelationGetRelationName(heapRelation));
1531 
1532  /*
1533  * If we are performing uniqueness checks, assuming
1534  * the tuple is dead could lead to missing a
1535  * uniqueness violation. In that case we wait for the
1536  * deleting transaction to finish and check again.
1537  *
1538  * Also, if it's a HOT-updated tuple, we should not
1539  * index it but rather the live tuple at the end of
1540  * the HOT-chain. However, the deleting transaction
1541  * could abort, possibly leaving this tuple as live
1542  * after all, in which case it has to be indexed. The
1543  * only way to know what to do is to wait for the
1544  * deleting transaction to finish and check again.
1545  */
1546  if (checking_uniqueness ||
1547  HeapTupleIsHotUpdated(heapTuple))
1548  {
1549  /*
1550  * Must drop the lock on the buffer before we wait
1551  */
1552  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1553  XactLockTableWait(xwait, heapRelation,
1554  &heapTuple->t_self,
1555  XLTW_InsertIndexUnique);
1556  CHECK_FOR_INTERRUPTS();
1557  goto recheck;
1558  }
1559 
1560  /*
1561  * Otherwise index it but don't check for uniqueness,
1562  * the same as a RECENTLY_DEAD tuple.
1563  */
1564  indexIt = true;
1565 
1566  /*
1567  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1568  * if they were not deleted by the current
1569  * transaction. That's what
1570  * heapam_scan_analyze_next_tuple() does, and we want
1571  * the behavior to be consistent.
1572  */
1573  reltuples += 1;
1574  }
1575  else if (HeapTupleIsHotUpdated(heapTuple))
1576  {
1577  /*
1578  * It's a HOT-updated tuple deleted by our own xact.
1579  * We can assume the deletion will commit (else the
1580  * index contents don't matter), so treat the same as
1581  * RECENTLY_DEAD HOT-updated tuples.
1582  */
1583  indexIt = false;
1584  /* mark the index as unsafe for old snapshots */
1585  indexInfo->ii_BrokenHotChain = true;
1586  }
1587  else
1588  {
1589  /*
1590  * It's a regular tuple deleted by our own xact. Index
1591  * it, but don't check for uniqueness nor count in
1592  * reltuples, the same as a RECENTLY_DEAD tuple.
1593  */
1594  indexIt = true;
1595  }
1596  /* In any case, exclude the tuple from unique-checking */
1597  tupleIsAlive = false;
1598  break;
1599  default:
1600  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1601  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1602  break;
1603  }
1604 
1605  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1606 
1607  if (!indexIt)
1608  continue;
1609  }
1610  else
1611  {
1612  /* heap_getnext did the time qual check */
1613  tupleIsAlive = true;
1614  reltuples += 1;
1615  }
1616 
1617  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1618 
1619  /* Set up for predicate or expression evaluation */
1620  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1621 
1622  /*
1623  * In a partial index, discard tuples that don't satisfy the
1624  * predicate.
1625  */
1626  if (predicate != NULL)
1627  {
1628  if (!ExecQual(predicate, econtext))
1629  continue;
1630  }
1631 
1632  /*
1633  * For the current heap tuple, extract all the attributes we use in
1634  * this index, and note which are null. This also performs evaluation
1635  * of any expressions needed.
1636  */
1637  FormIndexDatum(indexInfo,
1638  slot,
1639  estate,
1640  values,
1641  isnull);
1642 
1643  /*
1644  * You'd think we should go ahead and build the index tuple here, but
1645  * some index AMs want to do further processing on the data first. So
1646  * pass the values[] and isnull[] arrays, instead.
1647  */
1648 
1649  if (HeapTupleIsHeapOnly(heapTuple))
1650  {
1651  /*
1652  * For a heap-only tuple, pretend its TID is that of the root. See
1653  * src/backend/access/heap/README.HOT for discussion.
1654  */
1655  ItemPointerData tid;
1656  OffsetNumber offnum;
1657 
1658  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1659 
1660  /*
1661  * If a HOT tuple points to a root that we don't know about,
1662  * obtain root items afresh. If that still fails, report it as
1663  * corruption.
1664  */
1665  if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1666  {
1667  Page page = BufferGetPage(hscan->rs_cbuf);
1668 
1669  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1670  heap_get_root_tuples(page, root_offsets);
1671  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1672  }
1673 
1674  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1675  ereport(ERROR,
1676  (errcode(ERRCODE_DATA_CORRUPTED),
1677  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1678  ItemPointerGetBlockNumber(&heapTuple->t_self),
1679  offnum,
1680  RelationGetRelationName(heapRelation))));
1681 
1682  ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1683  root_offsets[offnum - 1]);
1684 
1685  /* Call the AM's callback routine to process the tuple */
1686  callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1687  callback_state);
1688  }
1689  else
1690  {
1691  /* Call the AM's callback routine to process the tuple */
1692  callback(indexRelation, &heapTuple->t_self, values, isnull,
1693  tupleIsAlive, callback_state);
1694  }
1695  }
1696 
1697  /* Report scan progress one last time. */
1698  if (progress)
1699  {
1700  BlockNumber blks_done;
1701 
1702  if (hscan->rs_base.rs_parallel != NULL)
1703  {
1704  ParallelBlockTableScanDesc pbscan;
1705 
1706  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1707  blks_done = pbscan->phs_nblocks;
1708  }
1709  else
1710  blks_done = hscan->rs_nblocks;
1711 
1712  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1713  blks_done);
1714  }
1715 
1716  table_endscan(scan);
1717 
1718  /* we can now forget our snapshot, if set and registered by us */
1719  if (need_unregister_snapshot)
1720  UnregisterSnapshot(snapshot);
1721 
1722  ExecDropSingleTupleTableSlot(slot);
1723 
1724  FreeExecutorState(estate);
1725 
1726  /* These may have been pointing to the now-gone estate */
1727  indexInfo->ii_ExpressionsState = NIL;
1728  indexInfo->ii_PredicateState = NULL;
1729 
1730  return reltuples;
1731 }
1732 
1733 static void
1734 heapam_index_validate_scan(Relation heapRelation,
1735  Relation indexRelation,
1736  IndexInfo *indexInfo,
1737  Snapshot snapshot,
1738  ValidateIndexState *state)
1739 {
1740  TableScanDesc scan;
1741  HeapScanDesc hscan;
1742  HeapTuple heapTuple;
1743  Datum values[INDEX_MAX_KEYS];
1744  bool isnull[INDEX_MAX_KEYS];
1745  ExprState *predicate;
1746  TupleTableSlot *slot;
1747  EState *estate;
1748  ExprContext *econtext;
1749  BlockNumber root_blkno = InvalidBlockNumber;
1750  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1751  bool in_index[MaxHeapTuplesPerPage];
1752  BlockNumber previous_blkno = InvalidBlockNumber;
1753 
1754  /* state variables for the merge */
1755  ItemPointer indexcursor = NULL;
1756  ItemPointerData decoded;
1757  bool tuplesort_empty = false;
1758 
1759  /*
1760  * sanity checks
1761  */
1762  Assert(OidIsValid(indexRelation->rd_rel->relam));
1763 
1764  /*
1765  * Need an EState for evaluation of index expressions and partial-index
1766  * predicates. Also a slot to hold the current tuple.
1767  */
1768  estate = CreateExecutorState();
1769  econtext = GetPerTupleExprContext(estate);
1770  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1771  &TTSOpsHeapTuple);
1772 
1773  /* Arrange for econtext's scan tuple to be the tuple under test */
1774  econtext->ecxt_scantuple = slot;
1775 
1776  /* Set up execution state for predicate, if any. */
1777  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1778 
1779  /*
1780  * Prepare for scan of the base relation. We need just those tuples
1781  * satisfying the passed-in reference snapshot. We must disable syncscan
1782  * here, because it's critical that we read from block zero forward to
1783  * match the sorted TIDs.
1784  */
1785  scan = table_beginscan_strat(heapRelation, /* relation */
1786  snapshot, /* snapshot */
1787  0, /* number of keys */
1788  NULL, /* scan key */
1789  true, /* buffer access strategy OK */
1790  false); /* syncscan not OK */
1791  hscan = (HeapScanDesc) scan;
1792 
1793  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1794  hscan->rs_nblocks);
1795 
1796  /*
1797  * Scan all tuples matching the snapshot.
1798  */
1799  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1800  {
1801  ItemPointer heapcursor = &heapTuple->t_self;
1802  ItemPointerData rootTuple;
1803  OffsetNumber root_offnum;
1804 
1805  CHECK_FOR_INTERRUPTS();
1806 
1807  state->htups += 1;
1808 
1809  if ((previous_blkno == InvalidBlockNumber) ||
1810  (hscan->rs_cblock != previous_blkno))
1811  {
1812  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1813  hscan->rs_cblock);
1814  previous_blkno = hscan->rs_cblock;
1815  }
1816 
1817  /*
1818  * As commented in table_index_build_scan, we should index heap-only
1819  * tuples under the TIDs of their root tuples; so when we advance onto
1820  * a new heap page, build a map of root item offsets on the page.
1821  *
1822  * This complicates merging against the tuplesort output: we will
1823  * visit the live tuples in order by their offsets, but the root
1824  * offsets that we need to compare against the index contents might be
1825  * ordered differently. So we might have to "look back" within the
1826  * tuplesort output, but only within the current page. We handle that
1827  * by keeping a bool array in_index[] showing all the
1828  * already-passed-over tuplesort output TIDs of the current page. We
1829  * clear that array here, when advancing onto a new heap page.
1830  */
1831  if (hscan->rs_cblock != root_blkno)
1832  {
1833  Page page = BufferGetPage(hscan->rs_cbuf);
1834 
1835  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1836  heap_get_root_tuples(page, root_offsets);
1837  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1838 
1839  memset(in_index, 0, sizeof(in_index));
1840 
1841  root_blkno = hscan->rs_cblock;
1842  }
1843 
1844  /* Convert actual tuple TID to root TID */
1845  rootTuple = *heapcursor;
1846  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1847 
1848  if (HeapTupleIsHeapOnly(heapTuple))
1849  {
1850  root_offnum = root_offsets[root_offnum - 1];
1851  if (!OffsetNumberIsValid(root_offnum))
1852  ereport(ERROR,
1853  (errcode(ERRCODE_DATA_CORRUPTED),
1854  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1855  ItemPointerGetBlockNumber(heapcursor),
1856  ItemPointerGetOffsetNumber(heapcursor),
1857  RelationGetRelationName(heapRelation))));
1858  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1859  }
1860 
1861  /*
1862  * "merge" by skipping through the index tuples until we find or pass
1863  * the current root tuple.
1864  */
1865  while (!tuplesort_empty &&
1866  (!indexcursor ||
1867  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1868  {
1869  Datum ts_val;
1870  bool ts_isnull;
1871 
1872  if (indexcursor)
1873  {
1874  /*
1875  * Remember index items seen earlier on the current heap page
1876  */
1877  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1878  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1879  }
1880 
1881  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1882  &ts_val, &ts_isnull, NULL);
1883  Assert(tuplesort_empty || !ts_isnull);
1884  if (!tuplesort_empty)
1885  {
1886  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1887  indexcursor = &decoded;
1888 
1889  /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
1890 #ifndef USE_FLOAT8_BYVAL
1891  pfree(DatumGetPointer(ts_val));
1892 #endif
1893  }
1894  else
1895  {
1896  /* Be tidy */
1897  indexcursor = NULL;
1898  }
1899  }
1900 
1901  /*
1902  * If the tuplesort has overshot *and* we didn't see a match earlier,
1903  * then this tuple is missing from the index, so insert it.
1904  */
1905  if ((tuplesort_empty ||
1906  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1907  !in_index[root_offnum - 1])
1908  {
1909  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1910 
1911  /* Set up for predicate or expression evaluation */
1912  ExecStoreHeapTuple(heapTuple, slot, false);
1913 
1914  /*
1915  * In a partial index, discard tuples that don't satisfy the
1916  * predicate.
1917  */
1918  if (predicate != NULL)
1919  {
1920  if (!ExecQual(predicate, econtext))
1921  continue;
1922  }
1923 
1924  /*
1925  * For the current heap tuple, extract all the attributes we use
1926  * in this index, and note which are null. This also performs
1927  * evaluation of any expressions needed.
1928  */
1929  FormIndexDatum(indexInfo,
1930  slot,
1931  estate,
1932  values,
1933  isnull);
1934 
1935  /*
1936  * You'd think we should go ahead and build the index tuple here,
1937  * but some index AMs want to do further processing on the data
1938  * first. So pass the values[] and isnull[] arrays, instead.
1939  */
1940 
1941  /*
1942  * If the tuple is already committed dead, you might think we
1943  * could suppress uniqueness checking, but this is no longer true
1944  * in the presence of HOT, because the insert is actually a proxy
1945  * for a uniqueness check on the whole HOT-chain. That is, the
1946  * tuple we have here could be dead because it was already
1947  * HOT-updated, and if so the updating transaction will not have
1948  * thought it should insert index entries. The index AM will
1949  * check the whole HOT-chain and correctly detect a conflict if
1950  * there is one.
1951  */
1952 
1953  index_insert(indexRelation,
1954  values,
1955  isnull,
1956  &rootTuple,
1957  heapRelation,
1958  indexInfo->ii_Unique ?
1959  UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1960  false,
1961  indexInfo);
1962 
1963  state->tups_inserted += 1;
1964  }
1965  }
1966 
1967  table_endscan(scan);
1968 
1969  ExecDropSingleTupleTableSlot(slot);
1970 
1971  FreeExecutorState(estate);
1972 
1973  /* These may have been pointing to the now-gone estate */
1974  indexInfo->ii_ExpressionsState = NIL;
1975  indexInfo->ii_PredicateState = NULL;
1976 }
1977 
1978 /*
1979  * Return the number of blocks that have been read by this scan since
1980  * starting. This is meant for progress reporting rather than be fully
1981  * accurate: in a parallel scan, workers can be concurrently reading blocks
1982  * further ahead than what we report.
1983  */
1984 static BlockNumber
1985 heapam_scan_get_blocks_done(HeapScanDesc hscan)
1986 {
1987  ParallelBlockTableScanDesc bpscan = NULL;
1988  BlockNumber startblock;
1989  BlockNumber blocks_done;
1990 
1991  if (hscan->rs_base.rs_parallel != NULL)
1992  {
1993  bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1994  startblock = bpscan->phs_startblock;
1995  }
1996  else
1997  startblock = hscan->rs_startblock;
1998 
1999  /*
2000  * Might have wrapped around the end of the relation, if startblock was
2001  * not zero.
2002  */
2003  if (hscan->rs_cblock > startblock)
2004  blocks_done = hscan->rs_cblock - startblock;
2005  else
2006  {
2007  BlockNumber nblocks;
2008 
2009  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2010  blocks_done = nblocks - startblock +
2011  hscan->rs_cblock;
2012  }
2013 
2014  return blocks_done;
2015 }
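To make the wraparound branch above concrete, here is a stand-alone restatement of the same arithmetic (an illustrative sketch, not part of heapam_handler.c; the helper name and the numbers in the comment are invented):

/* Same blocks-done arithmetic as heapam_scan_get_blocks_done() above. */
typedef unsigned int BlockNumber;

static BlockNumber
example_blocks_done(BlockNumber startblock, BlockNumber nblocks,
                    BlockNumber cblock)
{
    if (cblock > startblock)
        return cblock - startblock;

    /* scan wrapped past the end of the relation back to block 0 */
    return nblocks - startblock + cblock;
}

/*
 * For example, a synchronized scan of a 100-block table that started at
 * block 90 and is currently on block 5 has read 100 - 90 + 5 = 15 blocks.
 */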
2016 
2017 
2018 /* ------------------------------------------------------------------------
2019  * Miscellaneous callbacks for the heap AM
2020  * ------------------------------------------------------------------------
2021  */
2022 
2023 /*
2024  * Check to see whether the table needs a TOAST table. It does only if
2025  * (1) there are any toastable attributes, and (2) the maximum length
2026  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2027  * create a toast table for something like "f1 varchar(20)".)
2028  */
2029 static bool
2030 heapam_relation_needs_toast_table(Relation rel)
2031 {
2032  int32 data_length = 0;
2033  bool maxlength_unknown = false;
2034  bool has_toastable_attrs = false;
2035  TupleDesc tupdesc = rel->rd_att;
2036  int32 tuple_length;
2037  int i;
2038 
2039  for (i = 0; i < tupdesc->natts; i++)
2040  {
2041  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2042 
2043  if (att->attisdropped)
2044  continue;
2045  data_length = att_align_nominal(data_length, att->attalign);
2046  if (att->attlen > 0)
2047  {
2048  /* Fixed-length types are never toastable */
2049  data_length += att->attlen;
2050  }
2051  else
2052  {
2053  int32 maxlen = type_maximum_size(att->atttypid,
2054  att->atttypmod);
2055 
2056  if (maxlen < 0)
2057  maxlength_unknown = true;
2058  else
2059  data_length += maxlen;
2060  if (att->attstorage != TYPSTORAGE_PLAIN)
2061  has_toastable_attrs = true;
2062  }
2063  }
2064  if (!has_toastable_attrs)
2065  return false; /* nothing to toast? */
2066  if (maxlength_unknown)
2067  return true; /* any unlimited-length attrs? */
2068  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2069  BITMAPLEN(tupdesc->natts)) +
2070  MAXALIGN(data_length);
2071  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2072 }
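A rough worked example of the size arithmetic above (approximate figures, not from this file): for a table whose only column is "f1 varchar(20)" under a single-byte encoding, the numbers work out roughly as follows:

/*
 * maxlen       = type_maximum_size(VARCHAROID, 20 + VARHDRSZ)      = 24
 * tuple_length = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(1))
 *              + MAXALIGN(24)                                      = 48 bytes
 *
 * 48 bytes is far below TOAST_TUPLE_THRESHOLD (roughly 2 kB with the
 * default 8 kB block size), so no TOAST table is created.  An
 * unconstrained "text" column instead sets maxlength_unknown and forces
 * one.
 */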
2073 
2074 /*
2075  * TOAST tables for heap relations are just heap relations.
2076  */
2077 static Oid
2078 heapam_relation_toast_am(Relation rel)
2079 {
2080  return rel->rd_rel->relam;
2081 }
2082 
2083 
2084 /* ------------------------------------------------------------------------
2085  * Planner related callbacks for the heap AM
2086  * ------------------------------------------------------------------------
2087  */
2088 
2089 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2090  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2091 #define HEAP_USABLE_BYTES_PER_PAGE \
2092  (BLCKSZ - SizeOfPageHeaderData)
2093 
2094 static void
2095 heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
2096  BlockNumber *pages, double *tuples,
2097  double *allvisfrac)
2098 {
2099  table_block_relation_estimate_size(rel, attr_widths, pages,
2100  tuples, allvisfrac,
2101  HEAP_OVERHEAD_BYTES_PER_TUPLE,
2102  HEAP_USABLE_BYTES_PER_PAGE);
2103 }
2104 
2105 
2106 /* ------------------------------------------------------------------------
2107  * Executor related callbacks for the heap AM
2108  * ------------------------------------------------------------------------
2109  */
2110 
2111 static bool
2112 heapam_scan_bitmap_next_block(TableScanDesc scan,
2113  TBMIterateResult *tbmres)
2114 {
2115  HeapScanDesc hscan = (HeapScanDesc) scan;
2116  BlockNumber page = tbmres->blockno;
2117  Buffer buffer;
2118  Snapshot snapshot;
2119  int ntup;
2120 
2121  hscan->rs_cindex = 0;
2122  hscan->rs_ntuples = 0;
2123 
2124  /*
2125  * Ignore any claimed entries past what we think is the end of the
2126  * relation. It may have been extended after the start of our scan (we
2127  * only hold an AccessShareLock, and it could be inserts from this
2128  * backend).
2129  */
2130  if (page >= hscan->rs_nblocks)
2131  return false;
2132 
2133  /*
2134  * Acquire pin on the target heap page, trading in any pin we held before.
2135  */
2136  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2137  scan->rs_rd,
2138  page);
2139  hscan->rs_cblock = page;
2140  buffer = hscan->rs_cbuf;
2141  snapshot = scan->rs_snapshot;
2142 
2143  ntup = 0;
2144 
2145  /*
2146  * Prune and repair fragmentation for the whole page, if possible.
2147  */
2148  heap_page_prune_opt(scan->rs_rd, buffer);
2149 
2150  /*
2151  * We must hold share lock on the buffer content while examining tuple
2152  * visibility. Afterwards, however, the tuples we have found to be
2153  * visible are guaranteed good as long as we hold the buffer pin.
2154  */
2155  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2156 
2157  /*
2158  * We need two separate strategies for lossy and non-lossy cases.
2159  */
2160  if (tbmres->ntuples >= 0)
2161  {
2162  /*
2163  * Bitmap is non-lossy, so we just look through the offsets listed in
2164  * tbmres; but we have to follow any HOT chain starting at each such
2165  * offset.
2166  */
2167  int curslot;
2168 
2169  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2170  {
2171  OffsetNumber offnum = tbmres->offsets[curslot];
2172  ItemPointerData tid;
2173  HeapTupleData heapTuple;
2174 
2175  ItemPointerSet(&tid, page, offnum);
2176  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2177  &heapTuple, NULL, true))
2178  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2179  }
2180  }
2181  else
2182  {
2183  /*
2184  * Bitmap is lossy, so we must examine each line pointer on the page.
2185  * But we can ignore HOT chains, since we'll check each tuple anyway.
2186  */
2187  Page dp = (Page) BufferGetPage(buffer);
2188  OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
2189  OffsetNumber offnum;
2190 
2191  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2192  {
2193  ItemId lp;
2194  HeapTupleData loctup;
2195  bool valid;
2196 
2197  lp = PageGetItemId(dp, offnum);
2198  if (!ItemIdIsNormal(lp))
2199  continue;
2200  loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2201  loctup.t_len = ItemIdGetLength(lp);
2202  loctup.t_tableOid = scan->rs_rd->rd_id;
2203  ItemPointerSet(&loctup.t_self, page, offnum);
2204  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2205  if (valid)
2206  {
2207  hscan->rs_vistuples[ntup++] = offnum;
2208  PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
2209  HeapTupleHeaderGetXmin(loctup.t_data));
2210  }
2211  HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2212  buffer, snapshot);
2213  }
2214  }
2215 
2216  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2217 
2218  Assert(ntup <= MaxHeapTuplesPerPage);
2219  hscan->rs_ntuples = ntup;
2220 
2221  return ntup > 0;
2222 }
2223 
2224 static bool
2225 heapam_scan_bitmap_next_tuple(TableScanDesc scan,
2226  TBMIterateResult *tbmres,
2227  TupleTableSlot *slot)
2228 {
2229  HeapScanDesc hscan = (HeapScanDesc) scan;
2230  OffsetNumber targoffset;
2231  Page dp;
2232  ItemId lp;
2233 
2234  /*
2235  * Out of range? If so, nothing more to look at on this page
2236  */
2237  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2238  return false;
2239 
2240  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2241  dp = (Page) BufferGetPage(hscan->rs_cbuf);
2242  lp = PageGetItemId(dp, targoffset);
2243  Assert(ItemIdIsNormal(lp));
2244 
2245  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2246  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2247  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2248  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2249 
2250  pgstat_count_heap_fetch(scan->rs_rd);
2251 
2252  /*
2253  * Set up the result slot to point to this tuple. Note that the slot
2254  * acquires a pin on the buffer.
2255  */
2256  ExecStoreBufferHeapTuple(&hscan->rs_ctup,
2257  slot,
2258  hscan->rs_cbuf);
2259 
2260  hscan->rs_cindex++;
2261 
2262  return true;
2263 }
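For context, these two callbacks are driven through the tableam wrappers by the bitmap heap scan executor node. A minimal sketch of that calling pattern (a hypothetical helper modeled on nodeBitmapHeapscan.c, not part of this file; prefetching and recheck handling omitted):

#include "postgres.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "nodes/tidbitmap.h"

/* Illustrative consumer of the two callbacks above: scan all tuples
 * matching a TID bitmap using a scan opened elsewhere. */
static void
example_scan_bitmap(TableScanDesc scan, TIDBitmap *tbm, TupleTableSlot *slot)
{
    TBMIterator *iterator = tbm_begin_iterate(tbm);
    TBMIterateResult *tbmres;

    while ((tbmres = tbm_iterate(iterator)) != NULL)
    {
        /* let the AM pin the page and collect its visible offsets ... */
        if (!table_scan_bitmap_next_block(scan, tbmres))
            continue;           /* page past EOF, or nothing visible */

        /* ... then return the visible tuples one by one in the slot */
        while (table_scan_bitmap_next_tuple(scan, tbmres, slot))
        {
            /* process one tuple here */
        }
    }
    tbm_end_iterate(iterator);
}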
2264 
2265 static bool
2266 heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
2267 {
2268  HeapScanDesc hscan = (HeapScanDesc) scan;
2269  TsmRoutine *tsm = scanstate->tsmroutine;
2270  BlockNumber blockno;
2271 
2272  /* return false immediately if relation is empty */
2273  if (hscan->rs_nblocks == 0)
2274  return false;
2275 
2276  if (tsm->NextSampleBlock)
2277  {
2278  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2279  hscan->rs_cblock = blockno;
2280  }
2281  else
2282  {
2283  /* scanning table sequentially */
2284 
2285  if (hscan->rs_cblock == InvalidBlockNumber)
2286  {
2287  Assert(!hscan->rs_inited);
2288  blockno = hscan->rs_startblock;
2289  }
2290  else
2291  {
2292  Assert(hscan->rs_inited);
2293 
2294  blockno = hscan->rs_cblock + 1;
2295 
2296  if (blockno >= hscan->rs_nblocks)
2297  {
2298  /* wrap to beginning of rel, might not have started at 0 */
2299  blockno = 0;
2300  }
2301 
2302  /*
2303  * Report our new scan position for synchronization purposes.
2304  *
2305  * Note: we do this before checking for end of scan so that the
2306  * final state of the position hint is back at the start of the
2307  * rel. That's not strictly necessary, but otherwise when you run
2308  * the same query multiple times the starting position would shift
2309  * a little bit backwards on every invocation, which is confusing.
2310  * We don't guarantee any specific ordering in general, though.
2311  */
2312  if (scan->rs_flags & SO_ALLOW_SYNC)
2313  ss_report_location(scan->rs_rd, blockno);
2314 
2315  if (blockno == hscan->rs_startblock)
2316  {
2317  blockno = InvalidBlockNumber;
2318  }
2319  }
2320  }
2321 
2322  if (!BlockNumberIsValid(blockno))
2323  {
2324  if (BufferIsValid(hscan->rs_cbuf))
2325  ReleaseBuffer(hscan->rs_cbuf);
2326  hscan->rs_cbuf = InvalidBuffer;
2327  hscan->rs_cblock = InvalidBlockNumber;
2328  hscan->rs_inited = false;
2329 
2330  return false;
2331  }
2332 
2333  heapgetpage(scan, blockno);
2334  hscan->rs_inited = true;
2335 
2336  return true;
2337 }
2338 
2339 static bool
2340 heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
2341  TupleTableSlot *slot)
2342 {
2343  HeapScanDesc hscan = (HeapScanDesc) scan;
2344  TsmRoutine *tsm = scanstate->tsmroutine;
2345  BlockNumber blockno = hscan->rs_cblock;
2346  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2347 
2348  Page page;
2349  bool all_visible;
2350  OffsetNumber maxoffset;
2351 
2352  /*
2353  * When not using pagemode, we must lock the buffer during tuple
2354  * visibility checks.
2355  */
2356  if (!pagemode)
2357  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
2358 
2359  page = (Page) BufferGetPage(hscan->rs_cbuf);
2360  all_visible = PageIsAllVisible(page) &&
2361  !scan->rs_snapshot->takenDuringRecovery;
2362  maxoffset = PageGetMaxOffsetNumber(page);
2363 
2364  for (;;)
2365  {
2366  OffsetNumber tupoffset;
2367 
2368  CHECK_FOR_INTERRUPTS();
2369 
2370  /* Ask the tablesample method which tuples to check on this page. */
2371  tupoffset = tsm->NextSampleTuple(scanstate,
2372  blockno,
2373  maxoffset);
2374 
2375  if (OffsetNumberIsValid(tupoffset))
2376  {
2377  ItemId itemid;
2378  bool visible;
2379  HeapTuple tuple = &(hscan->rs_ctup);
2380 
2381  /* Skip invalid tuple pointers. */
2382  itemid = PageGetItemId(page, tupoffset);
2383  if (!ItemIdIsNormal(itemid))
2384  continue;
2385 
2386  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2387  tuple->t_len = ItemIdGetLength(itemid);
2388  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2389 
2390 
2391  if (all_visible)
2392  visible = true;
2393  else
2394  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2395  tuple, tupoffset);
2396 
2397  /* in pagemode, heapgetpage did this for us */
2398  if (!pagemode)
2399  HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2400  hscan->rs_cbuf, scan->rs_snapshot);
2401 
2402  /* Try next tuple from same page. */
2403  if (!visible)
2404  continue;
2405 
2406  /* Found visible tuple, return it. */
2407  if (!pagemode)
2408  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2409 
2410  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2411 
2412  /* Count successfully-fetched tuples as heap fetches */
2413  pgstat_count_heap_getnext(scan->rs_rd);
2414 
2415  return true;
2416  }
2417  else
2418  {
2419  /*
2420  * If we get here, it means we've exhausted the items on this page
2421  * and it's time to move to the next.
2422  */
2423  if (!pagemode)
2424  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2425 
2426  ExecClearTuple(slot);
2427  return false;
2428  }
2429  }
2430 
2431  Assert(0);
2432 }
2433 
2434 
2435 /* ----------------------------------------------------------------------------
2436  * Helper functions for the above.
2437  * ----------------------------------------------------------------------------
2438  */
2439 
2440 /*
2441  * Reconstruct and rewrite the given tuple
2442  *
2443  * We cannot simply copy the tuple as-is, for several reasons:
2444  *
2445  * 1. We'd like to squeeze out the values of any dropped columns, both
2446  * to save space and to ensure we have no corner-case failures. (It's
2447  * possible for example that the new table hasn't got a TOAST table
2448  * and so is unable to store any large values of dropped cols.)
2449  *
2450  * 2. The tuple might not even be legal for the new table; this is
2451  * currently only known to happen as an after-effect of ALTER TABLE
2452  * SET WITHOUT OIDS.
2453  *
2454  * So, we must reconstruct the tuple from component Datums.
2455  */
2456 static void
2457 reform_and_rewrite_tuple(HeapTuple tuple,
2458  Relation OldHeap, Relation NewHeap,
2459  Datum *values, bool *isnull, RewriteState rwstate)
2460 {
2461  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2462  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2463  HeapTuple copiedTuple;
2464  int i;
2465 
2466  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2467 
2468  /* Be sure to null out any dropped columns */
2469  for (i = 0; i < newTupDesc->natts; i++)
2470  {
2471  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2472  isnull[i] = true;
2473  }
2474 
2475  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2476 
2477  /* The heap rewrite module does the rest */
2478  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2479 
2480  heap_freetuple(copiedTuple);
2481 }
2482 
2483 /*
2484  * Check visibility of the tuple.
2485  */
2486 static bool
2487 SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
2488  HeapTuple tuple,
2489  OffsetNumber tupoffset)
2490 {
2491  HeapScanDesc hscan = (HeapScanDesc) scan;
2492 
2493  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2494  {
2495  /*
2496  * In pageatatime mode, heapgetpage() already did visibility checks,
2497  * so just look at the info it left in rs_vistuples[].
2498  *
2499  * We use a binary search over the known-sorted array. Note: we could
2500  * save some effort if we insisted that NextSampleTuple select tuples
2501  * in increasing order, but it's not clear that there would be enough
2502  * gain to justify the restriction.
2503  */
2504  int start = 0,
2505  end = hscan->rs_ntuples - 1;
2506 
2507  while (start <= end)
2508  {
2509  int mid = (start + end) / 2;
2510  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2511 
2512  if (tupoffset == curoffset)
2513  return true;
2514  else if (tupoffset < curoffset)
2515  end = mid - 1;
2516  else
2517  start = mid + 1;
2518  }
2519 
2520  return false;
2521  }
2522  else
2523  {
2524  /* Otherwise, we have to check the tuple individually. */
2525  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2526  buffer);
2527  }
2528 }
2529 
2530 
2531 /* ------------------------------------------------------------------------
2532  * Definition of the heap table access method.
2533  * ------------------------------------------------------------------------
2534  */
2535 
2536 static const TableAmRoutine heapam_methods = {
2537  .type = T_TableAmRoutine,
2538 
2539  .slot_callbacks = heapam_slot_callbacks,
2540 
2541  .scan_begin = heap_beginscan,
2542  .scan_end = heap_endscan,
2543  .scan_rescan = heap_rescan,
2544  .scan_getnextslot = heap_getnextslot,
2545 
2546  .scan_set_tidrange = heap_set_tidrange,
2547  .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
2548 
2549  .parallelscan_estimate = table_block_parallelscan_estimate,
2550  .parallelscan_initialize = table_block_parallelscan_initialize,
2551  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2552 
2553  .index_fetch_begin = heapam_index_fetch_begin,
2554  .index_fetch_reset = heapam_index_fetch_reset,
2555  .index_fetch_end = heapam_index_fetch_end,
2556  .index_fetch_tuple = heapam_index_fetch_tuple,
2557 
2558  .tuple_insert = heapam_tuple_insert,
2559  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2560  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2561  .multi_insert = heap_multi_insert,
2562  .tuple_delete = heapam_tuple_delete,
2563  .tuple_update = heapam_tuple_update,
2564  .tuple_lock = heapam_tuple_lock,
2565 
2566  .tuple_fetch_row_version = heapam_fetch_row_version,
2567  .tuple_get_latest_tid = heap_get_latest_tid,
2568  .tuple_tid_valid = heapam_tuple_tid_valid,
2569  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2570  .index_delete_tuples = heap_index_delete_tuples,
2571 
2572  .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
2573  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2574  .relation_copy_data = heapam_relation_copy_data,
2575  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2576  .relation_vacuum = heap_vacuum_rel,
2577  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2578  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2579  .index_build_range_scan = heapam_index_build_range_scan,
2580  .index_validate_scan = heapam_index_validate_scan,
2581 
2582  .relation_size = table_block_relation_size,
2583  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2584  .relation_toast_am = heapam_relation_toast_am,
2585  .relation_fetch_toast_slice = heap_fetch_toast_slice,
2586 
2587  .relation_estimate_size = heapam_estimate_rel_size,
2588 
2589  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2590  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2591  .scan_sample_next_block = heapam_scan_sample_next_block,
2592  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2593 };
2594 
2595 
2596 const TableAmRoutine *
2597 GetHeapamTableAmRoutine(void)
2598 {
2599  return &heapam_methods;
2600 }
2601 
2602 Datum
2603 heap_tableam_handler(PG_FUNCTION_ARGS)
2604 {
2605  PG_RETURN_POINTER(&heapam_methods);
2606 }
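The built-in pg_am row for "heap" points its amhandler at heap_tableam_handler, so every heap table ends up using the heapam_methods table above. Below is a minimal sketch of how an extension could expose its own table AM handler the same way, here simply delegating to GetHeapamTableAmRoutine(); the myheap names are invented for the example:

/* Illustrative extension-provided table AM handler (not part of this file). */
#include "postgres.h"
#include "fmgr.h"
#include "access/tableam.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(myheap_tableam_handler);

Datum
myheap_tableam_handler(PG_FUNCTION_ARGS)
{
    /* Reuse the heap AM callbacks verbatim. */
    return PointerGetDatum(GetHeapamTableAmRoutine());
}

/* SQL side, in the extension script:
 *   CREATE FUNCTION myheap_tableam_handler(internal)
 *     RETURNS table_am_handler AS 'MODULE_PATHNAME' LANGUAGE C;
 *   CREATE ACCESS METHOD myheap TYPE TABLE HANDLER myheap_tableam_handler;
 *   CREATE TABLE t (a int) USING myheap;
 */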