heapam_handler.c
1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This file wires up the lower level heapam.c et al. routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/heaptoast.h"
25 #include "access/multixact.h"
26 #include "access/rewriteheap.h"
27 #include "access/syncscan.h"
28 #include "access/tableam.h"
29 #include "access/tsmapi.h"
30 #include "access/xact.h"
31 #include "catalog/catalog.h"
32 #include "catalog/index.h"
33 #include "catalog/storage.h"
34 #include "catalog/storage_xlog.h"
35 #include "commands/progress.h"
36 #include "executor/executor.h"
37 #include "miscadmin.h"
38 #include "pgstat.h"
39 #include "storage/bufmgr.h"
40 #include "storage/bufpage.h"
41 #include "storage/lmgr.h"
42 #include "storage/predicate.h"
43 #include "storage/procarray.h"
44 #include "storage/smgr.h"
45 #include "utils/builtins.h"
46 #include "utils/rel.h"
47 
48 static void reform_and_rewrite_tuple(HeapTuple tuple,
49  Relation OldHeap, Relation NewHeap,
50  Datum *values, bool *isnull, RewriteState rwstate);
51 
52 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
53  HeapTuple tuple,
54  OffsetNumber tupoffset);
55 
56 static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
57 
58 static const TableAmRoutine heapam_methods;
59 
60 
61 /* ------------------------------------------------------------------------
62  * Slot related callbacks for heap AM
63  * ------------------------------------------------------------------------
64  */
65 
66 static const TupleTableSlotOps *
67 heapam_slot_callbacks(Relation relation)
68 {
69  return &TTSOpsBufferHeapTuple;
70 }
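/*
 * Returning &TTSOpsBufferHeapTuple above tells table_slot_callbacks() and
 * table_slot_create() to build buffer heap tuple slots for heap relations;
 * such a slot can reference a tuple that still resides in a pinned shared
 * buffer, so scans normally avoid copying tuple data out of the buffer.
 */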
71 
72 
73 /* ------------------------------------------------------------------------
74  * Index Scan Callbacks for heap AM
75  * ------------------------------------------------------------------------
76  */
77 
78 static IndexFetchTableData *
79 heapam_index_fetch_begin(Relation rel)
80 {
81  IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
82 
83  hscan->xs_base.rel = rel;
84  hscan->xs_cbuf = InvalidBuffer;
85 
86  return &hscan->xs_base;
87 }
88 
89 static void
90 heapam_index_fetch_reset(IndexFetchTableData *scan)
91 {
92  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
93 
94  if (BufferIsValid(hscan->xs_cbuf))
95  {
96  ReleaseBuffer(hscan->xs_cbuf);
97  hscan->xs_cbuf = InvalidBuffer;
98  }
99 }
100 
101 static void
102 heapam_index_fetch_end(IndexFetchTableData *scan)
103 {
104  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
105 
106  heapam_index_fetch_reset(scan);
107 
108  pfree(hscan);
109 }
110 
111 static bool
112 heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
113  ItemPointer tid,
114  Snapshot snapshot,
115  TupleTableSlot *slot,
116  bool *call_again, bool *all_dead)
117 {
118  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
119  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
120  bool got_heap_tuple;
121 
122  Assert(TTS_IS_BUFFERTUPLE(slot));
123 
124  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
125  if (!*call_again)
126  {
127  /* Switch to correct buffer if we don't have it already */
128  Buffer prev_buf = hscan->xs_cbuf;
129 
130  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
131  hscan->xs_base.rel,
132  ItemPointerGetBlockNumber(tid));
133 
134  /*
135  * Prune page, but only if we weren't already on this page
136  */
137  if (prev_buf != hscan->xs_cbuf)
138  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
139  }
140 
141  /* Obtain share-lock on the buffer so we can examine visibility */
142  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
143  got_heap_tuple = heap_hot_search_buffer(tid,
144  hscan->xs_base.rel,
145  hscan->xs_cbuf,
146  snapshot,
147  &bslot->base.tupdata,
148  all_dead,
149  !*call_again);
150  bslot->base.tupdata.t_self = *tid;
151  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
152 
153  if (got_heap_tuple)
154  {
155  /*
156  * Only in a non-MVCC snapshot can more than one member of the HOT
157  * chain be visible.
158  */
159  *call_again = !IsMVCCSnapshot(snapshot);
160 
161  slot->tts_tableOid = RelationGetRelid(scan->rel);
162  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
163  }
164  else
165  {
166  /* We've reached the end of the HOT chain. */
167  *call_again = false;
168  }
169 
170  return got_heap_tuple;
171 }
172 
173 
174 /* ------------------------------------------------------------------------
175  * Callbacks for non-modifying operations on individual tuples for heap AM
176  * ------------------------------------------------------------------------
177  */
178 
179 static bool
180 heapam_fetch_row_version(Relation relation,
181  ItemPointer tid,
182  Snapshot snapshot,
183  TupleTableSlot *slot)
184 {
185  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
186  Buffer buffer;
187 
188  Assert(TTS_IS_BUFFERTUPLE(slot));
189 
190  bslot->base.tupdata.t_self = *tid;
191  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
192  {
193  /* store in slot, transferring existing pin */
194  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
195  slot->tts_tableOid = RelationGetRelid(relation);
196 
197  return true;
198  }
199 
200  return false;
201 }
202 
203 static bool
204 heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
205 {
206  HeapScanDesc hscan = (HeapScanDesc) scan;
207 
208  return ItemPointerIsValid(tid) &&
209  ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
210 }
211 
212 static bool
213 heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
214  Snapshot snapshot)
215 {
216  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
217  bool res;
218 
219  Assert(TTS_IS_BUFFERTUPLE(slot));
220  Assert(BufferIsValid(bslot->buffer));
221 
222  /*
223  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
224  * Caller should be holding pin, but not lock.
225  */
226  LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
227  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
228  bslot->buffer);
229  LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
230 
231  return res;
232 }
233 
234 
235 /* ----------------------------------------------------------------------------
236  * Functions for manipulations of physical tuples for heap AM.
237  * ----------------------------------------------------------------------------
238  */
239 
240 static void
241 heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
242  int options, BulkInsertState bistate)
243 {
244  bool shouldFree = true;
245  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
246 
247  /* Update the tuple with table oid */
248  slot->tts_tableOid = RelationGetRelid(relation);
249  tuple->t_tableOid = slot->tts_tableOid;
250 
251  /* Perform the insertion, and copy the resulting ItemPointer */
252  heap_insert(relation, tuple, cid, options, bistate);
253  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
254 
255  if (shouldFree)
256  pfree(tuple);
257 }
258 
259 static void
260 heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
261  CommandId cid, int options,
262  BulkInsertState bistate, uint32 specToken)
263 {
264  bool shouldFree = true;
265  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
266 
267  /* Update the tuple with table oid */
268  slot->tts_tableOid = RelationGetRelid(relation);
269  tuple->t_tableOid = slot->tts_tableOid;
270 
271  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
272  options |= HEAP_INSERT_SPECULATIVE;
273 
274  /* Perform the insertion, and copy the resulting ItemPointer */
275  heap_insert(relation, tuple, cid, options, bistate);
276  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
277 
278  if (shouldFree)
279  pfree(tuple);
280 }
281 
282 static void
283 heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
284  uint32 specToken, bool succeeded)
285 {
286  bool shouldFree = true;
287  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
288 
289  /* adjust the tuple's state accordingly */
290  if (succeeded)
291  heap_finish_speculative(relation, &slot->tts_tid);
292  else
293  heap_abort_speculative(relation, &slot->tts_tid);
294 
295  if (shouldFree)
296  pfree(tuple);
297 }
298 
299 static TM_Result
300 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
301  Snapshot snapshot, Snapshot crosscheck, bool wait,
302  TM_FailureData *tmfd, bool changingPart)
303 {
304  /*
 305  * Currently, deletion of index tuples is handled at VACUUM time. If the
 306  * storage itself cleaned up dead tuples, that would also be the time to
 307  * delete the corresponding index tuples.
308  */
309  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
310 }
311 
312 
313 static TM_Result
314 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
315  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
316  bool wait, TM_FailureData *tmfd,
317  LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
318 {
319  bool shouldFree = true;
320  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
321  TM_Result result;
322 
323  /* Update the tuple with table oid */
324  slot->tts_tableOid = RelationGetRelid(relation);
325  tuple->t_tableOid = slot->tts_tableOid;
326 
327  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
328  tmfd, lockmode, update_indexes);
329  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
330 
331  /*
332  * Decide whether new index entries are needed for the tuple
333  *
334  * Note: heap_update returns the tid (location) of the new tuple in the
335  * t_self field.
336  *
337  * If the update is not HOT, we must update all indexes. If the update is
338  * HOT, it could be that we updated summarized columns, so we either
339  * update only summarized indexes, or none at all.
340  */
341  if (result != TM_Ok)
342  {
343  Assert(*update_indexes == TU_None);
344  *update_indexes = TU_None;
345  }
346  else if (!HeapTupleIsHeapOnly(tuple))
347  Assert(*update_indexes == TU_All);
348  else
349  Assert((*update_indexes == TU_Summarizing) ||
350  (*update_indexes == TU_None));
351 
352  if (shouldFree)
353  pfree(tuple);
354 
355  return result;
356 }
357 
358 static TM_Result
359 heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
360  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
361  LockWaitPolicy wait_policy, uint8 flags,
362  TM_FailureData *tmfd)
363 {
364  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
365  TM_Result result;
366  Buffer buffer;
367  HeapTuple tuple = &bslot->base.tupdata;
368  bool follow_updates;
369 
370  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
371  tmfd->traversed = false;
372 
373  Assert(TTS_IS_BUFFERTUPLE(slot));
374 
375 tuple_lock_retry:
376  tuple->t_self = *tid;
377  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
378  follow_updates, &buffer, tmfd);
379 
380  if (result == TM_Updated &&
381  (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
382  {
383  /* Should not encounter speculative tuple on recheck */
384  Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
385 
386  ReleaseBuffer(buffer);
387 
388  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
389  {
390  SnapshotData SnapshotDirty;
391  TransactionId priorXmax;
392 
393  /* it was updated, so look at the updated version */
394  *tid = tmfd->ctid;
395  /* updated row should have xmin matching this xmax */
396  priorXmax = tmfd->xmax;
397 
398  /* signal that a tuple later in the chain is getting locked */
399  tmfd->traversed = true;
400 
401  /*
402  * fetch target tuple
403  *
404  * Loop here to deal with updated or busy tuples
405  */
406  InitDirtySnapshot(SnapshotDirty);
407  for (;;)
408  {
409  if (ItemPointerIndicatesMovedPartitions(tid))
410  ereport(ERROR,
411  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
412  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
413 
414  tuple->t_self = *tid;
415  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
416  {
417  /*
418  * If xmin isn't what we're expecting, the slot must have
419  * been recycled and reused for an unrelated tuple. This
420  * implies that the latest version of the row was deleted,
421  * so we need do nothing. (Should be safe to examine xmin
422  * without getting buffer's content lock. We assume
423  * reading a TransactionId to be atomic, and Xmin never
424  * changes in an existing tuple, except to invalid or
425  * frozen, and neither of those can match priorXmax.)
426  */
427  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
428  priorXmax))
429  {
430  ReleaseBuffer(buffer);
431  return TM_Deleted;
432  }
433 
434  /* otherwise xmin should not be dirty... */
435  if (TransactionIdIsValid(SnapshotDirty.xmin))
436  ereport(ERROR,
437  (errcode(ERRCODE_DATA_CORRUPTED),
438  errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
439  SnapshotDirty.xmin,
440  ItemPointerGetBlockNumber(&tuple->t_self),
441  ItemPointerGetOffsetNumber(&tuple->t_self),
442  RelationGetRelationName(relation))));
443 
444  /*
445  * If tuple is being updated by other transaction then we
446  * have to wait for its commit/abort, or die trying.
447  */
448  if (TransactionIdIsValid(SnapshotDirty.xmax))
449  {
450  ReleaseBuffer(buffer);
451  switch (wait_policy)
452  {
453  case LockWaitBlock:
454  XactLockTableWait(SnapshotDirty.xmax,
455  relation, &tuple->t_self,
456  XLTW_FetchUpdated);
457  break;
458  case LockWaitSkip:
459  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
460  /* skip instead of waiting */
461  return TM_WouldBlock;
462  break;
463  case LockWaitError:
464  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
465  ereport(ERROR,
466  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
467  errmsg("could not obtain lock on row in relation \"%s\"",
468  RelationGetRelationName(relation))));
469  break;
470  }
471  continue; /* loop back to repeat heap_fetch */
472  }
473 
474  /*
475  * If tuple was inserted by our own transaction, we have
476  * to check cmin against cid: cmin >= current CID means
477  * our command cannot see the tuple, so we should ignore
478  * it. Otherwise heap_lock_tuple() will throw an error,
479  * and so would any later attempt to update or delete the
480  * tuple. (We need not check cmax because
481  * HeapTupleSatisfiesDirty will consider a tuple deleted
482  * by our transaction dead, regardless of cmax.) We just
483  * checked that priorXmax == xmin, so we can test that
484  * variable instead of doing HeapTupleHeaderGetXmin again.
485  */
486  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
487  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
488  {
489  tmfd->xmax = priorXmax;
490 
491  /*
492  * Cmin is the problematic value, so store that. See
493  * above.
494  */
495  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
496  ReleaseBuffer(buffer);
497  return TM_SelfModified;
498  }
499 
500  /*
501  * This is a live tuple, so try to lock it again.
502  */
503  ReleaseBuffer(buffer);
504  goto tuple_lock_retry;
505  }
506 
507  /*
508  * If the referenced slot was actually empty, the latest
509  * version of the row must have been deleted, so we need do
510  * nothing.
511  */
512  if (tuple->t_data == NULL)
513  {
514  Assert(!BufferIsValid(buffer));
515  return TM_Deleted;
516  }
517 
518  /*
519  * As above, if xmin isn't what we're expecting, do nothing.
520  */
521  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
522  priorXmax))
523  {
524  ReleaseBuffer(buffer);
525  return TM_Deleted;
526  }
527 
528  /*
529  * If we get here, the tuple was found but failed
530  * SnapshotDirty. Assuming the xmin is either a committed xact
531  * or our own xact (as it certainly should be if we're trying
532  * to modify the tuple), this must mean that the row was
533  * updated or deleted by either a committed xact or our own
534  * xact. If it was deleted, we can ignore it; if it was
535  * updated then chain up to the next version and repeat the
536  * whole process.
537  *
538  * As above, it should be safe to examine xmax and t_ctid
539  * without the buffer content lock, because they can't be
540  * changing. We'd better hold a buffer pin though.
541  */
542  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
543  {
544  /* deleted, so forget about it */
545  ReleaseBuffer(buffer);
546  return TM_Deleted;
547  }
548 
549  /* updated, so look at the updated row */
550  *tid = tuple->t_data->t_ctid;
551  /* updated row should have xmin matching this xmax */
552  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
553  ReleaseBuffer(buffer);
554  /* loop back to fetch next in chain */
555  }
556  }
557  else
558  {
559  /* tuple was deleted, so give up */
560  return TM_Deleted;
561  }
562  }
563 
564  slot->tts_tableOid = RelationGetRelid(relation);
565  tuple->t_tableOid = slot->tts_tableOid;
566 
567  /* store in slot, transferring existing pin */
568  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
569 
570  return result;
571 }
572 
573 
574 /* ------------------------------------------------------------------------
575  * DDL related callbacks for heap AM.
576  * ------------------------------------------------------------------------
577  */
578 
579 static void
580 heapam_relation_set_new_filelocator(Relation rel,
581  const RelFileLocator *newrlocator,
582  char persistence,
583  TransactionId *freezeXid,
584  MultiXactId *minmulti)
585 {
586  SMgrRelation srel;
587 
588  /*
589  * Initialize to the minimum XID that could put tuples in the table. We
590  * know that no xacts older than RecentXmin are still running, so that
591  * will do.
592  */
593  *freezeXid = RecentXmin;
594 
595  /*
596  * Similarly, initialize the minimum Multixact to the first value that
597  * could possibly be stored in tuples in the table. Running transactions
598  * could reuse values from their local cache, so we are careful to
599  * consider all currently running multis.
600  *
601  * XXX this could be refined further, but is it worth the hassle?
602  */
603  *minmulti = GetOldestMultiXactId();
604 
605  srel = RelationCreateStorage(*newrlocator, persistence, true);
606 
607  /*
608  * If required, set up an init fork for an unlogged table so that it can
609  * be correctly reinitialized on restart. An immediate sync is required
610  * even if the page has been logged, because the write did not go through
611  * shared_buffers and therefore a concurrent checkpoint may have moved the
612  * redo pointer past our xlog record. Recovery may as well remove it
613  * while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
614  * record. Therefore, logging is necessary even if wal_level=minimal.
615  */
616  if (persistence == RELPERSISTENCE_UNLOGGED)
617  {
618  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
619  rel->rd_rel->relkind == RELKIND_MATVIEW ||
620  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
621  smgrcreate(srel, INIT_FORKNUM, false);
622  log_smgrcreate(newrlocator, INIT_FORKNUM);
623  smgrimmedsync(srel, INIT_FORKNUM);
624  }
625 
626  smgrclose(srel);
627 }
628 
629 static void
630 heapam_relation_nontransactional_truncate(Relation rel)
631 {
632  RelationTruncate(rel, 0);
633 }
634 
635 static void
636 heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
637 {
638  SMgrRelation dstrel;
639 
640  dstrel = smgropen(*newrlocator, rel->rd_backend);
641 
642  /*
643  * Since we copy the file directly without looking at the shared buffers,
644  * we'd better first flush out any pages of the source relation that are
645  * in shared buffers. We assume no new changes will be made while we are
646  * holding exclusive lock on the rel.
647  */
648  FlushRelationBuffers(rel);
649 
650  /*
651  * Create and copy all forks of the relation, and schedule unlinking of
652  * old physical files.
653  *
654  * NOTE: any conflict in relfilenumber value will be caught in
655  * RelationCreateStorage().
656  */
657  RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
658 
659  /* copy main fork */
660  RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
661  rel->rd_rel->relpersistence);
662 
663  /* copy those extra forks that exist */
664  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
665  forkNum <= MAX_FORKNUM; forkNum++)
666  {
667  if (smgrexists(RelationGetSmgr(rel), forkNum))
668  {
669  smgrcreate(dstrel, forkNum, false);
670 
671  /*
672  * WAL log creation if the relation is persistent, or this is the
673  * init fork of an unlogged relation.
674  */
675  if (RelationIsPermanent(rel) ||
676  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
677  forkNum == INIT_FORKNUM))
678  log_smgrcreate(newrlocator, forkNum);
679  RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
680  rel->rd_rel->relpersistence);
681  }
682  }
683 
684 
685  /* drop old relation, and close new one */
686  RelationDropStorage(rel);
687  smgrclose(dstrel);
688 }
689 
690 static void
691 heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
692  Relation OldIndex, bool use_sort,
693  TransactionId OldestXmin,
694  TransactionId *xid_cutoff,
695  MultiXactId *multi_cutoff,
696  double *num_tuples,
697  double *tups_vacuumed,
698  double *tups_recently_dead)
699 {
700  RewriteState rwstate;
701  IndexScanDesc indexScan;
702  TableScanDesc tableScan;
703  HeapScanDesc heapScan;
704  bool is_system_catalog;
705  Tuplesortstate *tuplesort;
706  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
707  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
708  TupleTableSlot *slot;
709  int natts;
710  Datum *values;
711  bool *isnull;
712  BufferHeapTupleTableSlot *hslot;
713  BlockNumber prev_cblock = InvalidBlockNumber;
714 
715  /* Remember if it's a system catalog */
716  is_system_catalog = IsSystemRelation(OldHeap);
717 
718  /*
719  * Valid smgr_targblock implies something already wrote to the relation.
720  * This may be harmless, but this function hasn't planned for it.
721  */
722  Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
723 
724  /* Preallocate values/isnull arrays */
725  natts = newTupDesc->natts;
726  values = (Datum *) palloc(natts * sizeof(Datum));
727  isnull = (bool *) palloc(natts * sizeof(bool));
728 
729  /* Initialize the rewrite operation */
730  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
731  *multi_cutoff);
732 
733 
734  /*
735  * Set up sorting if wanted. NewHeap is being passed to
736  * tuplesort_begin_cluster(), it could have been OldHeap too. It does not
737  * really matter, as the goal is to have a heap relation being passed to
738  * _bt_log_reuse_page() (which should not be called from this code path).
739  */
740  if (use_sort)
741  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex, NewHeap,
742  maintenance_work_mem,
743  NULL, TUPLESORT_NONE);
744  else
745  tuplesort = NULL;
746 
747  /*
748  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
749  * that still need to be copied, we scan with SnapshotAny and use
750  * HeapTupleSatisfiesVacuum for the visibility test.
751  */
752  if (OldIndex != NULL && !use_sort)
753  {
754  const int ci_index[] = {
755  PROGRESS_CLUSTER_PHASE,
756  PROGRESS_CLUSTER_INDEX_RELID
757  };
758  int64 ci_val[2];
759 
760  /* Set phase and OIDOldIndex to columns */
761  ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
762  ci_val[1] = RelationGetRelid(OldIndex);
763  pgstat_progress_update_multi_param(2, ci_index, ci_val);
764 
765  tableScan = NULL;
766  heapScan = NULL;
767  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
768  index_rescan(indexScan, NULL, 0, NULL, 0);
769  }
770  else
771  {
772  /* In scan-and-sort mode and also VACUUM FULL, set phase */
773  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
774  PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
775 
776  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
777  heapScan = (HeapScanDesc) tableScan;
778  indexScan = NULL;
779 
780  /* Set total heap blocks */
781  pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
782  heapScan->rs_nblocks);
783  }
784 
785  slot = table_slot_create(OldHeap, NULL);
786  hslot = (BufferHeapTupleTableSlot *) slot;
787 
788  /*
789  * Scan through the OldHeap, either in OldIndex order or sequentially;
790  * copy each tuple into the NewHeap, or transiently to the tuplesort
791  * module. Note that we don't bother sorting dead tuples (they won't get
792  * to the new table anyway).
793  */
794  for (;;)
795  {
796  HeapTuple tuple;
797  Buffer buf;
798  bool isdead;
799 
801 
802  if (indexScan != NULL)
803  {
804  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
805  break;
806 
807  /* Since we used no scan keys, should never need to recheck */
808  if (indexScan->xs_recheck)
809  elog(ERROR, "CLUSTER does not support lossy index conditions");
810  }
811  else
812  {
813  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
814  {
815  /*
816  * If the last pages of the scan were empty, we would go to
817  * the next phase while heap_blks_scanned != heap_blks_total.
818  * Instead, to ensure that heap_blks_scanned is equivalent to
819  * heap_blks_total after the table scan phase, this parameter
820  * is manually updated to the correct value when the table
821  * scan finishes.
822  */
823  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
824  heapScan->rs_nblocks);
825  break;
826  }
827 
828  /*
829  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
830  * scanned
831  *
832  * Note that heapScan may start at an offset and wrap around, i.e.
833  * rs_startblock may be >0, and rs_cblock may end with a number
834  * below rs_startblock. To prevent showing this wraparound to the
835  * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
836  */
837  if (prev_cblock != heapScan->rs_cblock)
838  {
839  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
840  (heapScan->rs_cblock +
841  heapScan->rs_nblocks -
842  heapScan->rs_startblock
843  ) % heapScan->rs_nblocks + 1);
844  prev_cblock = heapScan->rs_cblock;
845  }
846  }
847 
848  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
849  buf = hslot->buffer;
850 
851  LockBuffer(buf, BUFFER_LOCK_SHARE);
852 
853  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
854  {
855  case HEAPTUPLE_DEAD:
856  /* Definitely dead */
857  isdead = true;
858  break;
859  case HEAPTUPLE_RECENTLY_DEAD:
860  *tups_recently_dead += 1;
861  /* fall through */
862  case HEAPTUPLE_LIVE:
863  /* Live or recently dead, must copy it */
864  isdead = false;
865  break;
866  case HEAPTUPLE_INSERT_IN_PROGRESS:
867 
868  /*
869  * Since we hold exclusive lock on the relation, normally the
870  * only way to see this is if it was inserted earlier in our
871  * own transaction. However, it can happen in system
872  * catalogs, since we tend to release write lock before commit
873  * there. Give a warning if neither case applies; but in any
874  * case we had better copy it.
875  */
876  if (!is_system_catalog &&
877  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
878  elog(WARNING, "concurrent insert in progress within table \"%s\"",
879  RelationGetRelationName(OldHeap));
880  /* treat as live */
881  isdead = false;
882  break;
883  case HEAPTUPLE_DELETE_IN_PROGRESS:
884 
885  /*
886  * Similar situation to INSERT_IN_PROGRESS case.
887  */
888  if (!is_system_catalog &&
889  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
890  elog(WARNING, "concurrent delete in progress within table \"%s\"",
891  RelationGetRelationName(OldHeap));
892  /* treat as recently dead */
893  *tups_recently_dead += 1;
894  isdead = false;
895  break;
896  default:
897  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
898  isdead = false; /* keep compiler quiet */
899  break;
900  }
901 
902  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
903 
904  if (isdead)
905  {
906  *tups_vacuumed += 1;
907  /* heap rewrite module still needs to see it... */
908  if (rewrite_heap_dead_tuple(rwstate, tuple))
909  {
910  /* A previous recently-dead tuple is now known dead */
911  *tups_vacuumed += 1;
912  *tups_recently_dead -= 1;
913  }
914  continue;
915  }
916 
917  *num_tuples += 1;
918  if (tuplesort != NULL)
919  {
920  tuplesort_putheaptuple(tuplesort, tuple);
921 
922  /*
923  * In scan-and-sort mode, report increase in number of tuples
924  * scanned
925  */
926  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
927  *num_tuples);
928  }
929  else
930  {
931  const int ct_index[] = {
932  PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
933  PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
934  };
935  int64 ct_val[2];
936 
937  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
938  values, isnull, rwstate);
939 
940  /*
941  * In indexscan mode and also VACUUM FULL, report increase in
942  * number of tuples scanned and written
943  */
944  ct_val[0] = *num_tuples;
945  ct_val[1] = *num_tuples;
946  pgstat_progress_update_multi_param(2, ct_index, ct_val);
947  }
948  }
949 
950  if (indexScan != NULL)
951  index_endscan(indexScan);
952  if (tableScan != NULL)
953  table_endscan(tableScan);
954  if (slot)
955  ExecDropSingleTupleTableSlot(slot);
956 
957  /*
958  * In scan-and-sort mode, complete the sort, then read out all live tuples
959  * from the tuplestore and write them to the new relation.
960  */
961  if (tuplesort != NULL)
962  {
963  double n_tuples = 0;
964 
965  /* Report that we are now sorting tuples */
966  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
967  PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
968 
969  tuplesort_performsort(tuplesort);
970 
971  /* Report that we are now writing new heap */
972  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
973  PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
974 
975  for (;;)
976  {
977  HeapTuple tuple;
978 
980 
981  tuple = tuplesort_getheaptuple(tuplesort, true);
982  if (tuple == NULL)
983  break;
984 
985  n_tuples += 1;
986  reform_and_rewrite_tuple(tuple,
987  OldHeap, NewHeap,
988  values, isnull,
989  rwstate);
990  /* Report n_tuples */
991  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
992  n_tuples);
993  }
994 
995  tuplesort_end(tuplesort);
996  }
997 
998  /* Write out any remaining tuples, and fsync if needed */
999  end_heap_rewrite(rwstate);
1000 
1001  /* Clean up */
1002  pfree(values);
1003  pfree(isnull);
1004 }
1005 
1006 static bool
1007 heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
1008  BufferAccessStrategy bstrategy)
1009 {
1010  HeapScanDesc hscan = (HeapScanDesc) scan;
1011 
1012  /*
1013  * We must maintain a pin on the target page's buffer to ensure that
1014  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
1015  * under us. Hence, pin the page until we are done looking at it. We
1016  * also choose to hold sharelock on the buffer throughout --- we could
1017  * release and re-acquire sharelock for each tuple, but since we aren't
1018  * doing much work per tuple, the extra lock traffic is probably better
1019  * avoided.
1020  */
1021  hscan->rs_cblock = blockno;
1022  hscan->rs_cindex = FirstOffsetNumber;
1023  hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
1024  blockno, RBM_NORMAL, bstrategy);
1025  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1026 
1027  /* in heap all blocks can contain tuples, so always return true */
1028  return true;
1029 }
1030 
1031 static bool
1032 heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1033  double *liverows, double *deadrows,
1034  TupleTableSlot *slot)
1035 {
1036  HeapScanDesc hscan = (HeapScanDesc) scan;
1037  Page targpage;
1038  OffsetNumber maxoffset;
1039  BufferHeapTupleTableSlot *hslot;
1040 
1041  Assert(TTS_IS_BUFFERTUPLE(slot));
1042 
1043  hslot = (BufferHeapTupleTableSlot *) slot;
1044  targpage = BufferGetPage(hscan->rs_cbuf);
1045  maxoffset = PageGetMaxOffsetNumber(targpage);
1046 
1047  /* Inner loop over all tuples on the selected page */
1048  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1049  {
1050  ItemId itemid;
1051  HeapTuple targtuple = &hslot->base.tupdata;
1052  bool sample_it = false;
1053 
1054  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1055 
1056  /*
1057  * We ignore unused and redirect line pointers. DEAD line pointers
1058  * should be counted as dead, because we need vacuum to run to get rid
1059  * of them. Note that this rule agrees with the way that
1060  * heap_page_prune() counts things.
1061  */
1062  if (!ItemIdIsNormal(itemid))
1063  {
1064  if (ItemIdIsDead(itemid))
1065  *deadrows += 1;
1066  continue;
1067  }
1068 
1069  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1070 
1071  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1072  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1073  targtuple->t_len = ItemIdGetLength(itemid);
1074 
1075  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1076  hscan->rs_cbuf))
1077  {
1078  case HEAPTUPLE_LIVE:
1079  sample_it = true;
1080  *liverows += 1;
1081  break;
1082 
1083  case HEAPTUPLE_DEAD:
1084  case HEAPTUPLE_RECENTLY_DEAD:
1085  /* Count dead and recently-dead rows */
1086  *deadrows += 1;
1087  break;
1088 
1090 
1091  /*
1092  * Insert-in-progress rows are not counted. We assume that
1093  * when the inserting transaction commits or aborts, it will
1094  * send a stats message to increment the proper count. This
1095  * works right only if that transaction ends after we finish
1096  * analyzing the table; if things happen in the other order,
1097  * its stats update will be overwritten by ours. However, the
1098  * error will be large only if the other transaction runs long
1099  * enough to insert many tuples, so assuming it will finish
1100  * after us is the safer option.
1101  *
1102  * A special case is that the inserting transaction might be
1103  * our own. In this case we should count and sample the row,
1104  * to accommodate users who load a table and analyze it in one
1105  * transaction. (pgstat_report_analyze has to adjust the
1106  * numbers we report to the cumulative stats system to make
1107  * this come out right.)
1108  */
1109  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
1110  {
1111  sample_it = true;
1112  *liverows += 1;
1113  }
1114  break;
1115 
1116  case HEAPTUPLE_DELETE_IN_PROGRESS:
1117 
1118  /*
1119  * We count and sample delete-in-progress rows the same as
1120  * live ones, so that the stats counters come out right if the
1121  * deleting transaction commits after us, per the same
1122  * reasoning given above.
1123  *
1124  * If the delete was done by our own transaction, however, we
1125  * must count the row as dead to make pgstat_report_analyze's
1126  * stats adjustments come out right. (Note: this works out
1127  * properly when the row was both inserted and deleted in our
1128  * xact.)
1129  *
1130  * The net effect of these choices is that we act as though an
1131  * IN_PROGRESS transaction hasn't happened yet, except if it
1132  * is our own transaction, which we assume has happened.
1133  *
1134  * This approach ensures that we behave sanely if we see both
1135  * the pre-image and post-image rows for a row being updated
1136  * by a concurrent transaction: we will sample the pre-image
1137  * but not the post-image. We also get sane results if the
1138  * concurrent transaction never commits.
1139  */
1140  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
1141  *deadrows += 1;
1142  else
1143  {
1144  sample_it = true;
1145  *liverows += 1;
1146  }
1147  break;
1148 
1149  default:
1150  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1151  break;
1152  }
1153 
1154  if (sample_it)
1155  {
1156  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1157  hscan->rs_cindex++;
1158 
1159  /* note that we leave the buffer locked here! */
1160  return true;
1161  }
1162  }
1163 
1164  /* Now release the lock and pin on the page */
1165  UnlockReleaseBuffer(hscan->rs_cbuf);
1166  hscan->rs_cbuf = InvalidBuffer;
1167 
1168  /* also prevent old slot contents from having pin on page */
1169  ExecClearTuple(slot);
1170 
1171  return false;
1172 }
1173 
1174 static double
1175 heapam_index_build_range_scan(Relation heapRelation,
1176  Relation indexRelation,
1177  IndexInfo *indexInfo,
1178  bool allow_sync,
1179  bool anyvisible,
1180  bool progress,
1181  BlockNumber start_blockno,
1182  BlockNumber numblocks,
1183  IndexBuildCallback callback,
1184  void *callback_state,
1185  TableScanDesc scan)
1186 {
1187  HeapScanDesc hscan;
1188  bool is_system_catalog;
1189  bool checking_uniqueness;
1190  HeapTuple heapTuple;
1191  Datum values[INDEX_MAX_KEYS];
1192  bool isnull[INDEX_MAX_KEYS];
1193  double reltuples;
1194  ExprState *predicate;
1195  TupleTableSlot *slot;
1196  EState *estate;
1197  ExprContext *econtext;
1198  Snapshot snapshot;
1199  bool need_unregister_snapshot = false;
1200  TransactionId OldestXmin;
1201  BlockNumber previous_blkno = InvalidBlockNumber;
1202  BlockNumber root_blkno = InvalidBlockNumber;
1203  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1204 
1205  /*
1206  * sanity checks
1207  */
1208  Assert(OidIsValid(indexRelation->rd_rel->relam));
1209 
1210  /* Remember if it's a system catalog */
1211  is_system_catalog = IsSystemRelation(heapRelation);
1212 
1213  /* See whether we're verifying uniqueness/exclusion properties */
1214  checking_uniqueness = (indexInfo->ii_Unique ||
1215  indexInfo->ii_ExclusionOps != NULL);
1216 
1217  /*
1218  * "Any visible" mode is not compatible with uniqueness checks; make sure
1219  * only one of those is requested.
1220  */
1221  Assert(!(anyvisible && checking_uniqueness));
1222 
1223  /*
1224  * Need an EState for evaluation of index expressions and partial-index
1225  * predicates. Also a slot to hold the current tuple.
1226  */
1227  estate = CreateExecutorState();
1228  econtext = GetPerTupleExprContext(estate);
1229  slot = table_slot_create(heapRelation, NULL);
1230 
1231  /* Arrange for econtext's scan tuple to be the tuple under test */
1232  econtext->ecxt_scantuple = slot;
1233 
1234  /* Set up execution state for predicate, if any. */
1235  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1236 
1237  /*
1238  * Prepare for scan of the base relation. In a normal index build, we use
1239  * SnapshotAny because we must retrieve all tuples and do our own time
1240  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1241  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1242  * and index whatever's live according to that.
1243  */
1244  OldestXmin = InvalidTransactionId;
1245 
1246  /* okay to ignore lazy VACUUMs here */
1247  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1248  OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1249 
1250  if (!scan)
1251  {
1252  /*
1253  * Serial index build.
1254  *
1255  * Must begin our own heap scan in this case. We may also need to
1256  * register a snapshot whose lifetime is under our direct control.
1257  */
1258  if (!TransactionIdIsValid(OldestXmin))
1259  {
1260  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1261  need_unregister_snapshot = true;
1262  }
1263  else
1264  snapshot = SnapshotAny;
1265 
1266  scan = table_beginscan_strat(heapRelation, /* relation */
1267  snapshot, /* snapshot */
1268  0, /* number of keys */
1269  NULL, /* scan key */
1270  true, /* buffer access strategy OK */
1271  allow_sync); /* syncscan OK? */
1272  }
1273  else
1274  {
1275  /*
1276  * Parallel index build.
1277  *
1278  * Parallel case never registers/unregisters own snapshot. Snapshot
1279  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1280  * snapshot, based on same criteria as serial case.
1281  */
1282  Assert(!IsBootstrapProcessingMode());
1283  Assert(allow_sync);
1284  snapshot = scan->rs_snapshot;
1285  }
1286 
1287  hscan = (HeapScanDesc) scan;
1288 
1289  /*
1290  * Must have called GetOldestNonRemovableTransactionId() if using
1291  * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1292  * worth checking this for parallel builds, since ambuild routines that
1293  * support parallel builds must work these details out for themselves.)
1294  */
1295  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1296  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1297  !TransactionIdIsValid(OldestXmin));
1298  Assert(snapshot == SnapshotAny || !anyvisible);
1299 
1300  /* Publish number of blocks to scan */
1301  if (progress)
1302  {
1303  BlockNumber nblocks;
1304 
1305  if (hscan->rs_base.rs_parallel != NULL)
1306  {
1307  ParallelBlockTableScanDesc pbscan;
1308 
1309  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1310  nblocks = pbscan->phs_nblocks;
1311  }
1312  else
1313  nblocks = hscan->rs_nblocks;
1314 
1315  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1316  nblocks);
1317  }
1318 
1319  /* set our scan endpoints */
1320  if (!allow_sync)
1321  heap_setscanlimits(scan, start_blockno, numblocks);
1322  else
1323  {
1324  /* syncscan can only be requested on whole relation */
1325  Assert(start_blockno == 0);
1326  Assert(numblocks == InvalidBlockNumber);
1327  }
1328 
1329  reltuples = 0;
1330 
1331  /*
1332  * Scan all tuples in the base relation.
1333  */
1334  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1335  {
1336  bool tupleIsAlive;
1337 
1339 
1340  /* Report scan progress, if asked to. */
1341  if (progress)
1342  {
1343  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1344 
1345  if (blocks_done != previous_blkno)
1346  {
1347  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1348  blocks_done);
1349  previous_blkno = blocks_done;
1350  }
1351  }
1352 
1353  /*
1354  * When dealing with a HOT-chain of updated tuples, we want to index
1355  * the values of the live tuple (if any), but index it under the TID
1356  * of the chain's root tuple. This approach is necessary to preserve
1357  * the HOT-chain structure in the heap. So we need to be able to find
1358  * the root item offset for every tuple that's in a HOT-chain. When
1359  * first reaching a new page of the relation, call
1360  * heap_get_root_tuples() to build a map of root item offsets on the
1361  * page.
1362  *
1363  * It might look unsafe to use this information across buffer
1364  * lock/unlock. However, we hold ShareLock on the table so no
1365  * ordinary insert/update/delete should occur; and we hold pin on the
1366  * buffer continuously while visiting the page, so no pruning
1367  * operation can occur either.
1368  *
1369  * In cases with only ShareUpdateExclusiveLock on the table, it's
1370  * possible for some HOT tuples to appear that we didn't know about
1371  * when we first read the page. To handle that case, we re-obtain the
1372  * list of root offsets when a HOT tuple points to a root item that we
1373  * don't know about.
1374  *
1375  * Also, although our opinions about tuple liveness could change while
1376  * we scan the page (due to concurrent transaction commits/aborts),
1377  * the chain root locations won't, so this info doesn't need to be
1378  * rebuilt after waiting for another transaction.
1379  *
1380  * Note the implied assumption that there is no more than one live
1381  * tuple per HOT-chain --- else we could create more than one index
1382  * entry pointing to the same root tuple.
1383  */
1384  if (hscan->rs_cblock != root_blkno)
1385  {
1386  Page page = BufferGetPage(hscan->rs_cbuf);
1387 
1388  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1389  heap_get_root_tuples(page, root_offsets);
1390  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1391 
1392  root_blkno = hscan->rs_cblock;
1393  }
1394 
1395  if (snapshot == SnapshotAny)
1396  {
1397  /* do our own time qual check */
1398  bool indexIt;
1399  TransactionId xwait;
1400 
1401  recheck:
1402 
1403  /*
1404  * We could possibly get away with not locking the buffer here,
1405  * since caller should hold ShareLock on the relation, but let's
1406  * be conservative about it. (This remark is still correct even
1407  * with HOT-pruning: our pin on the buffer prevents pruning.)
1408  */
1409  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1410 
1411  /*
1412  * The criteria for counting a tuple as live in this block need to
1413  * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1414  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1415  * reltuples values, e.g. when there are many recently-dead
1416  * tuples.
1417  */
1418  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1419  hscan->rs_cbuf))
1420  {
1421  case HEAPTUPLE_DEAD:
1422  /* Definitely dead, we can ignore it */
1423  indexIt = false;
1424  tupleIsAlive = false;
1425  break;
1426  case HEAPTUPLE_LIVE:
1427  /* Normal case, index and unique-check it */
1428  indexIt = true;
1429  tupleIsAlive = true;
1430  /* Count it as live, too */
1431  reltuples += 1;
1432  break;
1433  case HEAPTUPLE_RECENTLY_DEAD:
1434 
1435  /*
1436  * If tuple is recently deleted then we must index it
1437  * anyway to preserve MVCC semantics. (Pre-existing
1438  * transactions could try to use the index after we finish
1439  * building it, and may need to see such tuples.)
1440  *
1441  * However, if it was HOT-updated then we must only index
1442  * the live tuple at the end of the HOT-chain. Since this
1443  * breaks semantics for pre-existing snapshots, mark the
1444  * index as unusable for them.
1445  *
1446  * We don't count recently-dead tuples in reltuples, even
1447  * if we index them; see heapam_scan_analyze_next_tuple().
1448  */
1449  if (HeapTupleIsHotUpdated(heapTuple))
1450  {
1451  indexIt = false;
1452  /* mark the index as unsafe for old snapshots */
1453  indexInfo->ii_BrokenHotChain = true;
1454  }
1455  else
1456  indexIt = true;
1457  /* In any case, exclude the tuple from unique-checking */
1458  tupleIsAlive = false;
1459  break;
1460  case HEAPTUPLE_INSERT_IN_PROGRESS:
1461 
1462  /*
1463  * In "anyvisible" mode, this tuple is visible and we
1464  * don't need any further checks.
1465  */
1466  if (anyvisible)
1467  {
1468  indexIt = true;
1469  tupleIsAlive = true;
1470  reltuples += 1;
1471  break;
1472  }
1473 
1474  /*
1475  * Since caller should hold ShareLock or better, normally
1476  * the only way to see this is if it was inserted earlier
1477  * in our own transaction. However, it can happen in
1478  * system catalogs, since we tend to release write lock
1479  * before commit there. Give a warning if neither case
1480  * applies.
1481  */
1482  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1483  if (!TransactionIdIsCurrentTransactionId(xwait))
1484  {
1485  if (!is_system_catalog)
1486  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1487  RelationGetRelationName(heapRelation));
1488 
1489  /*
1490  * If we are performing uniqueness checks, indexing
1491  * such a tuple could lead to a bogus uniqueness
1492  * failure. In that case we wait for the inserting
1493  * transaction to finish and check again.
1494  */
1495  if (checking_uniqueness)
1496  {
1497  /*
1498  * Must drop the lock on the buffer before we wait
1499  */
1500  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1501  XactLockTableWait(xwait, heapRelation,
1502  &heapTuple->t_self,
1503  XLTW_InsertIndexUnique);
1504  CHECK_FOR_INTERRUPTS();
1505  goto recheck;
1506  }
1507  }
1508  else
1509  {
1510  /*
1511  * For consistency with
1512  * heapam_scan_analyze_next_tuple(), count
1513  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1514  * when inserted by our own transaction.
1515  */
1516  reltuples += 1;
1517  }
1518 
1519  /*
1520  * We must index such tuples, since if the index build
1521  * commits then they're good.
1522  */
1523  indexIt = true;
1524  tupleIsAlive = true;
1525  break;
1526  case HEAPTUPLE_DELETE_IN_PROGRESS:
1527 
1528  /*
1529  * As with INSERT_IN_PROGRESS case, this is unexpected
1530  * unless it's our own deletion or a system catalog; but
1531  * in anyvisible mode, this tuple is visible.
1532  */
1533  if (anyvisible)
1534  {
1535  indexIt = true;
1536  tupleIsAlive = false;
1537  reltuples += 1;
1538  break;
1539  }
1540 
1541  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1542  if (!TransactionIdIsCurrentTransactionId(xwait))
1543  {
1544  if (!is_system_catalog)
1545  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1546  RelationGetRelationName(heapRelation));
1547 
1548  /*
1549  * If we are performing uniqueness checks, assuming
1550  * the tuple is dead could lead to missing a
1551  * uniqueness violation. In that case we wait for the
1552  * deleting transaction to finish and check again.
1553  *
1554  * Also, if it's a HOT-updated tuple, we should not
1555  * index it but rather the live tuple at the end of
1556  * the HOT-chain. However, the deleting transaction
1557  * could abort, possibly leaving this tuple as live
1558  * after all, in which case it has to be indexed. The
1559  * only way to know what to do is to wait for the
1560  * deleting transaction to finish and check again.
1561  */
1562  if (checking_uniqueness ||
1563  HeapTupleIsHotUpdated(heapTuple))
1564  {
1565  /*
1566  * Must drop the lock on the buffer before we wait
1567  */
1568  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1569  XactLockTableWait(xwait, heapRelation,
1570  &heapTuple->t_self,
1571  XLTW_InsertIndexUnique);
1572  CHECK_FOR_INTERRUPTS();
1573  goto recheck;
1574  }
1575 
1576  /*
1577  * Otherwise index it but don't check for uniqueness,
1578  * the same as a RECENTLY_DEAD tuple.
1579  */
1580  indexIt = true;
1581 
1582  /*
1583  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1584  * if they were not deleted by the current
1585  * transaction. That's what
1586  * heapam_scan_analyze_next_tuple() does, and we want
1587  * the behavior to be consistent.
1588  */
1589  reltuples += 1;
1590  }
1591  else if (HeapTupleIsHotUpdated(heapTuple))
1592  {
1593  /*
1594  * It's a HOT-updated tuple deleted by our own xact.
1595  * We can assume the deletion will commit (else the
1596  * index contents don't matter), so treat the same as
1597  * RECENTLY_DEAD HOT-updated tuples.
1598  */
1599  indexIt = false;
1600  /* mark the index as unsafe for old snapshots */
1601  indexInfo->ii_BrokenHotChain = true;
1602  }
1603  else
1604  {
1605  /*
1606  * It's a regular tuple deleted by our own xact. Index
1607  * it, but don't check for uniqueness nor count in
1608  * reltuples, the same as a RECENTLY_DEAD tuple.
1609  */
1610  indexIt = true;
1611  }
1612  /* In any case, exclude the tuple from unique-checking */
1613  tupleIsAlive = false;
1614  break;
1615  default:
1616  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1617  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1618  break;
1619  }
1620 
1621  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1622 
1623  if (!indexIt)
1624  continue;
1625  }
1626  else
1627  {
1628  /* heap_getnext did the time qual check */
1629  tupleIsAlive = true;
1630  reltuples += 1;
1631  }
1632 
1633  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1634 
1635  /* Set up for predicate or expression evaluation */
1636  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1637 
1638  /*
1639  * In a partial index, discard tuples that don't satisfy the
1640  * predicate.
1641  */
1642  if (predicate != NULL)
1643  {
1644  if (!ExecQual(predicate, econtext))
1645  continue;
1646  }
1647 
1648  /*
1649  * For the current heap tuple, extract all the attributes we use in
1650  * this index, and note which are null. This also performs evaluation
1651  * of any expressions needed.
1652  */
1653  FormIndexDatum(indexInfo,
1654  slot,
1655  estate,
1656  values,
1657  isnull);
1658 
1659  /*
1660  * You'd think we should go ahead and build the index tuple here, but
1661  * some index AMs want to do further processing on the data first. So
1662  * pass the values[] and isnull[] arrays, instead.
1663  */
1664 
1665  if (HeapTupleIsHeapOnly(heapTuple))
1666  {
1667  /*
1668  * For a heap-only tuple, pretend its TID is that of the root. See
1669  * src/backend/access/heap/README.HOT for discussion.
1670  */
1671  ItemPointerData tid;
1672  OffsetNumber offnum;
1673 
1674  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1675 
1676  /*
1677  * If a HOT tuple points to a root that we don't know about,
1678  * obtain root items afresh. If that still fails, report it as
1679  * corruption.
1680  */
1681  if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1682  {
1683  Page page = BufferGetPage(hscan->rs_cbuf);
1684 
1685  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1686  heap_get_root_tuples(page, root_offsets);
1687  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1688  }
1689 
1690  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1691  ereport(ERROR,
1692  (errcode(ERRCODE_DATA_CORRUPTED),
1693  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1694  ItemPointerGetBlockNumber(&heapTuple->t_self),
1695  offnum,
1696  RelationGetRelationName(heapRelation))));
1697 
1698  ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1699  root_offsets[offnum - 1]);
1700 
1701  /* Call the AM's callback routine to process the tuple */
1702  callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1703  callback_state);
1704  }
1705  else
1706  {
1707  /* Call the AM's callback routine to process the tuple */
1708  callback(indexRelation, &heapTuple->t_self, values, isnull,
1709  tupleIsAlive, callback_state);
1710  }
1711  }
1712 
1713  /* Report scan progress one last time. */
1714  if (progress)
1715  {
1716  BlockNumber blks_done;
1717 
1718  if (hscan->rs_base.rs_parallel != NULL)
1719  {
1720  ParallelBlockTableScanDesc pbscan;
1721 
1722  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1723  blks_done = pbscan->phs_nblocks;
1724  }
1725  else
1726  blks_done = hscan->rs_nblocks;
1727 
1728  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1729  blks_done);
1730  }
1731 
1732  table_endscan(scan);
1733 
1734  /* we can now forget our snapshot, if set and registered by us */
1735  if (need_unregister_snapshot)
1736  UnregisterSnapshot(snapshot);
1737 
1738  ExecDropSingleTupleTableSlot(slot);
1739 
1740  FreeExecutorState(estate);
1741 
1742  /* These may have been pointing to the now-gone estate */
1743  indexInfo->ii_ExpressionsState = NIL;
1744  indexInfo->ii_PredicateState = NULL;
1745 
1746  return reltuples;
1747 }
1748 
1749 static void
1750 heapam_index_validate_scan(Relation heapRelation,
1751  Relation indexRelation,
1752  IndexInfo *indexInfo,
1753  Snapshot snapshot,
1754  ValidateIndexState *state)
1755 {
1756  TableScanDesc scan;
1757  HeapScanDesc hscan;
1758  HeapTuple heapTuple;
1759  Datum values[INDEX_MAX_KEYS];
1760  bool isnull[INDEX_MAX_KEYS];
1761  ExprState *predicate;
1762  TupleTableSlot *slot;
1763  EState *estate;
1764  ExprContext *econtext;
1765  BlockNumber root_blkno = InvalidBlockNumber;
1766  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1767  bool in_index[MaxHeapTuplesPerPage];
1768  BlockNumber previous_blkno = InvalidBlockNumber;
1769 
1770  /* state variables for the merge */
1771  ItemPointer indexcursor = NULL;
1772  ItemPointerData decoded;
1773  bool tuplesort_empty = false;
1774 
1775  /*
1776  * sanity checks
1777  */
1778  Assert(OidIsValid(indexRelation->rd_rel->relam));
1779 
1780  /*
1781  * Need an EState for evaluation of index expressions and partial-index
1782  * predicates. Also a slot to hold the current tuple.
1783  */
1784  estate = CreateExecutorState();
1785  econtext = GetPerTupleExprContext(estate);
1786  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1787  &TTSOpsHeapTuple);
1788 
1789  /* Arrange for econtext's scan tuple to be the tuple under test */
1790  econtext->ecxt_scantuple = slot;
1791 
1792  /* Set up execution state for predicate, if any. */
1793  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1794 
1795  /*
1796  * Prepare for scan of the base relation. We need just those tuples
1797  * satisfying the passed-in reference snapshot. We must disable syncscan
1798  * here, because it's critical that we read from block zero forward to
1799  * match the sorted TIDs.
1800  */
1801  scan = table_beginscan_strat(heapRelation, /* relation */
1802  snapshot, /* snapshot */
1803  0, /* number of keys */
1804  NULL, /* scan key */
1805  true, /* buffer access strategy OK */
1806  false); /* syncscan not OK */
1807  hscan = (HeapScanDesc) scan;
1808 
1809  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1810  hscan->rs_nblocks);
1811 
1812  /*
1813  * Scan all tuples matching the snapshot.
1814  */
1815  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1816  {
1817  ItemPointer heapcursor = &heapTuple->t_self;
1818  ItemPointerData rootTuple;
1819  OffsetNumber root_offnum;
1820 
1822 
1823  state->htups += 1;
1824 
1825  if ((previous_blkno == InvalidBlockNumber) ||
1826  (hscan->rs_cblock != previous_blkno))
1827  {
1828  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1829  hscan->rs_cblock);
1830  previous_blkno = hscan->rs_cblock;
1831  }
1832 
1833  /*
1834  * As commented in table_index_build_scan, we should index heap-only
1835  * tuples under the TIDs of their root tuples; so when we advance onto
1836  * a new heap page, build a map of root item offsets on the page.
1837  *
1838  * This complicates merging against the tuplesort output: we will
1839  * visit the live tuples in order by their offsets, but the root
1840  * offsets that we need to compare against the index contents might be
1841  * ordered differently. So we might have to "look back" within the
1842  * tuplesort output, but only within the current page. We handle that
1843  * by keeping a bool array in_index[] showing all the
1844  * already-passed-over tuplesort output TIDs of the current page. We
1845  * clear that array here, when advancing onto a new heap page.
1846  */
1847  if (hscan->rs_cblock != root_blkno)
1848  {
1849  Page page = BufferGetPage(hscan->rs_cbuf);
1850 
1851  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1852  heap_get_root_tuples(page, root_offsets);
1853  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1854 
1855  memset(in_index, 0, sizeof(in_index));
1856 
1857  root_blkno = hscan->rs_cblock;
1858  }
1859 
1860  /* Convert actual tuple TID to root TID */
1861  rootTuple = *heapcursor;
1862  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1863 
1864  if (HeapTupleIsHeapOnly(heapTuple))
1865  {
1866  root_offnum = root_offsets[root_offnum - 1];
1867  if (!OffsetNumberIsValid(root_offnum))
1868  ereport(ERROR,
1869  (errcode(ERRCODE_DATA_CORRUPTED),
1870  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1871  ItemPointerGetBlockNumber(heapcursor),
1872  ItemPointerGetOffsetNumber(heapcursor),
1873  RelationGetRelationName(heapRelation))));
1874  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1875  }
1876 
1877  /*
1878  * "merge" by skipping through the index tuples until we find or pass
1879  * the current root tuple.
1880  */
1881  while (!tuplesort_empty &&
1882  (!indexcursor ||
1883  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1884  {
1885  Datum ts_val;
1886  bool ts_isnull;
1887 
1888  if (indexcursor)
1889  {
1890  /*
1891  * Remember index items seen earlier on the current heap page
1892  */
1893  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1894  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1895  }
1896 
1897  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1898  false, &ts_val, &ts_isnull,
1899  NULL);
1900  Assert(tuplesort_empty || !ts_isnull);
1901  if (!tuplesort_empty)
1902  {
1903  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1904  indexcursor = &decoded;
1905  }
1906  else
1907  {
1908  /* Be tidy */
1909  indexcursor = NULL;
1910  }
1911  }
1912 
1913  /*
1914  * If the tuplesort has overshot *and* we didn't see a match earlier,
1915  * then this tuple is missing from the index, so insert it.
1916  */
1917  if ((tuplesort_empty ||
1918  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1919  !in_index[root_offnum - 1])
1920  {
1921  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1922 
1923  /* Set up for predicate or expression evaluation */
1924  ExecStoreHeapTuple(heapTuple, slot, false);
1925 
1926  /*
1927  * In a partial index, discard tuples that don't satisfy the
1928  * predicate.
1929  */
1930  if (predicate != NULL)
1931  {
1932  if (!ExecQual(predicate, econtext))
1933  continue;
1934  }
1935 
1936  /*
1937  * For the current heap tuple, extract all the attributes we use
1938  * in this index, and note which are null. This also performs
1939  * evaluation of any expressions needed.
1940  */
1941  FormIndexDatum(indexInfo,
1942  slot,
1943  estate,
1944  values,
1945  isnull);
1946 
1947  /*
1948  * You'd think we should go ahead and build the index tuple here,
1949  * but some index AMs want to do further processing on the data
1950  * first. So pass the values[] and isnull[] arrays, instead.
1951  */
1952 
1953  /*
1954  * If the tuple is already committed dead, you might think we
1955  * could suppress uniqueness checking, but this is no longer true
1956  * in the presence of HOT, because the insert is actually a proxy
1957  * for a uniqueness check on the whole HOT-chain. That is, the
1958  * tuple we have here could be dead because it was already
1959  * HOT-updated, and if so the updating transaction will not have
1960  * thought it should insert index entries. The index AM will
1961  * check the whole HOT-chain and correctly detect a conflict if
1962  * there is one.
1963  */
1964 
1965  index_insert(indexRelation,
1966  values,
1967  isnull,
1968  &rootTuple,
1969  heapRelation,
1970  indexInfo->ii_Unique ?
1971  UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1972  false,
1973  indexInfo);
1974 
1975  state->tups_inserted += 1;
1976  }
1977  }
1978 
1979  table_endscan(scan);
1980 
1981  ExecDropSingleTupleTableSlot(slot);
1982 
1983  FreeExecutorState(estate);
1984 
1985  /* These may have been pointing to the now-gone estate */
1986  indexInfo->ii_ExpressionsState = NIL;
1987  indexInfo->ii_PredicateState = NULL;
1988 }
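/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  The
 * validate-scan loop above is essentially a merge between heap order and a
 * sorted stream of index TIDs: advance the index stream until it reaches or
 * passes the current root tuple; if it passed it (or ran out), that heap
 * tuple is missing from the index and must be inserted.  A minimal model of
 * that merge over plain sorted integer arrays, with invented names:
 */
static int
toy_find_missing(const int *heap_tids, int nheap,
                 const int *index_tids, int nindex,
                 int *missing)
{
    int i = 0;
    int nmissing = 0;

    for (int h = 0; h < nheap; h++)
    {
        /* skip index entries that sort before the current heap entry */
        while (i < nindex && index_tids[i] < heap_tids[h])
            i++;

        /* index stream overshot (or is exhausted): heap entry is missing */
        if (i >= nindex || index_tids[i] > heap_tids[h])
            missing[nmissing++] = heap_tids[h];
    }
    return nmissing;
}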
1989 
1990 /*
1991  * Return the number of blocks that have been read by this scan since
 1992  * starting. This is meant for progress reporting rather than to be fully
1993  * accurate: in a parallel scan, workers can be concurrently reading blocks
1994  * further ahead than what we report.
1995  */
 1996 static BlockNumber
 1997 heapam_scan_get_blocks_done(HeapScanDesc hscan)
 1998 {
1999  ParallelBlockTableScanDesc bpscan = NULL;
2000  BlockNumber startblock;
2001  BlockNumber blocks_done;
2002 
2003  if (hscan->rs_base.rs_parallel != NULL)
 2004  {
 2005  bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
 2006  startblock = bpscan->phs_startblock;
2007  }
2008  else
2009  startblock = hscan->rs_startblock;
2010 
2011  /*
2012  * Might have wrapped around the end of the relation, if startblock was
2013  * not zero.
2014  */
2015  if (hscan->rs_cblock > startblock)
2016  blocks_done = hscan->rs_cblock - startblock;
2017  else
2018  {
2019  BlockNumber nblocks;
2020 
2021  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2022  blocks_done = nblocks - startblock +
2023  hscan->rs_cblock;
2024  }
2025 
2026  return blocks_done;
2027 }
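/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  The
 * wraparound arithmetic above in isolation, assuming a relation of nblocks
 * blocks scanned circularly starting at startblock.  Once the current block
 * has wrapped past the end of the relation, the distance is counted through
 * the wrap point.
 */
#include <stdint.h>

static uint32_t
toy_blocks_done(uint32_t nblocks, uint32_t startblock, uint32_t curblock)
{
    if (curblock > startblock)
        return curblock - startblock;        /* no wraparound yet */
    return nblocks - startblock + curblock;  /* wrapped past block 0 */
}

/* e.g. nblocks = 100, startblock = 90, curblock = 5  =>  15 blocks done */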
2028 
2029 
2030 /* ------------------------------------------------------------------------
2031  * Miscellaneous callbacks for the heap AM
2032  * ------------------------------------------------------------------------
2033  */
2034 
2035 /*
2036  * Check to see whether the table needs a TOAST table. It does only if
2037  * (1) there are any toastable attributes, and (2) the maximum length
2038  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2039  * create a toast table for something like "f1 varchar(20)".)
2040  */
 2041 static bool
 2042 heapam_relation_needs_toast_table(Relation rel)
 2043 {
2044  int32 data_length = 0;
2045  bool maxlength_unknown = false;
2046  bool has_toastable_attrs = false;
2047  TupleDesc tupdesc = rel->rd_att;
2048  int32 tuple_length;
2049  int i;
2050 
2051  for (i = 0; i < tupdesc->natts; i++)
2052  {
2053  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2054 
2055  if (att->attisdropped)
2056  continue;
2057  data_length = att_align_nominal(data_length, att->attalign);
2058  if (att->attlen > 0)
2059  {
2060  /* Fixed-length types are never toastable */
2061  data_length += att->attlen;
2062  }
2063  else
2064  {
2065  int32 maxlen = type_maximum_size(att->atttypid,
2066  att->atttypmod);
2067 
2068  if (maxlen < 0)
2069  maxlength_unknown = true;
2070  else
2071  data_length += maxlen;
2072  if (att->attstorage != TYPSTORAGE_PLAIN)
2073  has_toastable_attrs = true;
2074  }
2075  }
2076  if (!has_toastable_attrs)
2077  return false; /* nothing to toast? */
2078  if (maxlength_unknown)
2079  return true; /* any unlimited-length attrs? */
2080  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2081  BITMAPLEN(tupdesc->natts)) +
2082  MAXALIGN(data_length);
2083  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2084 }
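/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  A toy
 * model of the decision above: a TOAST table is needed only if at least one
 * column is toastable (variable-length) and the worst-case tuple size could
 * exceed the threshold.  The struct below and the omission of tuple-header
 * overhead are simplifications, not PostgreSQL catalog structures.
 */
#include <stdbool.h>
#include <stdint.h>

typedef struct ToyAttr
{
    int32_t fixed_len;  /* > 0 for fixed-width types, -1 for variable-length */
    int32_t max_len;    /* bound for variable-length types, -1 if unlimited */
} ToyAttr;

static bool
toy_needs_toast(const ToyAttr *atts, int natts, int32_t threshold)
{
    int32_t data_length = 0;
    bool    unlimited = false;
    bool    has_varlena = false;

    for (int i = 0; i < natts; i++)
    {
        if (atts[i].fixed_len > 0)
            data_length += atts[i].fixed_len;   /* never toastable */
        else
        {
            has_varlena = true;
            if (atts[i].max_len < 0)
                unlimited = true;               /* e.g. unconstrained text */
            else
                data_length += atts[i].max_len; /* e.g. varchar(20) */
        }
    }

    if (!has_varlena)
        return false;           /* nothing to toast */
    if (unlimited)
        return true;            /* tuple size is unbounded */
    return data_length > threshold;
}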
2085 
2086 /*
2087  * TOAST tables for heap relations are just heap relations.
2088  */
 2089 static Oid
 2090 heapam_relation_toast_am(Relation rel)
 2091 {
2092  return rel->rd_rel->relam;
2093 }
2094 
2095 
2096 /* ------------------------------------------------------------------------
2097  * Planner related callbacks for the heap AM
2098  * ------------------------------------------------------------------------
2099  */
2100 
2101 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2102  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2103 #define HEAP_USABLE_BYTES_PER_PAGE \
2104  (BLCKSZ - SizeOfPageHeaderData)
2105 
 2106 static void
 2107 heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
 2108  BlockNumber *pages, double *tuples,
2109  double *allvisfrac)
2110 {
2111  table_block_relation_estimate_size(rel, attr_widths, pages,
 2112  tuples, allvisfrac,
 2113  HEAP_OVERHEAD_BYTES_PER_TUPLE,
 2114  HEAP_USABLE_BYTES_PER_PAGE);
 2115 }
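/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  One way
 * the two constants above typically enter a size estimate when no
 * statistics are available: expected tuple density is the usable page space
 * divided by the per-tuple footprint (average data width plus per-tuple
 * overhead).  The example numbers are assumptions for illustration only.
 */
static double
toy_tuple_density(double usable_bytes_per_page,
                  double overhead_bytes_per_tuple,
                  double avg_tuple_width)
{
    return usable_bytes_per_page /
        (avg_tuple_width + overhead_bytes_per_tuple);
}

/* e.g. 8168 usable bytes, 28 bytes overhead, 100-byte rows  =>  ~63 rows/page */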
2116 
2117 
2118 /* ------------------------------------------------------------------------
2119  * Executor related callbacks for the heap AM
2120  * ------------------------------------------------------------------------
2121  */
2122 
 2123 static bool
 2124 heapam_scan_bitmap_next_block(TableScanDesc scan,
 2125  TBMIterateResult *tbmres)
2126 {
2127  HeapScanDesc hscan = (HeapScanDesc) scan;
2128  BlockNumber block = tbmres->blockno;
2129  Buffer buffer;
2130  Snapshot snapshot;
2131  int ntup;
2132 
2133  hscan->rs_cindex = 0;
2134  hscan->rs_ntuples = 0;
2135 
2136  /*
2137  * Ignore any claimed entries past what we think is the end of the
2138  * relation. It may have been extended after the start of our scan (we
2139  * only hold an AccessShareLock, and it could be inserts from this
2140  * backend).
2141  */
2142  if (block >= hscan->rs_nblocks)
2143  return false;
2144 
2145  /*
2146  * Acquire pin on the target heap page, trading in any pin we held before.
2147  */
2148  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2149  scan->rs_rd,
2150  block);
2151  hscan->rs_cblock = block;
2152  buffer = hscan->rs_cbuf;
2153  snapshot = scan->rs_snapshot;
2154 
2155  ntup = 0;
2156 
2157  /*
2158  * Prune and repair fragmentation for the whole page, if possible.
2159  */
2160  heap_page_prune_opt(scan->rs_rd, buffer);
2161 
2162  /*
2163  * We must hold share lock on the buffer content while examining tuple
2164  * visibility. Afterwards, however, the tuples we have found to be
2165  * visible are guaranteed good as long as we hold the buffer pin.
2166  */
2167  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2168 
2169  /*
2170  * We need two separate strategies for lossy and non-lossy cases.
2171  */
2172  if (tbmres->ntuples >= 0)
2173  {
2174  /*
2175  * Bitmap is non-lossy, so we just look through the offsets listed in
2176  * tbmres; but we have to follow any HOT chain starting at each such
2177  * offset.
2178  */
2179  int curslot;
2180 
2181  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2182  {
2183  OffsetNumber offnum = tbmres->offsets[curslot];
2184  ItemPointerData tid;
2185  HeapTupleData heapTuple;
2186 
2187  ItemPointerSet(&tid, block, offnum);
2188  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2189  &heapTuple, NULL, true))
2190  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2191  }
2192  }
2193  else
2194  {
2195  /*
2196  * Bitmap is lossy, so we must examine each line pointer on the page.
2197  * But we can ignore HOT chains, since we'll check each tuple anyway.
2198  */
2199  Page page = BufferGetPage(buffer);
2200  OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
2201  OffsetNumber offnum;
2202 
2203  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2204  {
2205  ItemId lp;
2206  HeapTupleData loctup;
2207  bool valid;
2208 
2209  lp = PageGetItemId(page, offnum);
2210  if (!ItemIdIsNormal(lp))
2211  continue;
2212  loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2213  loctup.t_len = ItemIdGetLength(lp);
2214  loctup.t_tableOid = scan->rs_rd->rd_id;
2215  ItemPointerSet(&loctup.t_self, block, offnum);
2216  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2217  if (valid)
2218  {
2219  hscan->rs_vistuples[ntup++] = offnum;
2220  PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
2221  HeapTupleHeaderGetXmin(loctup.t_data));
2222  }
2223  HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2224  buffer, snapshot);
2225  }
2226  }
2227 
2228  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2229 
2230  Assert(ntup <= MaxHeapTuplesPerPage);
2231  hscan->rs_ntuples = ntup;
2232 
2233  return ntup > 0;
2234 }
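/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  The two
 * strategies above in miniature: an exact bitmap page lists the interesting
 * offsets explicitly, while a lossy page only says "somewhere on this
 * block", so every offset on the page must be probed.  Types and names are
 * toy stand-ins.
 */
#include <stdbool.h>

typedef bool (*toy_visible_fn) (unsigned offnum);

static int
toy_collect_visible(const unsigned *offsets, int noffsets, /* < 0 => lossy */
                    unsigned max_offset, toy_visible_fn is_visible,
                    unsigned *out)
{
    int ntup = 0;

    if (noffsets >= 0)
    {
        /* exact bitmap: probe only the listed offsets */
        for (int i = 0; i < noffsets; i++)
            if (is_visible(offsets[i]))
                out[ntup++] = offsets[i];
    }
    else
    {
        /* lossy bitmap: probe every offset on the page */
        for (unsigned off = 1; off <= max_offset; off++)
            if (is_visible(off))
                out[ntup++] = off;
    }
    return ntup;
}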
2235 
 2236 static bool
 2237 heapam_scan_bitmap_next_tuple(TableScanDesc scan,
 2238  TBMIterateResult *tbmres,
2239  TupleTableSlot *slot)
2240 {
2241  HeapScanDesc hscan = (HeapScanDesc) scan;
2242  OffsetNumber targoffset;
2243  Page page;
2244  ItemId lp;
2245 
2246  /*
2247  * Out of range? If so, nothing more to look at on this page
2248  */
2249  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2250  return false;
2251 
2252  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2253  page = BufferGetPage(hscan->rs_cbuf);
2254  lp = PageGetItemId(page, targoffset);
2255  Assert(ItemIdIsNormal(lp));
2256 
2257  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2258  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2259  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2260  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2261 
 2262  pgstat_count_heap_fetch(scan->rs_rd);
 2263 
2264  /*
2265  * Set up the result slot to point to this tuple. Note that the slot
2266  * acquires a pin on the buffer.
2267  */
 2268  ExecStoreBufferHeapTuple(&hscan->rs_ctup,
 2269  slot,
2270  hscan->rs_cbuf);
2271 
2272  hscan->rs_cindex++;
2273 
2274  return true;
2275 }
2276 
 2277 static bool
 2278 heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
 2279 {
2280  HeapScanDesc hscan = (HeapScanDesc) scan;
2281  TsmRoutine *tsm = scanstate->tsmroutine;
2282  BlockNumber blockno;
2283 
2284  /* return false immediately if relation is empty */
2285  if (hscan->rs_nblocks == 0)
2286  return false;
2287 
2288  if (tsm->NextSampleBlock)
2289  {
2290  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2291  hscan->rs_cblock = blockno;
2292  }
2293  else
2294  {
2295  /* scanning table sequentially */
2296 
2297  if (hscan->rs_cblock == InvalidBlockNumber)
2298  {
2299  Assert(!hscan->rs_inited);
2300  blockno = hscan->rs_startblock;
2301  }
2302  else
2303  {
2304  Assert(hscan->rs_inited);
2305 
2306  blockno = hscan->rs_cblock + 1;
2307 
2308  if (blockno >= hscan->rs_nblocks)
2309  {
2310  /* wrap to beginning of rel, might not have started at 0 */
2311  blockno = 0;
2312  }
2313 
2314  /*
2315  * Report our new scan position for synchronization purposes.
2316  *
2317  * Note: we do this before checking for end of scan so that the
2318  * final state of the position hint is back at the start of the
2319  * rel. That's not strictly necessary, but otherwise when you run
2320  * the same query multiple times the starting position would shift
2321  * a little bit backwards on every invocation, which is confusing.
2322  * We don't guarantee any specific ordering in general, though.
2323  */
2324  if (scan->rs_flags & SO_ALLOW_SYNC)
2325  ss_report_location(scan->rs_rd, blockno);
2326 
2327  if (blockno == hscan->rs_startblock)
2328  {
2329  blockno = InvalidBlockNumber;
2330  }
2331  }
2332  }
2333 
2334  if (!BlockNumberIsValid(blockno))
2335  {
2336  if (BufferIsValid(hscan->rs_cbuf))
2337  ReleaseBuffer(hscan->rs_cbuf);
2338  hscan->rs_cbuf = InvalidBuffer;
2339  hscan->rs_cblock = InvalidBlockNumber;
2340  hscan->rs_inited = false;
2341 
2342  return false;
2343  }
2344 
2345  heapgetpage(scan, blockno);
2346  hscan->rs_inited = true;
2347 
2348  return true;
2349 }
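/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  The
 * sequential fallback above, modelled in isolation: advance one block at a
 * time from the starting block, wrap to block 0 at the end of the relation,
 * and stop once the scan arrives back where it started.  TOY_INVALID_BLOCK
 * is an invented sentinel, not InvalidBlockNumber.
 */
#include <stdint.h>

#define TOY_INVALID_BLOCK UINT32_MAX

static uint32_t
toy_next_sample_block(uint32_t nblocks, uint32_t startblock, uint32_t curblock)
{
    uint32_t next;

    if (curblock == TOY_INVALID_BLOCK)
        return startblock;              /* first call: begin at startblock */

    next = curblock + 1;
    if (next >= nblocks)
        next = 0;                       /* wrap; scan may not start at 0 */
    if (next == startblock)
        return TOY_INVALID_BLOCK;       /* back at the start: scan is done */
    return next;
}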
2350 
 2351 static bool
 2352 heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
 2353  TupleTableSlot *slot)
2354 {
2355  HeapScanDesc hscan = (HeapScanDesc) scan;
2356  TsmRoutine *tsm = scanstate->tsmroutine;
2357  BlockNumber blockno = hscan->rs_cblock;
2358  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2359 
2360  Page page;
2361  bool all_visible;
2362  OffsetNumber maxoffset;
2363 
2364  /*
2365  * When not using pagemode, we must lock the buffer during tuple
2366  * visibility checks.
2367  */
 2368  if (!pagemode)
 2369  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
 2370 
2371  page = (Page) BufferGetPage(hscan->rs_cbuf);
 2372  all_visible = PageIsAllVisible(page) &&
 2373  !scan->rs_snapshot->takenDuringRecovery;
 2374  maxoffset = PageGetMaxOffsetNumber(page);
2375 
2376  for (;;)
2377  {
2378  OffsetNumber tupoffset;
2379 
 2380  CHECK_FOR_INTERRUPTS();
 2381 
2382  /* Ask the tablesample method which tuples to check on this page. */
2383  tupoffset = tsm->NextSampleTuple(scanstate,
2384  blockno,
2385  maxoffset);
2386 
2387  if (OffsetNumberIsValid(tupoffset))
2388  {
2389  ItemId itemid;
2390  bool visible;
2391  HeapTuple tuple = &(hscan->rs_ctup);
2392 
2393  /* Skip invalid tuple pointers. */
2394  itemid = PageGetItemId(page, tupoffset);
2395  if (!ItemIdIsNormal(itemid))
2396  continue;
2397 
2398  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2399  tuple->t_len = ItemIdGetLength(itemid);
2400  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2401 
2402 
2403  if (all_visible)
2404  visible = true;
2405  else
2406  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2407  tuple, tupoffset);
2408 
2409  /* in pagemode, heapgetpage did this for us */
2410  if (!pagemode)
2411  HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2412  hscan->rs_cbuf, scan->rs_snapshot);
2413 
2414  /* Try next tuple from same page. */
2415  if (!visible)
2416  continue;
2417 
2418  /* Found visible tuple, return it. */
 2419  if (!pagemode)
 2420  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 2421 
2422  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2423 
2424  /* Count successfully-fetched tuples as heap fetches */
 2425  pgstat_count_heap_getnext(scan->rs_rd);
 2426 
2427  return true;
2428  }
2429  else
2430  {
2431  /*
2432  * If we get here, it means we've exhausted the items on this page
2433  * and it's time to move to the next.
2434  */
 2435  if (!pagemode)
 2436  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 2437 
2438  ExecClearTuple(slot);
2439  return false;
2440  }
2441  }
2442 
2443  Assert(0);
2444 }
2445 
2446 
2447 /* ----------------------------------------------------------------------------
2448  * Helper functions for the above.
2449  * ----------------------------------------------------------------------------
2450  */
2451 
2452 /*
2453  * Reconstruct and rewrite the given tuple
2454  *
2455  * We cannot simply copy the tuple as-is, for several reasons:
2456  *
2457  * 1. We'd like to squeeze out the values of any dropped columns, both
2458  * to save space and to ensure we have no corner-case failures. (It's
2459  * possible for example that the new table hasn't got a TOAST table
2460  * and so is unable to store any large values of dropped cols.)
2461  *
2462  * 2. The tuple might not even be legal for the new table; this is
2463  * currently only known to happen as an after-effect of ALTER TABLE
2464  * SET WITHOUT OIDS.
2465  *
2466  * So, we must reconstruct the tuple from component Datums.
2467  */
 2468 static void
 2469 reform_and_rewrite_tuple(HeapTuple tuple,
 2470  Relation OldHeap, Relation NewHeap,
2471  Datum *values, bool *isnull, RewriteState rwstate)
2472 {
2473  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2474  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2475  HeapTuple copiedTuple;
2476  int i;
2477 
2478  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2479 
2480  /* Be sure to null out any dropped columns */
2481  for (i = 0; i < newTupDesc->natts; i++)
2482  {
2483  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2484  isnull[i] = true;
2485  }
2486 
2487  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2488 
2489  /* The heap rewrite module does the rest */
2490  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2491 
2492  heap_freetuple(copiedTuple);
2493 }
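/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  The core
 * of the routine above, with heap tuples replaced by plain arrays: explode
 * the row into per-column values, force any dropped columns to NULL, and
 * rebuild the row against the new descriptor.  Names are invented.
 */
#include <stdbool.h>

static void
toy_reform_row(const int *old_values, int natts,
               const bool *attisdropped,
               int *new_values, bool *new_isnull)
{
    for (int i = 0; i < natts; i++)
    {
        new_values[i] = old_values[i];
        new_isnull[i] = attisdropped[i];    /* dropped columns become NULL */
    }
}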
2494 
2495 /*
2496  * Check visibility of the tuple.
2497  */
 2498 static bool
 2499 SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 2500  HeapTuple tuple,
2501  OffsetNumber tupoffset)
2502 {
2503  HeapScanDesc hscan = (HeapScanDesc) scan;
2504 
2505  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2506  {
2507  /*
2508  * In pageatatime mode, heapgetpage() already did visibility checks,
2509  * so just look at the info it left in rs_vistuples[].
2510  *
2511  * We use a binary search over the known-sorted array. Note: we could
2512  * save some effort if we insisted that NextSampleTuple select tuples
2513  * in increasing order, but it's not clear that there would be enough
2514  * gain to justify the restriction.
2515  */
2516  int start = 0,
2517  end = hscan->rs_ntuples - 1;
2518 
2519  while (start <= end)
2520  {
2521  int mid = (start + end) / 2;
2522  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2523 
2524  if (tupoffset == curoffset)
2525  return true;
2526  else if (tupoffset < curoffset)
2527  end = mid - 1;
2528  else
2529  start = mid + 1;
2530  }
2531 
2532  return false;
2533  }
2534  else
2535  {
2536  /* Otherwise, we have to check the tuple individually. */
2537  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2538  buffer);
2539  }
2540 }
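/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  The
 * page-at-a-time branch above is a plain binary search over the sorted
 * array of known-visible offsets; stand-alone it looks like this.
 */
#include <stdbool.h>

static bool
toy_offset_is_visible(const unsigned *sorted_offsets, int n, unsigned target)
{
    int lo = 0;
    int hi = n - 1;

    while (lo <= hi)
    {
        int mid = lo + (hi - lo) / 2;   /* avoids (lo + hi) overflow */

        if (sorted_offsets[mid] == target)
            return true;
        if (sorted_offsets[mid] > target)
            hi = mid - 1;
        else
            lo = mid + 1;
    }
    return false;
}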
2541 
2542 
2543 /* ------------------------------------------------------------------------
2544  * Definition of the heap table access method.
2545  * ------------------------------------------------------------------------
2546  */
2547 
2548 static const TableAmRoutine heapam_methods = {
2549  .type = T_TableAmRoutine,
2550 
2551  .slot_callbacks = heapam_slot_callbacks,
2552 
2553  .scan_begin = heap_beginscan,
2554  .scan_end = heap_endscan,
2555  .scan_rescan = heap_rescan,
2556  .scan_getnextslot = heap_getnextslot,
2557 
2558  .scan_set_tidrange = heap_set_tidrange,
2559  .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
2560 
2561  .parallelscan_estimate = table_block_parallelscan_estimate,
2562  .parallelscan_initialize = table_block_parallelscan_initialize,
2563  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2564 
2565  .index_fetch_begin = heapam_index_fetch_begin,
2566  .index_fetch_reset = heapam_index_fetch_reset,
2567  .index_fetch_end = heapam_index_fetch_end,
2568  .index_fetch_tuple = heapam_index_fetch_tuple,
2569 
2570  .tuple_insert = heapam_tuple_insert,
2571  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2572  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2573  .multi_insert = heap_multi_insert,
2574  .tuple_delete = heapam_tuple_delete,
2575  .tuple_update = heapam_tuple_update,
2576  .tuple_lock = heapam_tuple_lock,
2577 
2578  .tuple_fetch_row_version = heapam_fetch_row_version,
2579  .tuple_get_latest_tid = heap_get_latest_tid,
2580  .tuple_tid_valid = heapam_tuple_tid_valid,
2581  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2582  .index_delete_tuples = heap_index_delete_tuples,
2583 
2584  .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
2585  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2586  .relation_copy_data = heapam_relation_copy_data,
2587  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2588  .relation_vacuum = heap_vacuum_rel,
2589  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2590  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2591  .index_build_range_scan = heapam_index_build_range_scan,
2592  .index_validate_scan = heapam_index_validate_scan,
2593 
2594  .relation_size = table_block_relation_size,
2595  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2596  .relation_toast_am = heapam_relation_toast_am,
2597  .relation_fetch_toast_slice = heap_fetch_toast_slice,
2598 
2599  .relation_estimate_size = heapam_estimate_rel_size,
2600 
2601  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2602  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2603  .scan_sample_next_block = heapam_scan_sample_next_block,
2604  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2605 };
2606 
2607 
 2608 const TableAmRoutine *
 2609 GetHeapamTableAmRoutine(void)
 2610 {
2611  return &heapam_methods;
2612 }
2613 
 2614 Datum
 2615 heap_tableam_handler(PG_FUNCTION_ARGS)
 2616 {
 2617  PG_RETURN_POINTER(&heapam_methods);
 2618 }
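/*
 * [Editor's illustrative sketch -- not part of heapam_handler.c.]  The
 * general shape of the handler pattern above: a module keeps a single
 * static, constant table of function pointers and hands out a pointer to it
 * from a tiny accessor.  All names below are invented for illustration.
 */
typedef struct ToyAmRoutine
{
    int  (*open) (const char *name);
    void (*close) (int handle);
} ToyAmRoutine;

static int
toy_open(const char *name)
{
    (void) name;
    return 1;
}

static void
toy_close(int handle)
{
    (void) handle;
}

static const ToyAmRoutine toy_am_methods = {
    .open = toy_open,
    .close = toy_close,
};

static const ToyAmRoutine *
toy_get_am_routine(void)
{
    return &toy_am_methods;
}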