heapam_handler.c
1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This file wires up the lower-level heapam.c et al. routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/detoast.h"
23 #include "access/genam.h"
24 #include "access/heapam.h"
25 #include "access/heaptoast.h"
26 #include "access/multixact.h"
27 #include "access/rewriteheap.h"
28 #include "access/syncscan.h"
29 #include "access/tableam.h"
31 #include "access/tsmapi.h"
32 #include "access/xact.h"
33 #include "catalog/catalog.h"
34 #include "catalog/index.h"
35 #include "catalog/storage.h"
36 #include "catalog/storage_xlog.h"
37 #include "commands/progress.h"
38 #include "executor/executor.h"
39 #include "miscadmin.h"
40 #include "pgstat.h"
41 #include "storage/bufmgr.h"
42 #include "storage/bufpage.h"
43 #include "storage/lmgr.h"
44 #include "storage/predicate.h"
45 #include "storage/procarray.h"
46 #include "storage/smgr.h"
47 #include "utils/builtins.h"
48 #include "utils/rel.h"
49 
50 static void reform_and_rewrite_tuple(HeapTuple tuple,
51  Relation OldHeap, Relation NewHeap,
52  Datum *values, bool *isnull, RewriteState rwstate);
53 
54 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
55  HeapTuple tuple,
56  OffsetNumber tupoffset);
57 
58 static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
59 
60 static const TableAmRoutine heapam_methods;
61 
62 
63 /* ------------------------------------------------------------------------
64  * Slot related callbacks for heap AM
65  * ------------------------------------------------------------------------
66  */
67 
68 static const TupleTableSlotOps *
69 heapam_slot_callbacks(Relation relation)
70 {
71  return &TTSOpsBufferHeapTuple;
72 }
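/*
 * [Editor's illustrative note, not part of the original file] Executor code
 * reaches this callback through the tableam wrappers, e.g.
 *
 *     TupleTableSlot *slot = table_slot_create(rel, NULL);
 *
 * which consults table_slot_callbacks() and, for a heap relation, yields a
 * BufferHeapTupleTableSlot.
 */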
73 
74 
75 /* ------------------------------------------------------------------------
76  * Index Scan Callbacks for heap AM
77  * ------------------------------------------------------------------------
78  */
79 
80 static IndexFetchTableData *
81 heapam_index_fetch_begin(Relation rel)
82 {
83  IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
84 
85  hscan->xs_base.rel = rel;
86  hscan->xs_cbuf = InvalidBuffer;
87 
88  return &hscan->xs_base;
89 }
90 
91 static void
92 heapam_index_fetch_reset(IndexFetchTableData *scan)
93 {
94  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
95 
96  if (BufferIsValid(hscan->xs_cbuf))
97  {
98  ReleaseBuffer(hscan->xs_cbuf);
99  hscan->xs_cbuf = InvalidBuffer;
100  }
101 }
102 
103 static void
104 heapam_index_fetch_end(IndexFetchTableData *scan)
105 {
106  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
107 
107 
108  heapam_index_fetch_reset(scan);
109 
110  pfree(hscan);
111 }
112 
113 static bool
114 heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
115  ItemPointer tid,
116  Snapshot snapshot,
117  TupleTableSlot *slot,
118  bool *call_again, bool *all_dead)
119 {
120  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
121  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
122  bool got_heap_tuple;
123 
124  Assert(TTS_IS_BUFFERTUPLE(slot));
125 
126  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
127  if (!*call_again)
128  {
129  /* Switch to correct buffer if we don't have it already */
130  Buffer prev_buf = hscan->xs_cbuf;
131 
132  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
133  hscan->xs_base.rel,
134  ItemPointerGetBlockNumber(tid));
135 
136  /*
137  * Prune page, but only if we weren't already on this page
138  */
139  if (prev_buf != hscan->xs_cbuf)
140  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
141  }
142 
143  /* Obtain share-lock on the buffer so we can examine visibility */
144  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
145  got_heap_tuple = heap_hot_search_buffer(tid,
146  hscan->xs_base.rel,
147  hscan->xs_cbuf,
148  snapshot,
149  &bslot->base.tupdata,
150  all_dead,
151  !*call_again);
152  bslot->base.tupdata.t_self = *tid;
153  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
154 
155  if (got_heap_tuple)
156  {
157  /*
158  * Only in a non-MVCC snapshot can more than one member of the HOT
159  * chain be visible.
160  */
161  *call_again = !IsMVCCSnapshot(snapshot);
162 
163  slot->tts_tableOid = RelationGetRelid(scan->rel);
164  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
165  }
166  else
167  {
168  /* We've reached the end of the HOT chain. */
169  *call_again = false;
170  }
171 
172  return got_heap_tuple;
173 }
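/*
 * [Editor's illustrative sketch, not part of the original file] With a
 * non-MVCC snapshot a caller is expected to keep calling until *call_again
 * comes back false, so that every visible member of the HOT chain is seen,
 * roughly:
 *
 *     bool call_again = false, all_dead = false;
 *     do
 *     {
 *         found = table_index_fetch_tuple(scan, tid, snapshot, slot,
 *                                         &call_again, &all_dead);
 *         // ... process the tuple in slot if found ...
 *     } while (call_again);
 */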
174 
175 
176 /* ------------------------------------------------------------------------
177  * Callbacks for non-modifying operations on individual tuples for heap AM
178  * ------------------------------------------------------------------------
179  */
180 
181 static bool
182 heapam_fetch_row_version(Relation relation,
183  ItemPointer tid,
184  Snapshot snapshot,
185  TupleTableSlot *slot)
186 {
187  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
188  Buffer buffer;
189 
190  Assert(TTS_IS_BUFFERTUPLE(slot));
191 
192  bslot->base.tupdata.t_self = *tid;
193  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer))
194  {
195  /* store in slot, transferring existing pin */
196  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
197  slot->tts_tableOid = RelationGetRelid(relation);
198 
199  return true;
200  }
201 
202  return false;
203 }
204 
205 static bool
206 heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
207 {
208  HeapScanDesc hscan = (HeapScanDesc) scan;
209 
210  return ItemPointerIsValid(tid) &&
211  ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
212 }
213 
214 static bool
215 heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
216  Snapshot snapshot)
217 {
218  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
219  bool res;
220 
221  Assert(TTS_IS_BUFFERTUPLE(slot));
222  Assert(BufferIsValid(bslot->buffer));
223 
224  /*
225  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
226  * Caller should be holding pin, but not lock.
227  */
228  LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
229  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
230  bslot->buffer);
231  LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
232 
233  return res;
234 }
235 
236 
237 /* ----------------------------------------------------------------------------
238  * Functions for manipulations of physical tuples for heap AM.
239  * ----------------------------------------------------------------------------
240  */
241 
242 static void
243 heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
244  int options, BulkInsertState bistate)
245 {
246  bool shouldFree = true;
247  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
248 
249  /* Update the tuple with table oid */
250  slot->tts_tableOid = RelationGetRelid(relation);
251  tuple->t_tableOid = slot->tts_tableOid;
252 
253  /* Perform the insertion, and copy the resulting ItemPointer */
254  heap_insert(relation, tuple, cid, options, bistate);
255  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
256 
257  if (shouldFree)
258  pfree(tuple);
259 }
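/*
 * [Editor's illustrative note, not part of the original file] This callback
 * is normally reached through the tableam wrapper; the executor's
 * ExecInsert() effectively does
 *
 *     table_tuple_insert(resultRelationDesc, slot, estate->es_output_cid,
 *                        0, NULL);
 *
 * which dispatches here for relations using the heap AM.
 */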
260 
261 static void
262 heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
263  CommandId cid, int options,
264  BulkInsertState bistate, uint32 specToken)
265 {
266  bool shouldFree = true;
267  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
268 
269  /* Update the tuple with table oid */
270  slot->tts_tableOid = RelationGetRelid(relation);
271  tuple->t_tableOid = slot->tts_tableOid;
272 
273  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
274  options |= HEAP_INSERT_SPECULATIVE;
275 
276  /* Perform the insertion, and copy the resulting ItemPointer */
277  heap_insert(relation, tuple, cid, options, bistate);
278  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
279 
280  if (shouldFree)
281  pfree(tuple);
282 }
283 
284 static void
285 heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
286  uint32 specToken, bool succeeded)
287 {
288  bool shouldFree = true;
289  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
290 
291  /* adjust the tuple's state accordingly */
292  if (succeeded)
293  heap_finish_speculative(relation, &slot->tts_tid);
294  else
295  heap_abort_speculative(relation, &slot->tts_tid);
296 
297  if (shouldFree)
298  pfree(tuple);
299 }
300 
301 static TM_Result
302 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
303  Snapshot snapshot, Snapshot crosscheck, bool wait,
304  TM_FailureData *tmfd, bool changingPart)
305 {
306  /*
307  * Currently, index tuple deletion is handled at VACUUM time. If the
308  * storage were ever to clean up dead tuples on its own, this would be
309  * the place to delete the corresponding index tuples as well.
310  */
311  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
312 }
313 
314 
315 static TM_Result
316 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
317  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
318  bool wait, TM_FailureData *tmfd,
319  LockTupleMode *lockmode, bool *update_indexes)
320 {
321  bool shouldFree = true;
322  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
323  TM_Result result;
324 
325  /* Update the tuple with table oid */
326  slot->tts_tableOid = RelationGetRelid(relation);
327  tuple->t_tableOid = slot->tts_tableOid;
328 
329  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
330  tmfd, lockmode);
331  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
332 
333  /*
334  * Decide whether new index entries are needed for the tuple
335  *
336  * Note: heap_update returns the tid (location) of the new tuple in the
337  * t_self field.
338  *
339  * If it's a HOT update, we mustn't insert new index entries.
340  */
341  *update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);
342 
343  if (shouldFree)
344  pfree(tuple);
345 
346  return result;
347 }
348 
349 static TM_Result
350 heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
351  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
352  LockWaitPolicy wait_policy, uint8 flags,
353  TM_FailureData *tmfd)
354 {
355  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
356  TM_Result result;
357  Buffer buffer;
358  HeapTuple tuple = &bslot->base.tupdata;
359  bool follow_updates;
360 
361  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
362  tmfd->traversed = false;
363 
364  Assert(TTS_IS_BUFFERTUPLE(slot));
365 
366 tuple_lock_retry:
367  tuple->t_self = *tid;
368  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
369  follow_updates, &buffer, tmfd);
370 
371  if (result == TM_Updated &&
372  (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
373  {
374  /* Should not encounter speculative tuple on recheck */
375  Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
376 
377  ReleaseBuffer(buffer);
378 
379  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
380  {
381  SnapshotData SnapshotDirty;
382  TransactionId priorXmax;
383 
384  /* it was updated, so look at the updated version */
385  *tid = tmfd->ctid;
386  /* updated row should have xmin matching this xmax */
387  priorXmax = tmfd->xmax;
388 
389  /* signal that a tuple later in the chain is getting locked */
390  tmfd->traversed = true;
391 
392  /*
393  * fetch target tuple
394  *
395  * Loop here to deal with updated or busy tuples
396  */
397  InitDirtySnapshot(SnapshotDirty);
398  for (;;)
399  {
400  if (ItemPointerIndicatesMovedPartitions(tid))
401  ereport(ERROR,
402  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
403  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
404 
405  tuple->t_self = *tid;
406  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer))
407  {
408  /*
409  * If xmin isn't what we're expecting, the slot must have
410  * been recycled and reused for an unrelated tuple. This
411  * implies that the latest version of the row was deleted,
412  * so we need do nothing. (Should be safe to examine xmin
413  * without getting buffer's content lock. We assume
414  * reading a TransactionId to be atomic, and Xmin never
415  * changes in an existing tuple, except to invalid or
416  * frozen, and neither of those can match priorXmax.)
417  */
418  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
419  priorXmax))
420  {
421  ReleaseBuffer(buffer);
422  return TM_Deleted;
423  }
424 
425  /* otherwise xmin should not be dirty... */
426  if (TransactionIdIsValid(SnapshotDirty.xmin))
427  ereport(ERROR,
428  (errcode(ERRCODE_DATA_CORRUPTED),
429  errmsg_internal("t_xmin is uncommitted in tuple to be updated")));
430 
431  /*
432  * If tuple is being updated by other transaction then we
433  * have to wait for its commit/abort, or die trying.
434  */
435  if (TransactionIdIsValid(SnapshotDirty.xmax))
436  {
437  ReleaseBuffer(buffer);
438  switch (wait_policy)
439  {
440  case LockWaitBlock:
441  XactLockTableWait(SnapshotDirty.xmax,
442  relation, &tuple->t_self,
443  XLTW_FetchUpdated);
444  break;
445  case LockWaitSkip:
446  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
447  /* skip instead of waiting */
448  return TM_WouldBlock;
449  break;
450  case LockWaitError:
451  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
452  ereport(ERROR,
453  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
454  errmsg("could not obtain lock on row in relation \"%s\"",
455  RelationGetRelationName(relation))));
456  break;
457  }
458  continue; /* loop back to repeat heap_fetch */
459  }
460 
461  /*
462  * If tuple was inserted by our own transaction, we have
463  * to check cmin against cid: cmin >= current CID means
464  * our command cannot see the tuple, so we should ignore
465  * it. Otherwise heap_lock_tuple() will throw an error,
466  * and so would any later attempt to update or delete the
467  * tuple. (We need not check cmax because
468  * HeapTupleSatisfiesDirty will consider a tuple deleted
469  * by our transaction dead, regardless of cmax.) We just
470  * checked that priorXmax == xmin, so we can test that
471  * variable instead of doing HeapTupleHeaderGetXmin again.
472  */
473  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
474  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
475  {
476  tmfd->xmax = priorXmax;
477 
478  /*
479  * Cmin is the problematic value, so store that. See
480  * above.
481  */
482  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
483  ReleaseBuffer(buffer);
484  return TM_SelfModified;
485  }
486 
487  /*
488  * This is a live tuple, so try to lock it again.
489  */
490  ReleaseBuffer(buffer);
491  goto tuple_lock_retry;
492  }
493 
494  /*
495  * If the referenced slot was actually empty, the latest
496  * version of the row must have been deleted, so we need do
497  * nothing.
498  */
499  if (tuple->t_data == NULL)
500  {
501  return TM_Deleted;
502  }
503 
504  /*
505  * As above, if xmin isn't what we're expecting, do nothing.
506  */
507  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
508  priorXmax))
509  {
510  if (BufferIsValid(buffer))
511  ReleaseBuffer(buffer);
512  return TM_Deleted;
513  }
514 
515  /*
516  * If we get here, the tuple was found but failed
517  * SnapshotDirty. Assuming the xmin is either a committed xact
518  * or our own xact (as it certainly should be if we're trying
519  * to modify the tuple), this must mean that the row was
520  * updated or deleted by either a committed xact or our own
521  * xact. If it was deleted, we can ignore it; if it was
522  * updated then chain up to the next version and repeat the
523  * whole process.
524  *
525  * As above, it should be safe to examine xmax and t_ctid
526  * without the buffer content lock, because they can't be
527  * changing.
528  */
529  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
530  {
531  /* deleted, so forget about it */
532  if (BufferIsValid(buffer))
533  ReleaseBuffer(buffer);
534  return TM_Deleted;
535  }
536 
537  /* updated, so look at the updated row */
538  *tid = tuple->t_data->t_ctid;
539  /* updated row should have xmin matching this xmax */
540  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
541  if (BufferIsValid(buffer))
542  ReleaseBuffer(buffer);
543  /* loop back to fetch next in chain */
544  }
545  }
546  else
547  {
548  /* tuple was deleted, so give up */
549  return TM_Deleted;
550  }
551  }
552 
553  slot->tts_tableOid = RelationGetRelid(relation);
554  tuple->t_tableOid = slot->tts_tableOid;
555 
556  /* store in slot, transferring existing pin */
557  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
558 
559  return result;
560 }
561 
562 
563 /* ------------------------------------------------------------------------
564  * DDL related callbacks for heap AM.
565  * ------------------------------------------------------------------------
566  */
567 
568 static void
569 heapam_relation_set_new_filenode(Relation rel,
570  const RelFileNode *newrnode,
571  char persistence,
572  TransactionId *freezeXid,
573  MultiXactId *minmulti)
574 {
575  SMgrRelation srel;
576 
577  /*
578  * Initialize to the minimum XID that could put tuples in the table. We
579  * know that no xacts older than RecentXmin are still running, so that
580  * will do.
581  */
582  *freezeXid = RecentXmin;
583 
584  /*
585  * Similarly, initialize the minimum Multixact to the first value that
586  * could possibly be stored in tuples in the table. Running transactions
587  * could reuse values from their local cache, so we are careful to
588  * consider all currently running multis.
589  *
590  * XXX this could be refined further, but is it worth the hassle?
591  */
592  *minmulti = GetOldestMultiXactId();
593 
594  srel = RelationCreateStorage(*newrnode, persistence);
595 
596  /*
597  * If required, set up an init fork for an unlogged table so that it can
598  * be correctly reinitialized on restart. An immediate sync is required
599  * even if the page has been logged, because the write did not go through
600  * shared_buffers and therefore a concurrent checkpoint may have moved the
601  * redo pointer past our xlog record. Recovery may as well remove it
602  * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
603  * record. Therefore, logging is necessary even if wal_level=minimal.
604  */
605  if (persistence == RELPERSISTENCE_UNLOGGED)
606  {
607  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
608  rel->rd_rel->relkind == RELKIND_MATVIEW ||
609  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
610  smgrcreate(srel, INIT_FORKNUM, false);
611  log_smgrcreate(newrnode, INIT_FORKNUM);
612  smgrimmedsync(srel, INIT_FORKNUM);
613  }
614 
615  smgrclose(srel);
616 }
617 
618 static void
619 heapam_relation_nontransactional_truncate(Relation rel)
620 {
621  RelationTruncate(rel, 0);
622 }
623 
624 static void
625 heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
626 {
627  SMgrRelation dstrel;
628 
629  dstrel = smgropen(*newrnode, rel->rd_backend);
630  RelationOpenSmgr(rel);
631 
632  /*
633  * Since we copy the file directly without looking at the shared buffers,
634  * we'd better first flush out any pages of the source relation that are
635  * in shared buffers. We assume no new changes will be made while we are
636  * holding exclusive lock on the rel.
637  */
638  FlushRelationBuffers(rel);
639 
640  /*
641  * Create and copy all forks of the relation, and schedule unlinking of
642  * old physical files.
643  *
644  * NOTE: any conflict in relfilenode value will be caught in
645  * RelationCreateStorage().
646  */
647  RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
648 
649  /* copy main fork */
650  RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
651  rel->rd_rel->relpersistence);
652 
653  /* copy those extra forks that exist */
654  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
655  forkNum <= MAX_FORKNUM; forkNum++)
656  {
657  if (smgrexists(rel->rd_smgr, forkNum))
658  {
659  smgrcreate(dstrel, forkNum, false);
660 
661  /*
662  * WAL log creation if the relation is persistent, or this is the
663  * init fork of an unlogged relation.
664  */
665  if (RelationIsPermanent(rel) ||
666  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
667  forkNum == INIT_FORKNUM))
668  log_smgrcreate(newrnode, forkNum);
669  RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
670  rel->rd_rel->relpersistence);
671  }
672  }
673 
674 
675  /* drop old relation, and close new one */
676  RelationDropStorage(rel);
677  smgrclose(dstrel);
678 }
679 
680 static void
681 heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
682  Relation OldIndex, bool use_sort,
683  TransactionId OldestXmin,
684  TransactionId *xid_cutoff,
685  MultiXactId *multi_cutoff,
686  double *num_tuples,
687  double *tups_vacuumed,
688  double *tups_recently_dead)
689 {
690  RewriteState rwstate;
691  IndexScanDesc indexScan;
692  TableScanDesc tableScan;
693  HeapScanDesc heapScan;
694  bool is_system_catalog;
695  Tuplesortstate *tuplesort;
696  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
697  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
698  TupleTableSlot *slot;
699  int natts;
700  Datum *values;
701  bool *isnull;
702  BufferHeapTupleTableSlot *hslot;
703  BlockNumber prev_cblock = InvalidBlockNumber;
704 
705  /* Remember if it's a system catalog */
706  is_system_catalog = IsSystemRelation(OldHeap);
707 
708  /*
709  * Valid smgr_targblock implies something already wrote to the relation.
710  * This may be harmless, but this function hasn't planned for it.
711  */
712  Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
713 
714  /* Preallocate values/isnull arrays */
715  natts = newTupDesc->natts;
716  values = (Datum *) palloc(natts * sizeof(Datum));
717  isnull = (bool *) palloc(natts * sizeof(bool));
718 
719  /* Initialize the rewrite operation */
720  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
721  *multi_cutoff);
722 
723 
724  /* Set up sorting if wanted */
725  if (use_sort)
726  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
727  maintenance_work_mem,
728  NULL, false);
729  else
730  tuplesort = NULL;
731 
732  /*
733  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
734  * that still need to be copied, we scan with SnapshotAny and use
735  * HeapTupleSatisfiesVacuum for the visibility test.
736  */
737  if (OldIndex != NULL && !use_sort)
738  {
739  const int ci_index[] = {
740  PROGRESS_CLUSTER_PHASE,
741  PROGRESS_CLUSTER_INDEX_RELID
742  };
743  int64 ci_val[2];
744 
745  /* Set phase and OIDOldIndex to columns */
746  ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
747  ci_val[1] = RelationGetRelid(OldIndex);
748  pgstat_progress_update_multi_param(2, ci_index, ci_val);
749 
750  tableScan = NULL;
751  heapScan = NULL;
752  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
753  index_rescan(indexScan, NULL, 0, NULL, 0);
754  }
755  else
756  {
757  /* In scan-and-sort mode and also VACUUM FULL, set phase */
758  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
759  PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
760 
761  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
762  heapScan = (HeapScanDesc) tableScan;
763  indexScan = NULL;
764 
765  /* Set total heap blocks */
766  pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
767  heapScan->rs_nblocks);
768  }
769 
770  slot = table_slot_create(OldHeap, NULL);
771  hslot = (BufferHeapTupleTableSlot *) slot;
772 
773  /*
774  * Scan through the OldHeap, either in OldIndex order or sequentially;
775  * copy each tuple into the NewHeap, or transiently to the tuplesort
776  * module. Note that we don't bother sorting dead tuples (they won't get
777  * to the new table anyway).
778  */
779  for (;;)
780  {
781  HeapTuple tuple;
782  Buffer buf;
783  bool isdead;
784 
785  CHECK_FOR_INTERRUPTS();
786 
787  if (indexScan != NULL)
788  {
789  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
790  break;
791 
792  /* Since we used no scan keys, should never need to recheck */
793  if (indexScan->xs_recheck)
794  elog(ERROR, "CLUSTER does not support lossy index conditions");
795  }
796  else
797  {
798  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
799  {
800  /*
801  * If the last pages of the scan were empty, we would go to
802  * the next phase while heap_blks_scanned != heap_blks_total.
803  * Instead, to ensure that heap_blks_scanned is equivalent to
804  * total_heap_blks after the table scan phase, this parameter
805  * is manually updated to the correct value when the table
806  * scan finishes.
807  */
808  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
809  heapScan->rs_nblocks);
810  break;
811  }
812 
813  /*
814  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
815  * scanned
816  *
817  * Note that heapScan may start at an offset and wrap around, i.e.
818  * rs_startblock may be >0, and rs_cblock may end with a number
819  * below rs_startblock. To prevent showing this wraparound to the
820  * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
821  */
822  if (prev_cblock != heapScan->rs_cblock)
823  {
824  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
825  (heapScan->rs_cblock +
826  heapScan->rs_nblocks -
827  heapScan->rs_startblock
828  ) % heapScan->rs_nblocks + 1);
829  prev_cblock = heapScan->rs_cblock;
830  }
831  }
832 
833  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
834  buf = hslot->buffer;
835 
836  LockBuffer(buf, BUFFER_LOCK_SHARE);
837 
838  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
839  {
840  case HEAPTUPLE_DEAD:
841  /* Definitely dead */
842  isdead = true;
843  break;
844  case HEAPTUPLE_RECENTLY_DEAD:
845  *tups_recently_dead += 1;
846  /* fall through */
847  case HEAPTUPLE_LIVE:
848  /* Live or recently dead, must copy it */
849  isdead = false;
850  break;
851  case HEAPTUPLE_INSERT_IN_PROGRESS:
852 
853  /*
854  * Since we hold exclusive lock on the relation, normally the
855  * only way to see this is if it was inserted earlier in our
856  * own transaction. However, it can happen in system
857  * catalogs, since we tend to release write lock before commit
858  * there. Give a warning if neither case applies; but in any
859  * case we had better copy it.
860  */
861  if (!is_system_catalog &&
862  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
863  elog(WARNING, "concurrent insert in progress within table \"%s\"",
864  RelationGetRelationName(OldHeap));
865  /* treat as live */
866  isdead = false;
867  break;
868  case HEAPTUPLE_DELETE_IN_PROGRESS:
869 
870  /*
871  * Similar situation to INSERT_IN_PROGRESS case.
872  */
873  if (!is_system_catalog &&
874  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
875  elog(WARNING, "concurrent delete in progress within table \"%s\"",
876  RelationGetRelationName(OldHeap));
877  /* treat as recently dead */
878  *tups_recently_dead += 1;
879  isdead = false;
880  break;
881  default:
882  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
883  isdead = false; /* keep compiler quiet */
884  break;
885  }
886 
887  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
888 
889  if (isdead)
890  {
891  *tups_vacuumed += 1;
892  /* heap rewrite module still needs to see it... */
893  if (rewrite_heap_dead_tuple(rwstate, tuple))
894  {
895  /* A previous recently-dead tuple is now known dead */
896  *tups_vacuumed += 1;
897  *tups_recently_dead -= 1;
898  }
899  continue;
900  }
901 
902  *num_tuples += 1;
903  if (tuplesort != NULL)
904  {
905  tuplesort_putheaptuple(tuplesort, tuple);
906 
907  /*
908  * In scan-and-sort mode, report increase in number of tuples
909  * scanned
910  */
911  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
912  *num_tuples);
913  }
914  else
915  {
916  const int ct_index[] = {
917  PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
918  PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
919  };
920  int64 ct_val[2];
921 
922  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
923  values, isnull, rwstate);
924 
925  /*
926  * In indexscan mode and also VACUUM FULL, report increase in
927  * number of tuples scanned and written
928  */
929  ct_val[0] = *num_tuples;
930  ct_val[1] = *num_tuples;
931  pgstat_progress_update_multi_param(2, ct_index, ct_val);
932  }
933  }
934 
935  if (indexScan != NULL)
936  index_endscan(indexScan);
937  if (tableScan != NULL)
938  table_endscan(tableScan);
939  if (slot)
940  ExecDropSingleTupleTableSlot(slot);
941 
942  /*
943  * In scan-and-sort mode, complete the sort, then read out all live tuples
944  * from the tuplestore and write them to the new relation.
945  */
946  if (tuplesort != NULL)
947  {
948  double n_tuples = 0;
949 
950  /* Report that we are now sorting tuples */
951  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
952  PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
953 
954  tuplesort_performsort(tuplesort);
955 
956  /* Report that we are now writing new heap */
957  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
958  PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
959 
960  for (;;)
961  {
962  HeapTuple tuple;
963 
964  CHECK_FOR_INTERRUPTS();
965 
966  tuple = tuplesort_getheaptuple(tuplesort, true);
967  if (tuple == NULL)
968  break;
969 
970  n_tuples += 1;
971  reform_and_rewrite_tuple(tuple,
972  OldHeap, NewHeap,
973  values, isnull,
974  rwstate);
975  /* Report n_tuples */
976  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
977  n_tuples);
978  }
979 
980  tuplesort_end(tuplesort);
981  }
982 
983  /* Write out any remaining tuples, and fsync if needed */
984  end_heap_rewrite(rwstate);
985 
986  /* Clean up */
987  pfree(values);
988  pfree(isnull);
989 }
990 
991 static bool
992 heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
993  BufferAccessStrategy bstrategy)
994 {
995  HeapScanDesc hscan = (HeapScanDesc) scan;
996 
997  /*
998  * We must maintain a pin on the target page's buffer to ensure that
999  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
1000  * under us. Hence, pin the page until we are done looking at it. We
1001  * also choose to hold sharelock on the buffer throughout --- we could
1002  * release and re-acquire sharelock for each tuple, but since we aren't
1003  * doing much work per tuple, the extra lock traffic is probably better
1004  * avoided.
1005  */
1006  hscan->rs_cblock = blockno;
1007  hscan->rs_cindex = FirstOffsetNumber;
1008  hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
1009  blockno, RBM_NORMAL, bstrategy);
1010  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1011 
1012  /* in heap all blocks can contain tuples, so always return true */
1013  return true;
1014 }
1015 
1016 static bool
1017 heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1018  double *liverows, double *deadrows,
1019  TupleTableSlot *slot)
1020 {
1021  HeapScanDesc hscan = (HeapScanDesc) scan;
1022  Page targpage;
1023  OffsetNumber maxoffset;
1024  BufferHeapTupleTableSlot *hslot;
1025 
1026  Assert(TTS_IS_BUFFERTUPLE(slot));
1027 
1028  hslot = (BufferHeapTupleTableSlot *) slot;
1029  targpage = BufferGetPage(hscan->rs_cbuf);
1030  maxoffset = PageGetMaxOffsetNumber(targpage);
1031 
1032  /* Inner loop over all tuples on the selected page */
1033  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1034  {
1035  ItemId itemid;
1036  HeapTuple targtuple = &hslot->base.tupdata;
1037  bool sample_it = false;
1038 
1039  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1040 
1041  /*
1042  * We ignore unused and redirect line pointers. DEAD line pointers
1043  * should be counted as dead, because we need vacuum to run to get rid
1044  * of them. Note that this rule agrees with the way that
1045  * heap_page_prune() counts things.
1046  */
1047  if (!ItemIdIsNormal(itemid))
1048  {
1049  if (ItemIdIsDead(itemid))
1050  *deadrows += 1;
1051  continue;
1052  }
1053 
1054  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1055 
1056  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1057  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1058  targtuple->t_len = ItemIdGetLength(itemid);
1059 
1060  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1061  hscan->rs_cbuf))
1062  {
1063  case HEAPTUPLE_LIVE:
1064  sample_it = true;
1065  *liverows += 1;
1066  break;
1067 
1068  case HEAPTUPLE_DEAD:
1069  case HEAPTUPLE_RECENTLY_DEAD:
1070  /* Count dead and recently-dead rows */
1071  *deadrows += 1;
1072  break;
1073 
1074  case HEAPTUPLE_INSERT_IN_PROGRESS:
1075 
1076  /*
1077  * Insert-in-progress rows are not counted. We assume that
1078  * when the inserting transaction commits or aborts, it will
1079  * send a stats message to increment the proper count. This
1080  * works right only if that transaction ends after we finish
1081  * analyzing the table; if things happen in the other order,
1082  * its stats update will be overwritten by ours. However, the
1083  * error will be large only if the other transaction runs long
1084  * enough to insert many tuples, so assuming it will finish
1085  * after us is the safer option.
1086  *
1087  * A special case is that the inserting transaction might be
1088  * our own. In this case we should count and sample the row,
1089  * to accommodate users who load a table and analyze it in one
1090  * transaction. (pgstat_report_analyze has to adjust the
1091  * numbers we send to the stats collector to make this come
1092  * out right.)
1093  */
1094  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
1095  {
1096  sample_it = true;
1097  *liverows += 1;
1098  }
1099  break;
1100 
1101  case HEAPTUPLE_DELETE_IN_PROGRESS:
1102 
1103  /*
1104  * We count and sample delete-in-progress rows the same as
1105  * live ones, so that the stats counters come out right if the
1106  * deleting transaction commits after us, per the same
1107  * reasoning given above.
1108  *
1109  * If the delete was done by our own transaction, however, we
1110  * must count the row as dead to make pgstat_report_analyze's
1111  * stats adjustments come out right. (Note: this works out
1112  * properly when the row was both inserted and deleted in our
1113  * xact.)
1114  *
1115  * The net effect of these choices is that we act as though an
1116  * IN_PROGRESS transaction hasn't happened yet, except if it
1117  * is our own transaction, which we assume has happened.
1118  *
1119  * This approach ensures that we behave sanely if we see both
1120  * the pre-image and post-image rows for a row being updated
1121  * by a concurrent transaction: we will sample the pre-image
1122  * but not the post-image. We also get sane results if the
1123  * concurrent transaction never commits.
1124  */
1125  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
1126  *deadrows += 1;
1127  else
1128  {
1129  sample_it = true;
1130  *liverows += 1;
1131  }
1132  break;
1133 
1134  default:
1135  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1136  break;
1137  }
1138 
1139  if (sample_it)
1140  {
1141  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1142  hscan->rs_cindex++;
1143 
1144  /* note that we leave the buffer locked here! */
1145  return true;
1146  }
1147  }
1148 
1149  /* Now release the lock and pin on the page */
1150  UnlockReleaseBuffer(hscan->rs_cbuf);
1151  hscan->rs_cbuf = InvalidBuffer;
1152 
1153  /* also prevent old slot contents from having pin on page */
1154  ExecClearTuple(slot);
1155 
1156  return false;
1157 }
1158 
1159 static double
1160 heapam_index_build_range_scan(Relation heapRelation,
1161  Relation indexRelation,
1162  IndexInfo *indexInfo,
1163  bool allow_sync,
1164  bool anyvisible,
1165  bool progress,
1166  BlockNumber start_blockno,
1167  BlockNumber numblocks,
1168  IndexBuildCallback callback,
1169  void *callback_state,
1170  TableScanDesc scan)
1171 {
1172  HeapScanDesc hscan;
1173  bool is_system_catalog;
1174  bool checking_uniqueness;
1175  HeapTuple heapTuple;
1176  Datum values[INDEX_MAX_KEYS];
1177  bool isnull[INDEX_MAX_KEYS];
1178  double reltuples;
1179  ExprState *predicate;
1180  TupleTableSlot *slot;
1181  EState *estate;
1182  ExprContext *econtext;
1183  Snapshot snapshot;
1184  bool need_unregister_snapshot = false;
1185  TransactionId OldestXmin;
1186  BlockNumber previous_blkno = InvalidBlockNumber;
1187  BlockNumber root_blkno = InvalidBlockNumber;
1188  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1189 
1190  /*
1191  * sanity checks
1192  */
1193  Assert(OidIsValid(indexRelation->rd_rel->relam));
1194 
1195  /* Remember if it's a system catalog */
1196  is_system_catalog = IsSystemRelation(heapRelation);
1197 
1198  /* See whether we're verifying uniqueness/exclusion properties */
1199  checking_uniqueness = (indexInfo->ii_Unique ||
1200  indexInfo->ii_ExclusionOps != NULL);
1201 
1202  /*
1203  * "Any visible" mode is not compatible with uniqueness checks; make sure
1204  * only one of those is requested.
1205  */
1206  Assert(!(anyvisible && checking_uniqueness));
1207 
1208  /*
1209  * Need an EState for evaluation of index expressions and partial-index
1210  * predicates. Also a slot to hold the current tuple.
1211  */
1212  estate = CreateExecutorState();
1213  econtext = GetPerTupleExprContext(estate);
1214  slot = table_slot_create(heapRelation, NULL);
1215 
1216  /* Arrange for econtext's scan tuple to be the tuple under test */
1217  econtext->ecxt_scantuple = slot;
1218 
1219  /* Set up execution state for predicate, if any. */
1220  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1221 
1222  /*
1223  * Prepare for scan of the base relation. In a normal index build, we use
1224  * SnapshotAny because we must retrieve all tuples and do our own time
1225  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1226  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1227  * and index whatever's live according to that.
1228  */
1229  OldestXmin = InvalidTransactionId;
1230 
1231  /* okay to ignore lazy VACUUMs here */
1232  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1233  OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1234 
1235  if (!scan)
1236  {
1237  /*
1238  * Serial index build.
1239  *
1240  * Must begin our own heap scan in this case. We may also need to
1241  * register a snapshot whose lifetime is under our direct control.
1242  */
1243  if (!TransactionIdIsValid(OldestXmin))
1244  {
1245  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1246  need_unregister_snapshot = true;
1247  }
1248  else
1249  snapshot = SnapshotAny;
1250 
1251  scan = table_beginscan_strat(heapRelation, /* relation */
1252  snapshot, /* snapshot */
1253  0, /* number of keys */
1254  NULL, /* scan key */
1255  true, /* buffer access strategy OK */
1256  allow_sync); /* syncscan OK? */
1257  }
1258  else
1259  {
1260  /*
1261  * Parallel index build.
1262  *
1263  * Parallel case never registers/unregisters own snapshot. Snapshot
1264  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1265  * snapshot, based on same criteria as serial case.
1266  */
1267  Assert(!IsBootstrapProcessingMode());
1268  Assert(allow_sync);
1269  snapshot = scan->rs_snapshot;
1270  }
1271 
1272  hscan = (HeapScanDesc) scan;
1273 
1274  /*
1275  * Must have called GetOldestNonRemovableTransactionId() if using
1276  * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1277  * worth checking this for parallel builds, since ambuild routines that
1278  * support parallel builds must work these details out for themselves.)
1279  */
1280  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1281  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1282  !TransactionIdIsValid(OldestXmin));
1283  Assert(snapshot == SnapshotAny || !anyvisible);
1284 
1285  /* Publish number of blocks to scan */
1286  if (progress)
1287  {
1288  BlockNumber nblocks;
1289 
1290  if (hscan->rs_base.rs_parallel != NULL)
1291  {
1292  ParallelBlockTableScanDesc pbscan;
1293 
1294  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1295  nblocks = pbscan->phs_nblocks;
1296  }
1297  else
1298  nblocks = hscan->rs_nblocks;
1299 
1300  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1301  nblocks);
1302  }
1303 
1304  /* set our scan endpoints */
1305  if (!allow_sync)
1306  heap_setscanlimits(scan, start_blockno, numblocks);
1307  else
1308  {
1309  /* syncscan can only be requested on whole relation */
1310  Assert(start_blockno == 0);
1311  Assert(numblocks == InvalidBlockNumber);
1312  }
1313 
1314  reltuples = 0;
1315 
1316  /*
1317  * Scan all tuples in the base relation.
1318  */
1319  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1320  {
1321  bool tupleIsAlive;
1322 
1323  CHECK_FOR_INTERRUPTS();
1324 
1325  /* Report scan progress, if asked to. */
1326  if (progress)
1327  {
1328  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1329 
1330  if (blocks_done != previous_blkno)
1331  {
1332  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1333  blocks_done);
1334  previous_blkno = blocks_done;
1335  }
1336  }
1337 
1338  /*
1339  * When dealing with a HOT-chain of updated tuples, we want to index
1340  * the values of the live tuple (if any), but index it under the TID
1341  * of the chain's root tuple. This approach is necessary to preserve
1342  * the HOT-chain structure in the heap. So we need to be able to find
1343  * the root item offset for every tuple that's in a HOT-chain. When
1344  * first reaching a new page of the relation, call
1345  * heap_get_root_tuples() to build a map of root item offsets on the
1346  * page.
1347  *
1348  * It might look unsafe to use this information across buffer
1349  * lock/unlock. However, we hold ShareLock on the table so no
1350  * ordinary insert/update/delete should occur; and we hold pin on the
1351  * buffer continuously while visiting the page, so no pruning
1352  * operation can occur either.
1353  *
1354  * In cases with only ShareUpdateExclusiveLock on the table, it's
1355  * possible for some HOT tuples to appear that we didn't know about
1356  * when we first read the page. To handle that case, we re-obtain the
1357  * list of root offsets when a HOT tuple points to a root item that we
1358  * don't know about.
1359  *
1360  * Also, although our opinions about tuple liveness could change while
1361  * we scan the page (due to concurrent transaction commits/aborts),
1362  * the chain root locations won't, so this info doesn't need to be
1363  * rebuilt after waiting for another transaction.
1364  *
1365  * Note the implied assumption that there is no more than one live
1366  * tuple per HOT-chain --- else we could create more than one index
1367  * entry pointing to the same root tuple.
1368  */
1369  if (hscan->rs_cblock != root_blkno)
1370  {
1371  Page page = BufferGetPage(hscan->rs_cbuf);
1372 
1373  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1374  heap_get_root_tuples(page, root_offsets);
1375  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1376 
1377  root_blkno = hscan->rs_cblock;
1378  }
1379 
1380  if (snapshot == SnapshotAny)
1381  {
1382  /* do our own time qual check */
1383  bool indexIt;
1384  TransactionId xwait;
1385 
1386  recheck:
1387 
1388  /*
1389  * We could possibly get away with not locking the buffer here,
1390  * since caller should hold ShareLock on the relation, but let's
1391  * be conservative about it. (This remark is still correct even
1392  * with HOT-pruning: our pin on the buffer prevents pruning.)
1393  */
1394  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1395 
1396  /*
1397  * The criteria for counting a tuple as live in this block need to
1398  * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1399  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1400  * reltuples values, e.g. when there are many recently-dead
1401  * tuples.
1402  */
1403  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1404  hscan->rs_cbuf))
1405  {
1406  case HEAPTUPLE_DEAD:
1407  /* Definitely dead, we can ignore it */
1408  indexIt = false;
1409  tupleIsAlive = false;
1410  break;
1411  case HEAPTUPLE_LIVE:
1412  /* Normal case, index and unique-check it */
1413  indexIt = true;
1414  tupleIsAlive = true;
1415  /* Count it as live, too */
1416  reltuples += 1;
1417  break;
1418  case HEAPTUPLE_RECENTLY_DEAD:
1419 
1420  /*
1421  * If tuple is recently deleted then we must index it
1422  * anyway to preserve MVCC semantics. (Pre-existing
1423  * transactions could try to use the index after we finish
1424  * building it, and may need to see such tuples.)
1425  *
1426  * However, if it was HOT-updated then we must only index
1427  * the live tuple at the end of the HOT-chain. Since this
1428  * breaks semantics for pre-existing snapshots, mark the
1429  * index as unusable for them.
1430  *
1431  * We don't count recently-dead tuples in reltuples, even
1432  * if we index them; see heapam_scan_analyze_next_tuple().
1433  */
1434  if (HeapTupleIsHotUpdated(heapTuple))
1435  {
1436  indexIt = false;
1437  /* mark the index as unsafe for old snapshots */
1438  indexInfo->ii_BrokenHotChain = true;
1439  }
1440  else
1441  indexIt = true;
1442  /* In any case, exclude the tuple from unique-checking */
1443  tupleIsAlive = false;
1444  break;
1445  case HEAPTUPLE_INSERT_IN_PROGRESS:
1446 
1447  /*
1448  * In "anyvisible" mode, this tuple is visible and we
1449  * don't need any further checks.
1450  */
1451  if (anyvisible)
1452  {
1453  indexIt = true;
1454  tupleIsAlive = true;
1455  reltuples += 1;
1456  break;
1457  }
1458 
1459  /*
1460  * Since caller should hold ShareLock or better, normally
1461  * the only way to see this is if it was inserted earlier
1462  * in our own transaction. However, it can happen in
1463  * system catalogs, since we tend to release write lock
1464  * before commit there. Give a warning if neither case
1465  * applies.
1466  */
1467  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1468  if (!TransactionIdIsCurrentTransactionId(xwait))
1469  {
1470  if (!is_system_catalog)
1471  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1472  RelationGetRelationName(heapRelation));
1473 
1474  /*
1475  * If we are performing uniqueness checks, indexing
1476  * such a tuple could lead to a bogus uniqueness
1477  * failure. In that case we wait for the inserting
1478  * transaction to finish and check again.
1479  */
1480  if (checking_uniqueness)
1481  {
1482  /*
1483  * Must drop the lock on the buffer before we wait
1484  */
1485  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1486  XactLockTableWait(xwait, heapRelation,
1487  &heapTuple->t_self,
1488  XLTW_InsertIndexUnique);
1489  CHECK_FOR_INTERRUPTS();
1490  goto recheck;
1491  }
1492  }
1493  else
1494  {
1495  /*
1496  * For consistency with
1497  * heapam_scan_analyze_next_tuple(), count
1498  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1499  * when inserted by our own transaction.
1500  */
1501  reltuples += 1;
1502  }
1503 
1504  /*
1505  * We must index such tuples, since if the index build
1506  * commits then they're good.
1507  */
1508  indexIt = true;
1509  tupleIsAlive = true;
1510  break;
1511  case HEAPTUPLE_DELETE_IN_PROGRESS:
1512 
1513  /*
1514  * As with INSERT_IN_PROGRESS case, this is unexpected
1515  * unless it's our own deletion or a system catalog; but
1516  * in anyvisible mode, this tuple is visible.
1517  */
1518  if (anyvisible)
1519  {
1520  indexIt = true;
1521  tupleIsAlive = false;
1522  reltuples += 1;
1523  break;
1524  }
1525 
1526  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1527  if (!TransactionIdIsCurrentTransactionId(xwait))
1528  {
1529  if (!is_system_catalog)
1530  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1531  RelationGetRelationName(heapRelation));
1532 
1533  /*
1534  * If we are performing uniqueness checks, assuming
1535  * the tuple is dead could lead to missing a
1536  * uniqueness violation. In that case we wait for the
1537  * deleting transaction to finish and check again.
1538  *
1539  * Also, if it's a HOT-updated tuple, we should not
1540  * index it but rather the live tuple at the end of
1541  * the HOT-chain. However, the deleting transaction
1542  * could abort, possibly leaving this tuple as live
1543  * after all, in which case it has to be indexed. The
1544  * only way to know what to do is to wait for the
1545  * deleting transaction to finish and check again.
1546  */
1547  if (checking_uniqueness ||
1548  HeapTupleIsHotUpdated(heapTuple))
1549  {
1550  /*
1551  * Must drop the lock on the buffer before we wait
1552  */
1553  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1554  XactLockTableWait(xwait, heapRelation,
1555  &heapTuple->t_self,
1556  XLTW_RecheckExclusionConstr);
1557  CHECK_FOR_INTERRUPTS();
1558  goto recheck;
1559  }
1560 
1561  /*
1562  * Otherwise index it but don't check for uniqueness,
1563  * the same as a RECENTLY_DEAD tuple.
1564  */
1565  indexIt = true;
1566 
1567  /*
1568  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1569  * if they were not deleted by the current
1570  * transaction. That's what
1571  * heapam_scan_analyze_next_tuple() does, and we want
1572  * the behavior to be consistent.
1573  */
1574  reltuples += 1;
1575  }
1576  else if (HeapTupleIsHotUpdated(heapTuple))
1577  {
1578  /*
1579  * It's a HOT-updated tuple deleted by our own xact.
1580  * We can assume the deletion will commit (else the
1581  * index contents don't matter), so treat the same as
1582  * RECENTLY_DEAD HOT-updated tuples.
1583  */
1584  indexIt = false;
1585  /* mark the index as unsafe for old snapshots */
1586  indexInfo->ii_BrokenHotChain = true;
1587  }
1588  else
1589  {
1590  /*
1591  * It's a regular tuple deleted by our own xact. Index
1592  * it, but don't check for uniqueness nor count in
1593  * reltuples, the same as a RECENTLY_DEAD tuple.
1594  */
1595  indexIt = true;
1596  }
1597  /* In any case, exclude the tuple from unique-checking */
1598  tupleIsAlive = false;
1599  break;
1600  default:
1601  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1602  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1603  break;
1604  }
1605 
1606  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1607 
1608  if (!indexIt)
1609  continue;
1610  }
1611  else
1612  {
1613  /* heap_getnext did the time qual check */
1614  tupleIsAlive = true;
1615  reltuples += 1;
1616  }
1617 
1618  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1619 
1620  /* Set up for predicate or expression evaluation */
1621  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1622 
1623  /*
1624  * In a partial index, discard tuples that don't satisfy the
1625  * predicate.
1626  */
1627  if (predicate != NULL)
1628  {
1629  if (!ExecQual(predicate, econtext))
1630  continue;
1631  }
1632 
1633  /*
1634  * For the current heap tuple, extract all the attributes we use in
1635  * this index, and note which are null. This also performs evaluation
1636  * of any expressions needed.
1637  */
1638  FormIndexDatum(indexInfo,
1639  slot,
1640  estate,
1641  values,
1642  isnull);
1643 
1644  /*
1645  * You'd think we should go ahead and build the index tuple here, but
1646  * some index AMs want to do further processing on the data first. So
1647  * pass the values[] and isnull[] arrays, instead.
1648  */
1649 
1650  if (HeapTupleIsHeapOnly(heapTuple))
1651  {
1652  /*
1653  * For a heap-only tuple, pretend its TID is that of the root. See
1654  * src/backend/access/heap/README.HOT for discussion.
1655  */
1656  ItemPointerData tid;
1657  OffsetNumber offnum;
1658 
1659  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1660 
1661  /*
1662  * If a HOT tuple points to a root that we don't know about,
1663  * obtain root items afresh. If that still fails, report it as
1664  * corruption.
1665  */
1666  if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1667  {
1668  Page page = BufferGetPage(hscan->rs_cbuf);
1669 
1670  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1671  heap_get_root_tuples(page, root_offsets);
1672  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1673  }
1674 
1675  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1676  ereport(ERROR,
1677  (errcode(ERRCODE_DATA_CORRUPTED),
1678  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1679  ItemPointerGetBlockNumber(&heapTuple->t_self),
1680  offnum,
1681  RelationGetRelationName(heapRelation))));
1682 
1683  ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1684  root_offsets[offnum - 1]);
1685 
1686  /* Call the AM's callback routine to process the tuple */
1687  callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1688  callback_state);
1689  }
1690  else
1691  {
1692  /* Call the AM's callback routine to process the tuple */
1693  callback(indexRelation, &heapTuple->t_self, values, isnull,
1694  tupleIsAlive, callback_state);
1695  }
1696  }
1697 
1698  /* Report scan progress one last time. */
1699  if (progress)
1700  {
1701  BlockNumber blks_done;
1702 
1703  if (hscan->rs_base.rs_parallel != NULL)
1704  {
1705  ParallelBlockTableScanDesc pbscan;
1706 
1707  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1708  blks_done = pbscan->phs_nblocks;
1709  }
1710  else
1711  blks_done = hscan->rs_nblocks;
1712 
1713  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1714  blks_done);
1715  }
1716 
1717  table_endscan(scan);
1718 
1719  /* we can now forget our snapshot, if set and registered by us */
1720  if (need_unregister_snapshot)
1721  UnregisterSnapshot(snapshot);
1722 
1723  ExecDropSingleTupleTableSlot(slot);
1724 
1725  FreeExecutorState(estate);
1726 
1727  /* These may have been pointing to the now-gone estate */
1728  indexInfo->ii_ExpressionsState = NIL;
1729  indexInfo->ii_PredicateState = NULL;
1730 
1731  return reltuples;
1732 }
1733 
1734 static void
1735 heapam_index_validate_scan(Relation heapRelation,
1736  Relation indexRelation,
1737  IndexInfo *indexInfo,
1738  Snapshot snapshot,
1739  ValidateIndexState *state)
1740 {
1741  TableScanDesc scan;
1742  HeapScanDesc hscan;
1743  HeapTuple heapTuple;
1744  Datum values[INDEX_MAX_KEYS];
1745  bool isnull[INDEX_MAX_KEYS];
1746  ExprState *predicate;
1747  TupleTableSlot *slot;
1748  EState *estate;
1749  ExprContext *econtext;
1750  BlockNumber root_blkno = InvalidBlockNumber;
1751  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1752  bool in_index[MaxHeapTuplesPerPage];
1753  BlockNumber previous_blkno = InvalidBlockNumber;
1754 
1755  /* state variables for the merge */
1756  ItemPointer indexcursor = NULL;
1757  ItemPointerData decoded;
1758  bool tuplesort_empty = false;
1759 
1760  /*
1761  * sanity checks
1762  */
1763  Assert(OidIsValid(indexRelation->rd_rel->relam));
1764 
1765  /*
1766  * Need an EState for evaluation of index expressions and partial-index
1767  * predicates. Also a slot to hold the current tuple.
1768  */
1769  estate = CreateExecutorState();
1770  econtext = GetPerTupleExprContext(estate);
1771  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1772  &TTSOpsHeapTuple);
1773 
1774  /* Arrange for econtext's scan tuple to be the tuple under test */
1775  econtext->ecxt_scantuple = slot;
1776 
1777  /* Set up execution state for predicate, if any. */
1778  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1779 
1780  /*
1781  * Prepare for scan of the base relation. We need just those tuples
1782  * satisfying the passed-in reference snapshot. We must disable syncscan
1783  * here, because it's critical that we read from block zero forward to
1784  * match the sorted TIDs.
1785  */
1786  scan = table_beginscan_strat(heapRelation, /* relation */
1787  snapshot, /* snapshot */
1788  0, /* number of keys */
1789  NULL, /* scan key */
1790  true, /* buffer access strategy OK */
1791  false); /* syncscan not OK */
1792  hscan = (HeapScanDesc) scan;
1793 
1794  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1795  hscan->rs_nblocks);
1796 
1797  /*
1798  * Scan all tuples matching the snapshot.
1799  */
1800  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1801  {
1802  ItemPointer heapcursor = &heapTuple->t_self;
1803  ItemPointerData rootTuple;
1804  OffsetNumber root_offnum;
1805 
1806  CHECK_FOR_INTERRUPTS();
1807 
1808  state->htups += 1;
1809 
1810  if ((previous_blkno == InvalidBlockNumber) ||
1811  (hscan->rs_cblock != previous_blkno))
1812  {
1813  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1814  hscan->rs_cblock);
1815  previous_blkno = hscan->rs_cblock;
1816  }
1817 
1818  /*
1819  * As commented in table_index_build_scan, we should index heap-only
1820  * tuples under the TIDs of their root tuples; so when we advance onto
1821  * a new heap page, build a map of root item offsets on the page.
1822  *
1823  * This complicates merging against the tuplesort output: we will
1824  * visit the live tuples in order by their offsets, but the root
1825  * offsets that we need to compare against the index contents might be
1826  * ordered differently. So we might have to "look back" within the
1827  * tuplesort output, but only within the current page. We handle that
1828  * by keeping a bool array in_index[] showing all the
1829  * already-passed-over tuplesort output TIDs of the current page. We
1830  * clear that array here, when advancing onto a new heap page.
1831  */
1832  if (hscan->rs_cblock != root_blkno)
1833  {
1834  Page page = BufferGetPage(hscan->rs_cbuf);
1835 
1836  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1837  heap_get_root_tuples(page, root_offsets);
1838  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1839 
1840  memset(in_index, 0, sizeof(in_index));
1841 
1842  root_blkno = hscan->rs_cblock;
1843  }
1844 
1845  /* Convert actual tuple TID to root TID */
1846  rootTuple = *heapcursor;
1847  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1848 
1849  if (HeapTupleIsHeapOnly(heapTuple))
1850  {
1851  root_offnum = root_offsets[root_offnum - 1];
1852  if (!OffsetNumberIsValid(root_offnum))
1853  ereport(ERROR,
1854  (errcode(ERRCODE_DATA_CORRUPTED),
1855  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1856  ItemPointerGetBlockNumber(heapcursor),
1857  ItemPointerGetOffsetNumber(heapcursor),
1858  RelationGetRelationName(heapRelation))));
1859  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1860  }
1861 
1862  /*
1863  * "merge" by skipping through the index tuples until we find or pass
1864  * the current root tuple.
1865  */
1866  while (!tuplesort_empty &&
1867  (!indexcursor ||
1868  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1869  {
1870  Datum ts_val;
1871  bool ts_isnull;
1872 
1873  if (indexcursor)
1874  {
1875  /*
1876  * Remember index items seen earlier on the current heap page
1877  */
1878  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1879  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1880  }
1881 
1882  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1883  &ts_val, &ts_isnull, NULL);
1884  Assert(tuplesort_empty || !ts_isnull);
1885  if (!tuplesort_empty)
1886  {
1887  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1888  indexcursor = &decoded;
1889 
1890  /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
1891 #ifndef USE_FLOAT8_BYVAL
1892  pfree(DatumGetPointer(ts_val));
1893 #endif
1894  }
1895  else
1896  {
1897  /* Be tidy */
1898  indexcursor = NULL;
1899  }
1900  }
1901 
1902  /*
1903  * If the tuplesort has overshot *and* we didn't see a match earlier,
1904  * then this tuple is missing from the index, so insert it.
1905  */
1906  if ((tuplesort_empty ||
1907  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1908  !in_index[root_offnum - 1])
1909  {
1910  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1911 
1912  /* Set up for predicate or expression evaluation */
1913  ExecStoreHeapTuple(heapTuple, slot, false);
1914 
1915  /*
1916  * In a partial index, discard tuples that don't satisfy the
1917  * predicate.
1918  */
1919  if (predicate != NULL)
1920  {
1921  if (!ExecQual(predicate, econtext))
1922  continue;
1923  }
1924 
1925  /*
1926  * For the current heap tuple, extract all the attributes we use
1927  * in this index, and note which are null. This also performs
1928  * evaluation of any expressions needed.
1929  */
1930  FormIndexDatum(indexInfo,
1931  slot,
1932  estate,
1933  values,
1934  isnull);
1935 
1936  /*
1937  * You'd think we should go ahead and build the index tuple here,
1938  * but some index AMs want to do further processing on the data
1939  * first. So pass the values[] and isnull[] arrays, instead.
1940  */
1941 
1942  /*
1943  * If the tuple is already committed dead, you might think we
1944  * could suppress uniqueness checking, but this is no longer true
1945  * in the presence of HOT, because the insert is actually a proxy
1946  * for a uniqueness check on the whole HOT-chain. That is, the
1947  * tuple we have here could be dead because it was already
1948  * HOT-updated, and if so the updating transaction will not have
1949  * thought it should insert index entries. The index AM will
1950  * check the whole HOT-chain and correctly detect a conflict if
1951  * there is one.
1952  */
1953 
1954  index_insert(indexRelation,
1955  values,
1956  isnull,
1957  &rootTuple,
1958  heapRelation,
1959  indexInfo->ii_Unique ?
1960  UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1961  false,
1962  indexInfo);
1963 
1964  state->tups_inserted += 1;
1965  }
1966  }
1967 
1968  table_endscan(scan);
1969 
1970  ExecDropSingleTupleTableSlot(slot);
1971 
1972  FreeExecutorState(estate);
1973 
1974  /* These may have been pointing to the now-gone estate */
1975  indexInfo->ii_ExpressionsState = NIL;
1976  indexInfo->ii_PredicateState = NULL;
1977 }
1978 
1979 /*
1980  * Return the number of blocks that have been read by this scan since
1981  * starting. This is meant for progress reporting rather than being fully
1982  * accurate: in a parallel scan, workers can be concurrently reading blocks
1983  * further ahead than what we report.
1984  */
1985 static BlockNumber
1986 heapam_scan_get_blocks_done(HeapScanDesc hscan)
1987 {
1988  ParallelBlockTableScanDesc bpscan = NULL;
1989  BlockNumber startblock;
1990  BlockNumber blocks_done;
1991 
1992  if (hscan->rs_base.rs_parallel != NULL)
1993  {
1994  bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1995  startblock = bpscan->phs_startblock;
1996  }
1997  else
1998  startblock = hscan->rs_startblock;
1999 
2000  /*
2001  * Might have wrapped around the end of the relation, if startblock was
2002  * not zero.
2003  */
2004  if (hscan->rs_cblock > startblock)
2005  blocks_done = hscan->rs_cblock - startblock;
2006  else
2007  {
2008  BlockNumber nblocks;
2009 
2010  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2011  blocks_done = nblocks - startblock +
2012  hscan->rs_cblock;
2013  }
2014 
2015  return blocks_done;
2016 }
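Because a synchronized scan may have started at an arbitrary block, the subtraction above has to allow for wrapping past the end of the relation. A minimal standalone check of that arithmetic, with demo_blocks_done() as a hypothetical stand-in for the function above, could look like this:

#include <assert.h>
#include <stdint.h>

/*
 * Blocks read so far by a scan over nblocks blocks that started at startblock
 * and currently sits on curblock, mirroring the wraparound handling in
 * heapam_scan_get_blocks_done().
 */
static uint32_t
demo_blocks_done(uint32_t nblocks, uint32_t startblock, uint32_t curblock)
{
	if (curblock > startblock)
		return curblock - startblock;

	/* wrapped around the end of the relation (or still on the start block) */
	return nblocks - startblock + curblock;
}

int
main(void)
{
	assert(demo_blocks_done(100, 0, 40) == 40);	/* no wraparound */
	assert(demo_blocks_done(100, 90, 10) == 20);	/* started at 90, wrapped */
	return 0;
}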
2017 
2018 
2019 /* ------------------------------------------------------------------------
2020  * Miscellaneous callbacks for the heap AM
2021  * ------------------------------------------------------------------------
2022  */
2023 
2024 /*
2025  * Check to see whether the table needs a TOAST table. It does only if
2026  * (1) there are any toastable attributes, and (2) the maximum length
2027  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2028  * create a toast table for something like "f1 varchar(20)".)
2029  */
2030 static bool
2031 heapam_relation_needs_toast_table(Relation rel)
2032 {
2033  int32 data_length = 0;
2034  bool maxlength_unknown = false;
2035  bool has_toastable_attrs = false;
2036  TupleDesc tupdesc = rel->rd_att;
2037  int32 tuple_length;
2038  int i;
2039 
2040  for (i = 0; i < tupdesc->natts; i++)
2041  {
2042  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2043 
2044  if (att->attisdropped)
2045  continue;
2046  data_length = att_align_nominal(data_length, att->attalign);
2047  if (att->attlen > 0)
2048  {
2049  /* Fixed-length types are never toastable */
2050  data_length += att->attlen;
2051  }
2052  else
2053  {
2054  int32 maxlen = type_maximum_size(att->atttypid,
2055  att->atttypmod);
2056 
2057  if (maxlen < 0)
2058  maxlength_unknown = true;
2059  else
2060  data_length += maxlen;
2061  if (att->attstorage != TYPSTORAGE_PLAIN)
2062  has_toastable_attrs = true;
2063  }
2064  }
2065  if (!has_toastable_attrs)
2066  return false; /* nothing to toast? */
2067  if (maxlength_unknown)
2068  return true; /* any unlimited-length attrs? */
2069  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2070  BITMAPLEN(tupdesc->natts)) +
2071  MAXALIGN(data_length);
2072  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2073 }
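The same decision can be exercised outside the server against mocked column metadata. In the sketch below, DemoAttr and DEMO_TOAST_THRESHOLD are assumptions standing in for pg_attribute entries and TOAST_TUPLE_THRESHOLD, and alignment padding is ignored for brevity.

#include <stdbool.h>
#include <stdio.h>

#define DEMO_TOAST_THRESHOLD 2000	/* stand-in for TOAST_TUPLE_THRESHOLD */

typedef struct DemoAttr
{
	int			attlen;			/* > 0: fixed width, -1: varlena */
	int			maxlen;			/* upper bound for varlena, -1 if unbounded */
	bool		plain_storage;	/* true: column can never be toasted */
} DemoAttr;

/* Same shape as heapam_relation_needs_toast_table(), minus alignment. */
static bool
demo_needs_toast(const DemoAttr *atts, int natts)
{
	int			data_length = 0;
	bool		maxlength_unknown = false;
	bool		has_toastable_attrs = false;

	for (int i = 0; i < natts; i++)
	{
		if (atts[i].attlen > 0)
			data_length += atts[i].attlen;
		else
		{
			if (atts[i].maxlen < 0)
				maxlength_unknown = true;
			else
				data_length += atts[i].maxlen;
			if (!atts[i].plain_storage)
				has_toastable_attrs = true;
		}
	}
	if (!has_toastable_attrs)
		return false;
	if (maxlength_unknown)
		return true;
	return data_length > DEMO_TOAST_THRESHOLD;
}

int
main(void)
{
	DemoAttr	varchar20[] = {{-1, 24, false}};	/* like "f1 varchar(20)" */
	DemoAttr	unbounded_text[] = {{-1, -1, false}};

	printf("varchar(20): %d  text: %d\n",
		   demo_needs_toast(varchar20, 1),
		   demo_needs_toast(unbounded_text, 1));
	return 0;
}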
2074 
2075 /*
2076  * TOAST tables for heap relations are just heap relations.
2077  */
2078 static Oid
2079 heapam_relation_toast_am(Relation rel)
2080 {
2081  return rel->rd_rel->relam;
2082 }
2083 
2084 
2085 /* ------------------------------------------------------------------------
2086  * Planner related callbacks for the heap AM
2087  * ------------------------------------------------------------------------
2088  */
2089 
2090 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2091  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2092 #define HEAP_USABLE_BYTES_PER_PAGE \
2093  (BLCKSZ - SizeOfPageHeaderData)
2094 
2095 static void
2096 heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
2097  BlockNumber *pages, double *tuples,
2098  double *allvisfrac)
2099 {
2100  table_block_relation_estimate_size(rel, attr_widths, pages,
2101  tuples, allvisfrac,
2102  HEAP_OVERHEAD_BYTES_PER_TUPLE,
2103  HEAP_USABLE_BYTES_PER_PAGE);
2104 }
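table_block_relation_estimate_size() (in tableam.c) uses those two constants to turn a physical size into a row-count estimate when reliable statistics are unavailable. The sketch below only illustrates the role the constants play; the values and the simplified density formula are assumptions for an 8 kB page, not the real function.

#include <stdio.h>

/* Assumed stand-ins for the macros passed above. */
#define DEMO_OVERHEAD_BYTES_PER_TUPLE 28	/* tuple header + line pointer */
#define DEMO_USABLE_BYTES_PER_PAGE 8168		/* BLCKSZ minus page header */

/*
 * Rough tuple-count estimate: tuples per page is the usable page space
 * divided by (average tuple width + per-tuple overhead).
 */
static double
demo_estimate_tuples(double pages, double avg_tuple_width)
{
	double		tuples_per_page = DEMO_USABLE_BYTES_PER_PAGE /
		(avg_tuple_width + DEMO_OVERHEAD_BYTES_PER_TUPLE);

	return pages * tuples_per_page;
}

int
main(void)
{
	printf("~%.0f tuples in 1000 pages of 100-byte rows\n",
		   demo_estimate_tuples(1000.0, 100.0));
	return 0;
}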
2105 
2106 
2107 /* ------------------------------------------------------------------------
2108  * Executor related callbacks for the heap AM
2109  * ------------------------------------------------------------------------
2110  */
2111 
2112 static bool
2113 heapam_scan_bitmap_next_block(TableScanDesc scan,
2114  TBMIterateResult *tbmres)
2115 {
2116  HeapScanDesc hscan = (HeapScanDesc) scan;
2117  BlockNumber page = tbmres->blockno;
2118  Buffer buffer;
2119  Snapshot snapshot;
2120  int ntup;
2121 
2122  hscan->rs_cindex = 0;
2123  hscan->rs_ntuples = 0;
2124 
2125  /*
2126  * Ignore any claimed entries past what we think is the end of the
2127  * relation. It may have been extended after the start of our scan (we
2128  * only hold an AccessShareLock, and it could be inserts from this
2129  * backend).
2130  */
2131  if (page >= hscan->rs_nblocks)
2132  return false;
2133 
2134  /*
2135  * Acquire pin on the target heap page, trading in any pin we held before.
2136  */
2137  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2138  scan->rs_rd,
2139  page);
2140  hscan->rs_cblock = page;
2141  buffer = hscan->rs_cbuf;
2142  snapshot = scan->rs_snapshot;
2143 
2144  ntup = 0;
2145 
2146  /*
2147  * Prune and repair fragmentation for the whole page, if possible.
2148  */
2149  heap_page_prune_opt(scan->rs_rd, buffer);
2150 
2151  /*
2152  * We must hold share lock on the buffer content while examining tuple
2153  * visibility. Afterwards, however, the tuples we have found to be
2154  * visible are guaranteed good as long as we hold the buffer pin.
2155  */
2156  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2157 
2158  /*
2159  * We need two separate strategies for lossy and non-lossy cases.
2160  */
2161  if (tbmres->ntuples >= 0)
2162  {
2163  /*
2164  * Bitmap is non-lossy, so we just look through the offsets listed in
2165  * tbmres; but we have to follow any HOT chain starting at each such
2166  * offset.
2167  */
2168  int curslot;
2169 
2170  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2171  {
2172  OffsetNumber offnum = tbmres->offsets[curslot];
2173  ItemPointerData tid;
2174  HeapTupleData heapTuple;
2175 
2176  ItemPointerSet(&tid, page, offnum);
2177  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2178  &heapTuple, NULL, true))
2179  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2180  }
2181  }
2182  else
2183  {
2184  /*
2185  * Bitmap is lossy, so we must examine each line pointer on the page.
2186  * But we can ignore HOT chains, since we'll check each tuple anyway.
2187  */
2188  Page dp = (Page) BufferGetPage(buffer);
2189  OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
2190  OffsetNumber offnum;
2191 
2192  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2193  {
2194  ItemId lp;
2195  HeapTupleData loctup;
2196  bool valid;
2197 
2198  lp = PageGetItemId(dp, offnum);
2199  if (!ItemIdIsNormal(lp))
2200  continue;
2201  loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2202  loctup.t_len = ItemIdGetLength(lp);
2203  loctup.t_tableOid = scan->rs_rd->rd_id;
2204  ItemPointerSet(&loctup.t_self, page, offnum);
2205  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2206  if (valid)
2207  {
2208  hscan->rs_vistuples[ntup++] = offnum;
2209  PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
2210  HeapTupleHeaderGetXmin(loctup.t_data));
2211  }
2212  HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2213  buffer, snapshot);
2214  }
2215  }
2216 
2217  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2218 
2219  Assert(ntup <= MaxHeapTuplesPerPage);
2220  hscan->rs_ntuples = ntup;
2221 
2222  return ntup > 0;
2223 }
2224 
2225 static bool
2226 heapam_scan_bitmap_next_tuple(TableScanDesc scan,
2227  TBMIterateResult *tbmres,
2228  TupleTableSlot *slot)
2229 {
2230  HeapScanDesc hscan = (HeapScanDesc) scan;
2231  OffsetNumber targoffset;
2232  Page dp;
2233  ItemId lp;
2234 
2235  /*
2236  * Out of range? If so, nothing more to look at on this page
2237  */
2238  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2239  return false;
2240 
2241  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2242  dp = (Page) BufferGetPage(hscan->rs_cbuf);
2243  lp = PageGetItemId(dp, targoffset);
2244  Assert(ItemIdIsNormal(lp));
2245 
2246  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2247  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2248  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2249  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2250 
2251  pgstat_count_heap_fetch(scan->rs_rd);
2252 
2253  /*
2254  * Set up the result slot to point to this tuple. Note that the slot
2255  * acquires a pin on the buffer.
2256  */
2257  ExecStorePinnedBufferHeapTuple(&hscan->rs_ctup,
2258  slot,
2259  hscan->rs_cbuf);
2260 
2261  hscan->rs_cindex++;
2262 
2263  return true;
2264 }
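Seen from the executor, these two callbacks form a plain nested iteration: advance to the next bitmap page, then drain its visible tuples until the inner callback reports the page is exhausted. The mock driver below shows only that shape; the demo_* names and the fake fixed-size pages are assumptions, not the real bitmap heap scan node.

#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-in for the scan state shared by the two callbacks. */
typedef struct DemoScan
{
	int			cur_block;		/* -1 before the first block */
	int			nblocks;
	int			tuples_per_block;
	int			cur_tuple;
} DemoScan;

static bool
demo_next_block(DemoScan *scan)
{
	scan->cur_block++;
	scan->cur_tuple = 0;
	return scan->cur_block < scan->nblocks;
}

static bool
demo_next_tuple(DemoScan *scan, int *tuple_out)
{
	if (scan->cur_tuple >= scan->tuples_per_block)
		return false;			/* page exhausted, caller fetches next block */
	*tuple_out = scan->cur_block * scan->tuples_per_block + scan->cur_tuple++;
	return true;
}

int
main(void)
{
	DemoScan	scan = {.cur_block = -1, .nblocks = 2, .tuples_per_block = 3};
	int			tuple;

	/* Same loop shape as the executor's use of the two callbacks above. */
	while (demo_next_block(&scan))
		while (demo_next_tuple(&scan, &tuple))
			printf("tuple %d\n", tuple);
	return 0;
}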
2265 
2266 static bool
2267 heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
2268 {
2269  HeapScanDesc hscan = (HeapScanDesc) scan;
2270  TsmRoutine *tsm = scanstate->tsmroutine;
2271  BlockNumber blockno;
2272 
2273  /* return false immediately if relation is empty */
2274  if (hscan->rs_nblocks == 0)
2275  return false;
2276 
2277  if (tsm->NextSampleBlock)
2278  {
2279  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2280  hscan->rs_cblock = blockno;
2281  }
2282  else
2283  {
2284  /* scanning table sequentially */
2285 
2286  if (hscan->rs_cblock == InvalidBlockNumber)
2287  {
2288  Assert(!hscan->rs_inited);
2289  blockno = hscan->rs_startblock;
2290  }
2291  else
2292  {
2293  Assert(hscan->rs_inited);
2294 
2295  blockno = hscan->rs_cblock + 1;
2296 
2297  if (blockno >= hscan->rs_nblocks)
2298  {
2299  /* wrap to beginning of rel, might not have started at 0 */
2300  blockno = 0;
2301  }
2302 
2303  /*
2304  * Report our new scan position for synchronization purposes.
2305  *
2306  * Note: we do this before checking for end of scan so that the
2307  * final state of the position hint is back at the start of the
2308  * rel. That's not strictly necessary, but otherwise when you run
2309  * the same query multiple times the starting position would shift
2310  * a little bit backwards on every invocation, which is confusing.
2311  * We don't guarantee any specific ordering in general, though.
2312  */
2313  if (scan->rs_flags & SO_ALLOW_SYNC)
2314  ss_report_location(scan->rs_rd, blockno);
2315 
2316  if (blockno == hscan->rs_startblock)
2317  {
2318  blockno = InvalidBlockNumber;
2319  }
2320  }
2321  }
2322 
2323  if (!BlockNumberIsValid(blockno))
2324  {
2325  if (BufferIsValid(hscan->rs_cbuf))
2326  ReleaseBuffer(hscan->rs_cbuf);
2327  hscan->rs_cbuf = InvalidBuffer;
2328  hscan->rs_cblock = InvalidBlockNumber;
2329  hscan->rs_inited = false;
2330 
2331  return false;
2332  }
2333 
2334  heapgetpage(scan, blockno);
2335  hscan->rs_inited = true;
2336 
2337  return true;
2338 }
2339 
2340 static bool
2341 heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
2342  TupleTableSlot *slot)
2343 {
2344  HeapScanDesc hscan = (HeapScanDesc) scan;
2345  TsmRoutine *tsm = scanstate->tsmroutine;
2346  BlockNumber blockno = hscan->rs_cblock;
2347  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2348 
2349  Page page;
2350  bool all_visible;
2351  OffsetNumber maxoffset;
2352 
2353  /*
2354  * When not using pagemode, we must lock the buffer during tuple
2355  * visibility checks.
2356  */
2357  if (!pagemode)
2358  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
2359 
2360  page = (Page) BufferGetPage(hscan->rs_cbuf);
2361  all_visible = PageIsAllVisible(page) &&
2362  !scan->rs_snapshot->takenDuringRecovery;
2363  maxoffset = PageGetMaxOffsetNumber(page);
2364 
2365  for (;;)
2366  {
2367  OffsetNumber tupoffset;
2368 
2369  CHECK_FOR_INTERRUPTS();
2370 
2371  /* Ask the tablesample method which tuples to check on this page. */
2372  tupoffset = tsm->NextSampleTuple(scanstate,
2373  blockno,
2374  maxoffset);
2375 
2376  if (OffsetNumberIsValid(tupoffset))
2377  {
2378  ItemId itemid;
2379  bool visible;
2380  HeapTuple tuple = &(hscan->rs_ctup);
2381 
2382  /* Skip invalid tuple pointers. */
2383  itemid = PageGetItemId(page, tupoffset);
2384  if (!ItemIdIsNormal(itemid))
2385  continue;
2386 
2387  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2388  tuple->t_len = ItemIdGetLength(itemid);
2389  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2390 
2391 
2392  if (all_visible)
2393  visible = true;
2394  else
2395  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2396  tuple, tupoffset);
2397 
2398  /* in pagemode, heapgetpage did this for us */
2399  if (!pagemode)
2400  HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2401  hscan->rs_cbuf, scan->rs_snapshot);
2402 
2403  /* Try next tuple from same page. */
2404  if (!visible)
2405  continue;
2406 
2407  /* Found visible tuple, return it. */
2408  if (!pagemode)
2409  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2410 
2411  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2412 
2413  /* Count successfully-fetched tuples as heap fetches */
2414  pgstat_count_heap_fetch(scan->rs_rd);
2415 
2416  return true;
2417  }
2418  else
2419  {
2420  /*
2421  * If we get here, it means we've exhausted the items on this page
2422  * and it's time to move to the next.
2423  */
2424  if (!pagemode)
2425  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2426 
2427  ExecClearTuple(slot);
2428  return false;
2429  }
2430  }
2431 
2432  Assert(0);
2433 }
2434 
2435 
2436 /* ----------------------------------------------------------------------------
2437  * Helper functions for the above.
2438  * ----------------------------------------------------------------------------
2439  */
2440 
2441 /*
2442  * Reconstruct and rewrite the given tuple
2443  *
2444  * We cannot simply copy the tuple as-is, for several reasons:
2445  *
2446  * 1. We'd like to squeeze out the values of any dropped columns, both
2447  * to save space and to ensure we have no corner-case failures. (It's
2448  * possible for example that the new table hasn't got a TOAST table
2449  * and so is unable to store any large values of dropped cols.)
2450  *
2451  * 2. The tuple might not even be legal for the new table; this is
2452  * currently only known to happen as an after-effect of ALTER TABLE
2453  * SET WITHOUT OIDS.
2454  *
2455  * So, we must reconstruct the tuple from component Datums.
2456  */
2457 static void
2458 reform_and_rewrite_tuple(HeapTuple tuple,
2459  Relation OldHeap, Relation NewHeap,
2460  Datum *values, bool *isnull, RewriteState rwstate)
2461 {
2462  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2463  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2464  HeapTuple copiedTuple;
2465  int i;
2466 
2467  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2468 
2469  /* Be sure to null out any dropped columns */
2470  for (i = 0; i < newTupDesc->natts; i++)
2471  {
2472  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2473  isnull[i] = true;
2474 
2475  /*
2476  * Use this opportunity to force recompression of any data that's
2477  * compressed with some TOAST compression method other than the one
2478  * configured for the column. We don't actually need to perform the
2479  * compression here; we just need to decompress. That will trigger
2480  * recompression later on.
2481  */
2482  else if (!isnull[i] && TupleDescAttr(newTupDesc, i)->attlen == -1)
2483  {
2484  struct varlena *new_value;
2485  ToastCompressionId cmid;
2486  char cmethod;
2487 
2488  new_value = (struct varlena *) DatumGetPointer(values[i]);
2489  cmid = toast_get_compression_id(new_value);
2490 
2491  /* nothing to be done for uncompressed data */
2492  if (cmid == TOAST_INVALID_COMPRESSION_ID)
2493  continue;
2494 
2495  /* convert compression id to compression method */
2496  switch (cmid)
2497  {
2498  case TOAST_PGLZ_COMPRESSION_ID:
2499  cmethod = TOAST_PGLZ_COMPRESSION;
2500  break;
2501  case TOAST_LZ4_COMPRESSION_ID:
2502  cmethod = TOAST_LZ4_COMPRESSION;
2503  break;
2504  default:
2505  elog(ERROR, "invalid compression method id %d", cmid);
2506  }
2507 
2508  /* if compression method doesn't match then detoast the value */
2509  if (TupleDescAttr(newTupDesc, i)->attcompression != cmethod)
2510  values[i] = PointerGetDatum(detoast_attr(new_value));
2511  }
2512  }
2513 
2514  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2515 
2516  /* The heap rewrite module does the rest */
2517  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2518 
2519  heap_freetuple(copiedTuple);
2520 }
2521 
2522 /*
2523  * Check visibility of the tuple.
2524  */
2525 static bool
2526 SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
2527  HeapTuple tuple,
2528  OffsetNumber tupoffset)
2529 {
2530  HeapScanDesc hscan = (HeapScanDesc) scan;
2531 
2532  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2533  {
2534  /*
2535  * In pageatatime mode, heapgetpage() already did visibility checks,
2536  * so just look at the info it left in rs_vistuples[].
2537  *
2538  * We use a binary search over the known-sorted array. Note: we could
2539  * save some effort if we insisted that NextSampleTuple select tuples
2540  * in increasing order, but it's not clear that there would be enough
2541  * gain to justify the restriction.
2542  */
2543  int start = 0,
2544  end = hscan->rs_ntuples - 1;
2545 
2546  while (start <= end)
2547  {
2548  int mid = (start + end) / 2;
2549  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2550 
2551  if (tupoffset == curoffset)
2552  return true;
2553  else if (tupoffset < curoffset)
2554  end = mid - 1;
2555  else
2556  start = mid + 1;
2557  }
2558 
2559  return false;
2560  }
2561  else
2562  {
2563  /* Otherwise, we have to check the tuple individually. */
2564  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2565  buffer);
2566  }
2567 }
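The page-at-a-time branch is an ordinary binary search over the sorted rs_vistuples[] offsets. An equivalent standalone version, with uint16_t standing in for OffsetNumber, is sketched below.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Binary search for target in a sorted array of line-pointer offsets. */
static bool
demo_offset_visible(const uint16_t *vistuples, int ntuples, uint16_t target)
{
	int			start = 0,
				end = ntuples - 1;

	while (start <= end)
	{
		int			mid = (start + end) / 2;
		uint16_t	cur = vistuples[mid];

		if (target == cur)
			return true;
		else if (target < cur)
			end = mid - 1;
		else
			start = mid + 1;
	}
	return false;
}

int
main(void)
{
	uint16_t	vis[] = {2, 5, 9, 17};

	assert(demo_offset_visible(vis, 4, 9));
	assert(!demo_offset_visible(vis, 4, 10));
	return 0;
}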
2568 
2569 
2570 /* ------------------------------------------------------------------------
2571  * Definition of the heap table access method.
2572  * ------------------------------------------------------------------------
2573  */
2574 
2575 static const TableAmRoutine heapam_methods = {
2576  .type = T_TableAmRoutine,
2577 
2578  .slot_callbacks = heapam_slot_callbacks,
2579 
2580  .scan_begin = heap_beginscan,
2581  .scan_end = heap_endscan,
2582  .scan_rescan = heap_rescan,
2583  .scan_getnextslot = heap_getnextslot,
2584 
2585  .scan_set_tidrange = heap_set_tidrange,
2586  .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
2587 
2588  .parallelscan_estimate = table_block_parallelscan_estimate,
2589  .parallelscan_initialize = table_block_parallelscan_initialize,
2590  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2591 
2592  .index_fetch_begin = heapam_index_fetch_begin,
2593  .index_fetch_reset = heapam_index_fetch_reset,
2594  .index_fetch_end = heapam_index_fetch_end,
2595  .index_fetch_tuple = heapam_index_fetch_tuple,
2596 
2597  .tuple_insert = heapam_tuple_insert,
2598  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2599  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2600  .multi_insert = heap_multi_insert,
2601  .tuple_delete = heapam_tuple_delete,
2602  .tuple_update = heapam_tuple_update,
2603  .tuple_lock = heapam_tuple_lock,
2604 
2605  .tuple_fetch_row_version = heapam_fetch_row_version,
2606  .tuple_get_latest_tid = heap_get_latest_tid,
2607  .tuple_tid_valid = heapam_tuple_tid_valid,
2608  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2609  .index_delete_tuples = heap_index_delete_tuples,
2610 
2611  .relation_set_new_filenode = heapam_relation_set_new_filenode,
2612  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2613  .relation_copy_data = heapam_relation_copy_data,
2614  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2615  .relation_vacuum = heap_vacuum_rel,
2616  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2617  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2618  .index_build_range_scan = heapam_index_build_range_scan,
2619  .index_validate_scan = heapam_index_validate_scan,
2620 
2621  .relation_size = table_block_relation_size,
2622  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2623  .relation_toast_am = heapam_relation_toast_am,
2624  .relation_fetch_toast_slice = heap_fetch_toast_slice,
2625 
2626  .relation_estimate_size = heapam_estimate_rel_size,
2627 
2628  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2629  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2630  .scan_sample_next_block = heapam_scan_sample_next_block,
2631  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2632 };
2633 
2634 
2635 const TableAmRoutine *
2636 GetHeapamTableAmRoutine(void)
2637 {
2638  return &heapam_methods;
2639 }
2640 
2641 Datum
2642 heap_tableam_handler(PG_FUNCTION_ARGS)
2643 {
2644  PG_RETURN_POINTER(&heapam_methods);
2645 }
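heap_tableam_handler() is the function the built-in heap access method's catalog entry points at. A third-party table AM returns its own TableAmRoutine the same way; the hypothetical extension sketch below starts from the stock heap callbacks via GetHeapamTableAmRoutine() (declared in access/tableam.h) and could then override individual members. The myheap_* names are illustrative only.

/* Sketch of an out-of-tree table AM module; build as a PostgreSQL extension. */
#include "postgres.h"

#include "access/tableam.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

PG_FUNCTION_INFO_V1(myheap_tableam_handler);

static TableAmRoutine myheap_methods;

Datum
myheap_tableam_handler(PG_FUNCTION_ARGS)
{
	/* Start from the built-in heap callbacks, then override selectively. */
	myheap_methods = *GetHeapamTableAmRoutine();

	/* e.g. myheap_methods.relation_needs_toast_table = my_needs_toast; */

	PG_RETURN_POINTER(&myheap_methods);
}

At the SQL level such a handler is registered with CREATE FUNCTION ... RETURNS table_am_handler followed by CREATE ACCESS METHOD ... TYPE TABLE HANDLER, after which tables can be created with a USING clause naming the new access method.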