PostgreSQL Source Code  git master
heapam_handler.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This file wires up the lower level heapam.c et al routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/heaptoast.h"
25 #include "access/multixact.h"
26 #include "access/rewriteheap.h"
27 #include "access/syncscan.h"
28 #include "access/tableam.h"
29 #include "access/tsmapi.h"
30 #include "access/xact.h"
31 #include "catalog/catalog.h"
32 #include "catalog/index.h"
33 #include "catalog/storage.h"
34 #include "catalog/storage_xlog.h"
35 #include "commands/progress.h"
36 #include "executor/executor.h"
37 #include "miscadmin.h"
38 #include "pgstat.h"
39 #include "storage/bufmgr.h"
40 #include "storage/bufpage.h"
41 #include "storage/lmgr.h"
42 #include "storage/predicate.h"
43 #include "storage/procarray.h"
44 #include "storage/smgr.h"
45 #include "utils/builtins.h"
46 #include "utils/rel.h"
47 
48 static void reform_and_rewrite_tuple(HeapTuple tuple,
49  Relation OldHeap, Relation NewHeap,
50  Datum *values, bool *isnull, RewriteState rwstate);
51 
52 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
53  HeapTuple tuple,
54  OffsetNumber tupoffset);
55 
57 
59 
60 
61 /* ------------------------------------------------------------------------
62  * Slot related callbacks for heap AM
63  * ------------------------------------------------------------------------
64  */
65 
66 static const TupleTableSlotOps *
68 {
69  return &TTSOpsBufferHeapTuple;
70 }
71 
72 
73 /* ------------------------------------------------------------------------
74  * Index Scan Callbacks for heap AM
75  * ------------------------------------------------------------------------
76  */
77 
78 static IndexFetchTableData *
80 {
82 
83  hscan->xs_base.rel = rel;
84  hscan->xs_cbuf = InvalidBuffer;
85 
86  return &hscan->xs_base;
87 }
88 
89 static void
91 {
92  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
93 
94  if (BufferIsValid(hscan->xs_cbuf))
95  {
96  ReleaseBuffer(hscan->xs_cbuf);
97  hscan->xs_cbuf = InvalidBuffer;
98  }
99 }
100 
101 static void
103 {
104  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
105 
107 
108  pfree(hscan);
109 }
110 
111 static bool
113  ItemPointer tid,
114  Snapshot snapshot,
115  TupleTableSlot *slot,
116  bool *call_again, bool *all_dead)
117 {
118  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
120  bool got_heap_tuple;
121 
122  Assert(TTS_IS_BUFFERTUPLE(slot));
123 
124  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
125  if (!*call_again)
126  {
127  /* Switch to correct buffer if we don't have it already */
128  Buffer prev_buf = hscan->xs_cbuf;
129 
130  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
131  hscan->xs_base.rel,
133 
134  /*
135  * Prune page, but only if we weren't already on this page
136  */
137  if (prev_buf != hscan->xs_cbuf)
138  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
139  }
140 
141  /* Obtain share-lock on the buffer so we can examine visibility */
143  got_heap_tuple = heap_hot_search_buffer(tid,
144  hscan->xs_base.rel,
145  hscan->xs_cbuf,
146  snapshot,
147  &bslot->base.tupdata,
148  all_dead,
149  !*call_again);
150  bslot->base.tupdata.t_self = *tid;
152 
153  if (got_heap_tuple)
154  {
155  /*
156  * Only in a non-MVCC snapshot can more than one member of the HOT
157  * chain be visible.
158  */
159  *call_again = !IsMVCCSnapshot(snapshot);
160 
161  slot->tts_tableOid = RelationGetRelid(scan->rel);
162  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
163  }
164  else
165  {
166  /* We've reached the end of the HOT chain. */
167  *call_again = false;
168  }
169 
170  return got_heap_tuple;
171 }
172 
173 
174 /* ------------------------------------------------------------------------
175  * Callbacks for non-modifying operations on individual tuples for heap AM
176  * ------------------------------------------------------------------------
177  */
178 
179 static bool
181  ItemPointer tid,
182  Snapshot snapshot,
183  TupleTableSlot *slot)
184 {
186  Buffer buffer;
187 
188  Assert(TTS_IS_BUFFERTUPLE(slot));
189 
190  bslot->base.tupdata.t_self = *tid;
191  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer))
192  {
193  /* store in slot, transferring existing pin */
194  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
195  slot->tts_tableOid = RelationGetRelid(relation);
196 
197  return true;
198  }
199 
200  return false;
201 }
202 
203 static bool
205 {
206  HeapScanDesc hscan = (HeapScanDesc) scan;
207 
208  return ItemPointerIsValid(tid) &&
210 }
211 
212 static bool
214  Snapshot snapshot)
215 {
217  bool res;
218 
219  Assert(TTS_IS_BUFFERTUPLE(slot));
220  Assert(BufferIsValid(bslot->buffer));
221 
222  /*
223  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
224  * Caller should be holding pin, but not lock.
225  */
227  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
228  bslot->buffer);
230 
231  return res;
232 }
233 
234 
235 /* ----------------------------------------------------------------------------
236  * Functions for manipulations of physical tuples for heap AM.
237  * ----------------------------------------------------------------------------
238  */
239 
240 static void
242  int options, BulkInsertState bistate)
243 {
244  bool shouldFree = true;
245  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
246 
247  /* Update the tuple with table oid */
248  slot->tts_tableOid = RelationGetRelid(relation);
249  tuple->t_tableOid = slot->tts_tableOid;
250 
251  /* Perform the insertion, and copy the resulting ItemPointer */
252  heap_insert(relation, tuple, cid, options, bistate);
253  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
254 
255  if (shouldFree)
256  pfree(tuple);
257 }
258 
259 static void
261  CommandId cid, int options,
262  BulkInsertState bistate, uint32 specToken)
263 {
264  bool shouldFree = true;
265  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
266 
267  /* Update the tuple with table oid */
268  slot->tts_tableOid = RelationGetRelid(relation);
269  tuple->t_tableOid = slot->tts_tableOid;
270 
271  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
272  options |= HEAP_INSERT_SPECULATIVE;
273 
274  /* Perform the insertion, and copy the resulting ItemPointer */
275  heap_insert(relation, tuple, cid, options, bistate);
276  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
277 
278  if (shouldFree)
279  pfree(tuple);
280 }
281 
282 static void
284  uint32 specToken, bool succeeded)
285 {
286  bool shouldFree = true;
287  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
288 
289  /* adjust the tuple's state accordingly */
290  if (succeeded)
291  heap_finish_speculative(relation, &slot->tts_tid);
292  else
293  heap_abort_speculative(relation, &slot->tts_tid);
294 
295  if (shouldFree)
296  pfree(tuple);
297 }
298 
299 static TM_Result
301  Snapshot snapshot, Snapshot crosscheck, bool wait,
302  TM_FailureData *tmfd, bool changingPart)
303 {
304  /*
305  * Currently Deleting of index tuples are handled at vacuum, in case if
306  * the storage itself is cleaning the dead tuples by itself, it is the
307  * time to call the index tuple deletion also.
308  */
309  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
310 }
311 
312 
313 static TM_Result
315  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
316  bool wait, TM_FailureData *tmfd,
317  LockTupleMode *lockmode, bool *update_indexes)
318 {
319  bool shouldFree = true;
320  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
321  TM_Result result;
322 
323  /* Update the tuple with table oid */
324  slot->tts_tableOid = RelationGetRelid(relation);
325  tuple->t_tableOid = slot->tts_tableOid;
326 
327  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
328  tmfd, lockmode);
329  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
330 
331  /*
332  * Decide whether new index entries are needed for the tuple
333  *
334  * Note: heap_update returns the tid (location) of the new tuple in the
335  * t_self field.
336  *
337  * If it's a HOT update, we mustn't insert new index entries.
338  */
339  *update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);
340 
341  if (shouldFree)
342  pfree(tuple);
343 
344  return result;
345 }
346 
347 static TM_Result
350  LockWaitPolicy wait_policy, uint8 flags,
351  TM_FailureData *tmfd)
352 {
354  TM_Result result;
355  Buffer buffer;
356  HeapTuple tuple = &bslot->base.tupdata;
357  bool follow_updates;
358 
359  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
360  tmfd->traversed = false;
361 
362  Assert(TTS_IS_BUFFERTUPLE(slot));
363 
364 tuple_lock_retry:
365  tuple->t_self = *tid;
366  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
367  follow_updates, &buffer, tmfd);
368 
369  if (result == TM_Updated &&
371  {
372  /* Should not encounter speculative tuple on recheck */
374 
375  ReleaseBuffer(buffer);
376 
377  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
378  {
379  SnapshotData SnapshotDirty;
380  TransactionId priorXmax;
381 
382  /* it was updated, so look at the updated version */
383  *tid = tmfd->ctid;
384  /* updated row should have xmin matching this xmax */
385  priorXmax = tmfd->xmax;
386 
387  /* signal that a tuple later in the chain is getting locked */
388  tmfd->traversed = true;
389 
390  /*
391  * fetch target tuple
392  *
393  * Loop here to deal with updated or busy tuples
394  */
395  InitDirtySnapshot(SnapshotDirty);
396  for (;;)
397  {
399  ereport(ERROR,
400  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
401  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
402 
403  tuple->t_self = *tid;
404  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer))
405  {
406  /*
407  * If xmin isn't what we're expecting, the slot must have
408  * been recycled and reused for an unrelated tuple. This
409  * implies that the latest version of the row was deleted,
410  * so we need do nothing. (Should be safe to examine xmin
411  * without getting buffer's content lock. We assume
412  * reading a TransactionId to be atomic, and Xmin never
413  * changes in an existing tuple, except to invalid or
414  * frozen, and neither of those can match priorXmax.)
415  */
417  priorXmax))
418  {
419  ReleaseBuffer(buffer);
420  return TM_Deleted;
421  }
422 
423  /* otherwise xmin should not be dirty... */
424  if (TransactionIdIsValid(SnapshotDirty.xmin))
425  ereport(ERROR,
427  errmsg_internal("t_xmin is uncommitted in tuple to be updated")));
428 
429  /*
430  * If tuple is being updated by other transaction then we
431  * have to wait for its commit/abort, or die trying.
432  */
433  if (TransactionIdIsValid(SnapshotDirty.xmax))
434  {
435  ReleaseBuffer(buffer);
436  switch (wait_policy)
437  {
438  case LockWaitBlock:
439  XactLockTableWait(SnapshotDirty.xmax,
440  relation, &tuple->t_self,
442  break;
443  case LockWaitSkip:
444  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
445  /* skip instead of waiting */
446  return TM_WouldBlock;
447  break;
448  case LockWaitError:
449  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
450  ereport(ERROR,
451  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
452  errmsg("could not obtain lock on row in relation \"%s\"",
453  RelationGetRelationName(relation))));
454  break;
455  }
456  continue; /* loop back to repeat heap_fetch */
457  }
458 
459  /*
460  * If tuple was inserted by our own transaction, we have
461  * to check cmin against cid: cmin >= current CID means
462  * our command cannot see the tuple, so we should ignore
463  * it. Otherwise heap_lock_tuple() will throw an error,
464  * and so would any later attempt to update or delete the
465  * tuple. (We need not check cmax because
466  * HeapTupleSatisfiesDirty will consider a tuple deleted
467  * by our transaction dead, regardless of cmax.) We just
468  * checked that priorXmax == xmin, so we can test that
469  * variable instead of doing HeapTupleHeaderGetXmin again.
470  */
471  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
472  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
473  {
474  tmfd->xmax = priorXmax;
475 
476  /*
477  * Cmin is the problematic value, so store that. See
478  * above.
479  */
480  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
481  ReleaseBuffer(buffer);
482  return TM_SelfModified;
483  }
484 
485  /*
486  * This is a live tuple, so try to lock it again.
487  */
488  ReleaseBuffer(buffer);
489  goto tuple_lock_retry;
490  }
491 
492  /*
493  * If the referenced slot was actually empty, the latest
494  * version of the row must have been deleted, so we need do
495  * nothing.
496  */
497  if (tuple->t_data == NULL)
498  {
499  return TM_Deleted;
500  }
501 
502  /*
503  * As above, if xmin isn't what we're expecting, do nothing.
504  */
506  priorXmax))
507  {
508  if (BufferIsValid(buffer))
509  ReleaseBuffer(buffer);
510  return TM_Deleted;
511  }
512 
513  /*
514  * If we get here, the tuple was found but failed
515  * SnapshotDirty. Assuming the xmin is either a committed xact
516  * or our own xact (as it certainly should be if we're trying
517  * to modify the tuple), this must mean that the row was
518  * updated or deleted by either a committed xact or our own
519  * xact. If it was deleted, we can ignore it; if it was
520  * updated then chain up to the next version and repeat the
521  * whole process.
522  *
523  * As above, it should be safe to examine xmax and t_ctid
524  * without the buffer content lock, because they can't be
525  * changing.
526  */
527  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
528  {
529  /* deleted, so forget about it */
530  if (BufferIsValid(buffer))
531  ReleaseBuffer(buffer);
532  return TM_Deleted;
533  }
534 
535  /* updated, so look at the updated row */
536  *tid = tuple->t_data->t_ctid;
537  /* updated row should have xmin matching this xmax */
538  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
539  if (BufferIsValid(buffer))
540  ReleaseBuffer(buffer);
541  /* loop back to fetch next in chain */
542  }
543  }
544  else
545  {
546  /* tuple was deleted, so give up */
547  return TM_Deleted;
548  }
549  }
550 
551  slot->tts_tableOid = RelationGetRelid(relation);
552  tuple->t_tableOid = slot->tts_tableOid;
553 
554  /* store in slot, transferring existing pin */
555  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
556 
557  return result;
558 }
559 
560 
561 /* ------------------------------------------------------------------------
562  * DDL related callbacks for heap AM.
563  * ------------------------------------------------------------------------
564  */
565 
566 static void
568  const RelFileNode *newrnode,
569  char persistence,
570  TransactionId *freezeXid,
571  MultiXactId *minmulti)
572 {
573  SMgrRelation srel;
574 
575  /*
576  * Initialize to the minimum XID that could put tuples in the table. We
577  * know that no xacts older than RecentXmin are still running, so that
578  * will do.
579  */
580  *freezeXid = RecentXmin;
581 
582  /*
583  * Similarly, initialize the minimum Multixact to the first value that
584  * could possibly be stored in tuples in the table. Running transactions
585  * could reuse values from their local cache, so we are careful to
586  * consider all currently running multis.
587  *
588  * XXX this could be refined further, but is it worth the hassle?
589  */
590  *minmulti = GetOldestMultiXactId();
591 
592  srel = RelationCreateStorage(*newrnode, persistence);
593 
594  /*
595  * If required, set up an init fork for an unlogged table so that it can
596  * be correctly reinitialized on restart. An immediate sync is required
597  * even if the page has been logged, because the write did not go through
598  * shared_buffers and therefore a concurrent checkpoint may have moved the
599  * redo pointer past our xlog record. Recovery may as well remove it
600  * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
601  * record. Therefore, logging is necessary even if wal_level=minimal.
602  */
603  if (persistence == RELPERSISTENCE_UNLOGGED)
604  {
605  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
606  rel->rd_rel->relkind == RELKIND_MATVIEW ||
607  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
608  smgrcreate(srel, INIT_FORKNUM, false);
609  log_smgrcreate(newrnode, INIT_FORKNUM);
611  }
612 
613  smgrclose(srel);
614 }
615 
616 static void
618 {
619  RelationTruncate(rel, 0);
620 }
621 
622 static void
624 {
625  SMgrRelation dstrel;
626 
627  dstrel = smgropen(*newrnode, rel->rd_backend);
628  RelationOpenSmgr(rel);
629 
630  /*
631  * Since we copy the file directly without looking at the shared buffers,
632  * we'd better first flush out any pages of the source relation that are
633  * in shared buffers. We assume no new changes will be made while we are
634  * holding exclusive lock on the rel.
635  */
637 
638  /*
639  * Create and copy all forks of the relation, and schedule unlinking of
640  * old physical files.
641  *
642  * NOTE: any conflict in relfilenode value will be caught in
643  * RelationCreateStorage().
644  */
645  RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
646 
647  /* copy main fork */
649  rel->rd_rel->relpersistence);
650 
651  /* copy those extra forks that exist */
652  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
653  forkNum <= MAX_FORKNUM; forkNum++)
654  {
655  if (smgrexists(rel->rd_smgr, forkNum))
656  {
657  smgrcreate(dstrel, forkNum, false);
658 
659  /*
660  * WAL log creation if the relation is persistent, or this is the
661  * init fork of an unlogged relation.
662  */
663  if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
664  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
665  forkNum == INIT_FORKNUM))
666  log_smgrcreate(newrnode, forkNum);
667  RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
668  rel->rd_rel->relpersistence);
669  }
670  }
671 
672 
673  /* drop old relation, and close new one */
674  RelationDropStorage(rel);
675  smgrclose(dstrel);
676 }
677 
678 static void
680  Relation OldIndex, bool use_sort,
682  TransactionId *xid_cutoff,
683  MultiXactId *multi_cutoff,
684  double *num_tuples,
685  double *tups_vacuumed,
686  double *tups_recently_dead)
687 {
688  RewriteState rwstate;
689  IndexScanDesc indexScan;
690  TableScanDesc tableScan;
691  HeapScanDesc heapScan;
692  bool is_system_catalog;
693  Tuplesortstate *tuplesort;
694  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
695  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
696  TupleTableSlot *slot;
697  int natts;
698  Datum *values;
699  bool *isnull;
701  BlockNumber prev_cblock = InvalidBlockNumber;
702 
703  /* Remember if it's a system catalog */
704  is_system_catalog = IsSystemRelation(OldHeap);
705 
706  /*
707  * Valid smgr_targblock implies something already wrote to the relation.
708  * This may be harmless, but this function hasn't planned for it.
709  */
711 
712  /* Preallocate values/isnull arrays */
713  natts = newTupDesc->natts;
714  values = (Datum *) palloc(natts * sizeof(Datum));
715  isnull = (bool *) palloc(natts * sizeof(bool));
716 
717  /* Initialize the rewrite operation */
718  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
719  *multi_cutoff);
720 
721 
722  /* Set up sorting if wanted */
723  if (use_sort)
724  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
726  NULL, false);
727  else
728  tuplesort = NULL;
729 
730  /*
731  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
732  * that still need to be copied, we scan with SnapshotAny and use
733  * HeapTupleSatisfiesVacuum for the visibility test.
734  */
735  if (OldIndex != NULL && !use_sort)
736  {
737  const int ci_index[] = {
740  };
741  int64 ci_val[2];
742 
743  /* Set phase and OIDOldIndex to columns */
745  ci_val[1] = RelationGetRelid(OldIndex);
746  pgstat_progress_update_multi_param(2, ci_index, ci_val);
747 
748  tableScan = NULL;
749  heapScan = NULL;
750  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
751  index_rescan(indexScan, NULL, 0, NULL, 0);
752  }
753  else
754  {
755  /* In scan-and-sort mode and also VACUUM FULL, set phase */
758 
759  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
760  heapScan = (HeapScanDesc) tableScan;
761  indexScan = NULL;
762 
763  /* Set total heap blocks */
765  heapScan->rs_nblocks);
766  }
767 
768  slot = table_slot_create(OldHeap, NULL);
769  hslot = (BufferHeapTupleTableSlot *) slot;
770 
771  /*
772  * Scan through the OldHeap, either in OldIndex order or sequentially;
773  * copy each tuple into the NewHeap, or transiently to the tuplesort
774  * module. Note that we don't bother sorting dead tuples (they won't get
775  * to the new table anyway).
776  */
777  for (;;)
778  {
779  HeapTuple tuple;
780  Buffer buf;
781  bool isdead;
782 
784 
785  if (indexScan != NULL)
786  {
787  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
788  break;
789 
790  /* Since we used no scan keys, should never need to recheck */
791  if (indexScan->xs_recheck)
792  elog(ERROR, "CLUSTER does not support lossy index conditions");
793  }
794  else
795  {
796  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
797  {
798  /*
799  * If the last pages of the scan were empty, we would go to
800  * the next phase while heap_blks_scanned != heap_blks_total.
801  * Instead, to ensure that heap_blks_scanned is equivalent to
802  * total_heap_blks after the table scan phase, this parameter
803  * is manually updated to the correct value when the table
804  * scan finishes.
805  */
807  heapScan->rs_nblocks);
808  break;
809  }
810 
811  /*
812  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
813  * scanned
814  *
815  * Note that heapScan may start at an offset and wrap around, i.e.
816  * rs_startblock may be >0, and rs_cblock may end with a number
817  * below rs_startblock. To prevent showing this wraparound to the
818  * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
819  */
820  if (prev_cblock != heapScan->rs_cblock)
821  {
823  (heapScan->rs_cblock +
824  heapScan->rs_nblocks -
825  heapScan->rs_startblock
826  ) % heapScan->rs_nblocks + 1);
827  prev_cblock = heapScan->rs_cblock;
828  }
829  }
830 
831  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
832  buf = hslot->buffer;
833 
835 
836  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
837  {
838  case HEAPTUPLE_DEAD:
839  /* Definitely dead */
840  isdead = true;
841  break;
843  *tups_recently_dead += 1;
844  /* fall through */
845  case HEAPTUPLE_LIVE:
846  /* Live or recently dead, must copy it */
847  isdead = false;
848  break;
850 
851  /*
852  * Since we hold exclusive lock on the relation, normally the
853  * only way to see this is if it was inserted earlier in our
854  * own transaction. However, it can happen in system
855  * catalogs, since we tend to release write lock before commit
856  * there. Give a warning if neither case applies; but in any
857  * case we had better copy it.
858  */
859  if (!is_system_catalog &&
861  elog(WARNING, "concurrent insert in progress within table \"%s\"",
862  RelationGetRelationName(OldHeap));
863  /* treat as live */
864  isdead = false;
865  break;
867 
868  /*
869  * Similar situation to INSERT_IN_PROGRESS case.
870  */
871  if (!is_system_catalog &&
873  elog(WARNING, "concurrent delete in progress within table \"%s\"",
874  RelationGetRelationName(OldHeap));
875  /* treat as recently dead */
876  *tups_recently_dead += 1;
877  isdead = false;
878  break;
879  default:
880  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
881  isdead = false; /* keep compiler quiet */
882  break;
883  }
884 
886 
887  if (isdead)
888  {
889  *tups_vacuumed += 1;
890  /* heap rewrite module still needs to see it... */
891  if (rewrite_heap_dead_tuple(rwstate, tuple))
892  {
893  /* A previous recently-dead tuple is now known dead */
894  *tups_vacuumed += 1;
895  *tups_recently_dead -= 1;
896  }
897  continue;
898  }
899 
900  *num_tuples += 1;
901  if (tuplesort != NULL)
902  {
903  tuplesort_putheaptuple(tuplesort, tuple);
904 
905  /*
906  * In scan-and-sort mode, report increase in number of tuples
907  * scanned
908  */
910  *num_tuples);
911  }
912  else
913  {
914  const int ct_index[] = {
917  };
918  int64 ct_val[2];
919 
920  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
921  values, isnull, rwstate);
922 
923  /*
924  * In indexscan mode and also VACUUM FULL, report increase in
925  * number of tuples scanned and written
926  */
927  ct_val[0] = *num_tuples;
928  ct_val[1] = *num_tuples;
929  pgstat_progress_update_multi_param(2, ct_index, ct_val);
930  }
931  }
932 
933  if (indexScan != NULL)
934  index_endscan(indexScan);
935  if (tableScan != NULL)
936  table_endscan(tableScan);
937  if (slot)
939 
940  /*
941  * In scan-and-sort mode, complete the sort, then read out all live tuples
942  * from the tuplestore and write them to the new relation.
943  */
944  if (tuplesort != NULL)
945  {
946  double n_tuples = 0;
947 
948  /* Report that we are now sorting tuples */
951 
952  tuplesort_performsort(tuplesort);
953 
954  /* Report that we are now writing new heap */
957 
958  for (;;)
959  {
960  HeapTuple tuple;
961 
963 
964  tuple = tuplesort_getheaptuple(tuplesort, true);
965  if (tuple == NULL)
966  break;
967 
968  n_tuples += 1;
970  OldHeap, NewHeap,
971  values, isnull,
972  rwstate);
973  /* Report n_tuples */
975  n_tuples);
976  }
977 
978  tuplesort_end(tuplesort);
979  }
980 
981  /* Write out any remaining tuples, and fsync if needed */
982  end_heap_rewrite(rwstate);
983 
984  /* Clean up */
985  pfree(values);
986  pfree(isnull);
987 }
988 
989 static bool
991  BufferAccessStrategy bstrategy)
992 {
993  HeapScanDesc hscan = (HeapScanDesc) scan;
994 
995  /*
996  * We must maintain a pin on the target page's buffer to ensure that
997  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
998  * under us. Hence, pin the page until we are done looking at it. We
999  * also choose to hold sharelock on the buffer throughout --- we could
1000  * release and re-acquire sharelock for each tuple, but since we aren't
1001  * doing much work per tuple, the extra lock traffic is probably better
1002  * avoided.
1003  */
1004  hscan->rs_cblock = blockno;
1005  hscan->rs_cindex = FirstOffsetNumber;
1006  hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
1007  blockno, RBM_NORMAL, bstrategy);
1009 
1010  /* in heap all blocks can contain tuples, so always return true */
1011  return true;
1012 }
1013 
1014 static bool
1016  double *liverows, double *deadrows,
1017  TupleTableSlot *slot)
1018 {
1019  HeapScanDesc hscan = (HeapScanDesc) scan;
1020  Page targpage;
1021  OffsetNumber maxoffset;
1022  BufferHeapTupleTableSlot *hslot;
1023 
1024  Assert(TTS_IS_BUFFERTUPLE(slot));
1025 
1026  hslot = (BufferHeapTupleTableSlot *) slot;
1027  targpage = BufferGetPage(hscan->rs_cbuf);
1028  maxoffset = PageGetMaxOffsetNumber(targpage);
1029 
1030  /* Inner loop over all tuples on the selected page */
1031  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1032  {
1033  ItemId itemid;
1034  HeapTuple targtuple = &hslot->base.tupdata;
1035  bool sample_it = false;
1036 
1037  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1038 
1039  /*
1040  * We ignore unused and redirect line pointers. DEAD line pointers
1041  * should be counted as dead, because we need vacuum to run to get rid
1042  * of them. Note that this rule agrees with the way that
1043  * heap_page_prune() counts things.
1044  */
1045  if (!ItemIdIsNormal(itemid))
1046  {
1047  if (ItemIdIsDead(itemid))
1048  *deadrows += 1;
1049  continue;
1050  }
1051 
1052  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1053 
1054  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1055  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1056  targtuple->t_len = ItemIdGetLength(itemid);
1057 
1058  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1059  hscan->rs_cbuf))
1060  {
1061  case HEAPTUPLE_LIVE:
1062  sample_it = true;
1063  *liverows += 1;
1064  break;
1065 
1066  case HEAPTUPLE_DEAD:
1068  /* Count dead and recently-dead rows */
1069  *deadrows += 1;
1070  break;
1071 
1073 
1074  /*
1075  * Insert-in-progress rows are not counted. We assume that
1076  * when the inserting transaction commits or aborts, it will
1077  * send a stats message to increment the proper count. This
1078  * works right only if that transaction ends after we finish
1079  * analyzing the table; if things happen in the other order,
1080  * its stats update will be overwritten by ours. However, the
1081  * error will be large only if the other transaction runs long
1082  * enough to insert many tuples, so assuming it will finish
1083  * after us is the safer option.
1084  *
1085  * A special case is that the inserting transaction might be
1086  * our own. In this case we should count and sample the row,
1087  * to accommodate users who load a table and analyze it in one
1088  * transaction. (pgstat_report_analyze has to adjust the
1089  * numbers we send to the stats collector to make this come
1090  * out right.)
1091  */
1093  {
1094  sample_it = true;
1095  *liverows += 1;
1096  }
1097  break;
1098 
1100 
1101  /*
1102  * We count and sample delete-in-progress rows the same as
1103  * live ones, so that the stats counters come out right if the
1104  * deleting transaction commits after us, per the same
1105  * reasoning given above.
1106  *
1107  * If the delete was done by our own transaction, however, we
1108  * must count the row as dead to make pgstat_report_analyze's
1109  * stats adjustments come out right. (Note: this works out
1110  * properly when the row was both inserted and deleted in our
1111  * xact.)
1112  *
1113  * The net effect of these choices is that we act as though an
1114  * IN_PROGRESS transaction hasn't happened yet, except if it
1115  * is our own transaction, which we assume has happened.
1116  *
1117  * This approach ensures that we behave sanely if we see both
1118  * the pre-image and post-image rows for a row being updated
1119  * by a concurrent transaction: we will sample the pre-image
1120  * but not the post-image. We also get sane results if the
1121  * concurrent transaction never commits.
1122  */
1124  *deadrows += 1;
1125  else
1126  {
1127  sample_it = true;
1128  *liverows += 1;
1129  }
1130  break;
1131 
1132  default:
1133  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1134  break;
1135  }
1136 
1137  if (sample_it)
1138  {
1139  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1140  hscan->rs_cindex++;
1141 
1142  /* note that we leave the buffer locked here! */
1143  return true;
1144  }
1145  }
1146 
1147  /* Now release the lock and pin on the page */
1148  UnlockReleaseBuffer(hscan->rs_cbuf);
1149  hscan->rs_cbuf = InvalidBuffer;
1150 
1151  /* also prevent old slot contents from having pin on page */
1152  ExecClearTuple(slot);
1153 
1154  return false;
1155 }
1156 
/*
 * Index-build heap scan (range variant).
 *
 * Walks the heap relation and, for every tuple that should be indexed,
 * evaluates the partial-index predicate (if any), forms the index datums
 * with FormIndexDatum() and hands them to callback() together with the TID
 * to index under and a tupleIsAlive flag.  Heap-only tuples are reported
 * under the TID of their HOT-chain root.
 *
 * scan == NULL means a serial build: the scan is started here.  Otherwise
 * the supplied scan is used and its snapshot adopted (parallel build).  In
 * both cases the scan is ended before returning.
 *
 * allow_sync == false restricts the scan to start_blockno/numblocks via
 * heap_setscanlimits(); with allow_sync the whole relation must be scanned.
 * anyvisible makes in-progress inserted/deleted tuples get indexed without
 * waiting, and is asserted to be incompatible with uniqueness checking.
 *
 * Returns reltuples, the number of tuples counted as live.
 *
 * NOTE(review): several lines of this function -- including the line that
 * carries its name, some local declarations, case labels and buffer-lock
 * calls -- are absent from this listing; the gaps show up as jumps in the
 * embedded line numbers.  Read the control flow below with that in mind.
 */
1157 static double
1159  Relation indexRelation,
1160  IndexInfo *indexInfo,
1161  bool allow_sync,
1162  bool anyvisible,
1163  bool progress,
1164  BlockNumber start_blockno,
1165  BlockNumber numblocks,
1167  void *callback_state,
1168  TableScanDesc scan)
1169 {
1170  HeapScanDesc hscan;
1171  bool is_system_catalog;
1172  bool checking_uniqueness;
1173  HeapTuple heapTuple;
1175  bool isnull[INDEX_MAX_KEYS];
1176  double reltuples;
1177  ExprState *predicate;
1178  TupleTableSlot *slot;
1179  EState *estate;
1180  ExprContext *econtext;
1181  Snapshot snapshot;
1182  bool need_unregister_snapshot = false;
1184  BlockNumber previous_blkno = InvalidBlockNumber;
1185  BlockNumber root_blkno = InvalidBlockNumber;
1186  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1187 
1188  /*
1189  * sanity checks
1190  */
1191  Assert(OidIsValid(indexRelation->rd_rel->relam));
1192 
1193  /* Remember if it's a system catalog */
1194  is_system_catalog = IsSystemRelation(heapRelation);
1195 
1196  /* See whether we're verifying uniqueness/exclusion properties */
1197  checking_uniqueness = (indexInfo->ii_Unique ||
1198  indexInfo->ii_ExclusionOps != NULL);
1199 
1200  /*
1201  * "Any visible" mode is not compatible with uniqueness checks; make sure
1202  * only one of those is requested.
1203  */
1204  Assert(!(anyvisible && checking_uniqueness));
1205 
1206  /*
1207  * Need an EState for evaluation of index expressions and partial-index
1208  * predicates. Also a slot to hold the current tuple.
1209  */
1210  estate = CreateExecutorState();
1211  econtext = GetPerTupleExprContext(estate);
1212  slot = table_slot_create(heapRelation, NULL);
1213 
1214  /* Arrange for econtext's scan tuple to be the tuple under test */
1215  econtext->ecxt_scantuple = slot;
1216 
1217  /* Set up execution state for predicate, if any. */
1218  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1219 
1220  /*
1221  * Prepare for scan of the base relation. In a normal index build, we use
1222  * SnapshotAny because we must retrieve all tuples and do our own time
1223  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1224  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1225  * and index whatever's live according to that.
1226  */
1227  OldestXmin = InvalidTransactionId;
1228 
1229  /* okay to ignore lazy VACUUMs here */
1230  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1231  OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1232 
1233  if (!scan)
1234  {
1235  /*
1236  * Serial index build.
1237  *
1238  * Must begin our own heap scan in this case. We may also need to
1239  * register a snapshot whose lifetime is under our direct control.
1240  */
1241  if (!TransactionIdIsValid(OldestXmin))
1242  {
1244  need_unregister_snapshot = true;
1245  }
1246  else
1247  snapshot = SnapshotAny;
1248 
1249  scan = table_beginscan_strat(heapRelation, /* relation */
1250  snapshot, /* snapshot */
1251  0, /* number of keys */
1252  NULL, /* scan key */
1253  true, /* buffer access strategy OK */
1254  allow_sync); /* syncscan OK? */
1255  }
1256  else
1257  {
1258  /*
1259  * Parallel index build.
1260  *
1261  * Parallel case never registers/unregisters own snapshot. Snapshot
1262  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1263  * snapshot, based on same criteria as serial case.
1264  */
1266  Assert(allow_sync);
1267  snapshot = scan->rs_snapshot;
1268  }
1269 
1270  hscan = (HeapScanDesc) scan;
1271 
1272  /*
1273  * Must have called GetOldestNonRemovableTransactionId() if using
1274  * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1275  * worth checking this for parallel builds, since ambuild routines that
1276  * support parallel builds must work these details out for themselves.)
1277  */
1278  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1279  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1280  !TransactionIdIsValid(OldestXmin));
1281  Assert(snapshot == SnapshotAny || !anyvisible);
1282 
1283  /* Publish number of blocks to scan */
1284  if (progress)
1285  {
1286  BlockNumber nblocks;
1287 
1288  if (hscan->rs_base.rs_parallel != NULL)
1289  {
1291 
1293  nblocks = pbscan->phs_nblocks;
1294  }
1295  else
1296  nblocks = hscan->rs_nblocks;
1297 
1299  nblocks);
1300  }
1301 
1302  /* set our scan endpoints */
1303  if (!allow_sync)
1304  heap_setscanlimits(scan, start_blockno, numblocks);
1305  else
1306  {
1307  /* syncscan can only be requested on whole relation */
1308  Assert(start_blockno == 0);
1309  Assert(numblocks == InvalidBlockNumber);
1310  }
1311 
1312  reltuples = 0;
1313 
1314  /*
1315  * Scan all tuples in the base relation.
1316  */
1317  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1318  {
1319  bool tupleIsAlive;
1320 
1322 
1323  /* Report scan progress, if asked to. */
1324  if (progress)
1325  {
1326  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1327 
1328  if (blocks_done != previous_blkno)
1329  {
1331  blocks_done);
1332  previous_blkno = blocks_done;
1333  }
1334  }
1335 
1336  /*
1337  * When dealing with a HOT-chain of updated tuples, we want to index
1338  * the values of the live tuple (if any), but index it under the TID
1339  * of the chain's root tuple. This approach is necessary to preserve
1340  * the HOT-chain structure in the heap. So we need to be able to find
1341  * the root item offset for every tuple that's in a HOT-chain. When
1342  * first reaching a new page of the relation, call
1343  * heap_get_root_tuples() to build a map of root item offsets on the
1344  * page.
1345  *
1346  * It might look unsafe to use this information across buffer
1347  * lock/unlock. However, we hold ShareLock on the table so no
1348  * ordinary insert/update/delete should occur; and we hold pin on the
1349  * buffer continuously while visiting the page, so no pruning
1350  * operation can occur either.
1351  *
1352  * In cases with only ShareUpdateExclusiveLock on the table, it's
1353  * possible for some HOT tuples to appear that we didn't know about
1354  * when we first read the page. To handle that case, we re-obtain the
1355  * list of root offsets when a HOT tuple points to a root item that we
1356  * don't know about.
1357  *
1358  * Also, although our opinions about tuple liveness could change while
1359  * we scan the page (due to concurrent transaction commits/aborts),
1360  * the chain root locations won't, so this info doesn't need to be
1361  * rebuilt after waiting for another transaction.
1362  *
1363  * Note the implied assumption that there is no more than one live
1364  * tuple per HOT-chain --- else we could create more than one index
1365  * entry pointing to the same root tuple.
1366  */
1367  if (hscan->rs_cblock != root_blkno)
1368  {
1369  Page page = BufferGetPage(hscan->rs_cbuf);
1370 
1372  heap_get_root_tuples(page, root_offsets);
1374 
1375  root_blkno = hscan->rs_cblock;
1376  }
1377 
1378  if (snapshot == SnapshotAny)
1379  {
1380  /* do our own time qual check */
1381  bool indexIt;
1382  TransactionId xwait;
1383 
1384  recheck:
1385 
1386  /*
1387  * We could possibly get away with not locking the buffer here,
1388  * since caller should hold ShareLock on the relation, but let's
1389  * be conservative about it. (This remark is still correct even
1390  * with HOT-pruning: our pin on the buffer prevents pruning.)
1391  */
1393 
1394  /*
1395  * The criteria for counting a tuple as live in this block need to
1396  * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1397  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1398  * reltuples values, e.g. when there are many recently-dead
1399  * tuples.
1400  */
1401  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1402  hscan->rs_cbuf))
1403  {
1404  case HEAPTUPLE_DEAD:
1405  /* Definitely dead, we can ignore it */
1406  indexIt = false;
1407  tupleIsAlive = false;
1408  break;
1409  case HEAPTUPLE_LIVE:
1410  /* Normal case, index and unique-check it */
1411  indexIt = true;
1412  tupleIsAlive = true;
1413  /* Count it as live, too */
1414  reltuples += 1;
1415  break;
1417 
1418  /*
1419  * If tuple is recently deleted then we must index it
1420  * anyway to preserve MVCC semantics. (Pre-existing
1421  * transactions could try to use the index after we finish
1422  * building it, and may need to see such tuples.)
1423  *
1424  * However, if it was HOT-updated then we must only index
1425  * the live tuple at the end of the HOT-chain. Since this
1426  * breaks semantics for pre-existing snapshots, mark the
1427  * index as unusable for them.
1428  *
1429  * We don't count recently-dead tuples in reltuples, even
1430  * if we index them; see heapam_scan_analyze_next_tuple().
1431  */
1432  if (HeapTupleIsHotUpdated(heapTuple))
1433  {
1434  indexIt = false;
1435  /* mark the index as unsafe for old snapshots */
1436  indexInfo->ii_BrokenHotChain = true;
1437  }
1438  else
1439  indexIt = true;
1440  /* In any case, exclude the tuple from unique-checking */
1441  tupleIsAlive = false;
1442  break;
1444 
1445  /*
1446  * In "anyvisible" mode, this tuple is visible and we
1447  * don't need any further checks.
1448  */
1449  if (anyvisible)
1450  {
1451  indexIt = true;
1452  tupleIsAlive = true;
1453  reltuples += 1;
1454  break;
1455  }
1456 
1457  /*
1458  * Since caller should hold ShareLock or better, normally
1459  * the only way to see this is if it was inserted earlier
1460  * in our own transaction. However, it can happen in
1461  * system catalogs, since we tend to release write lock
1462  * before commit there. Give a warning if neither case
1463  * applies.
1464  */
1465  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1467  {
1468  if (!is_system_catalog)
1469  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1470  RelationGetRelationName(heapRelation));
1471 
1472  /*
1473  * If we are performing uniqueness checks, indexing
1474  * such a tuple could lead to a bogus uniqueness
1475  * failure. In that case we wait for the inserting
1476  * transaction to finish and check again.
1477  */
1478  if (checking_uniqueness)
1479  {
1480  /*
1481  * Must drop the lock on the buffer before we wait
1482  */
1484  XactLockTableWait(xwait, heapRelation,
1485  &heapTuple->t_self,
1488  goto recheck;
1489  }
1490  }
1491  else
1492  {
1493  /*
1494  * For consistency with
1495  * heapam_scan_analyze_next_tuple(), count
1496  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1497  * when inserted by our own transaction.
1498  */
1499  reltuples += 1;
1500  }
1501 
1502  /*
1503  * We must index such tuples, since if the index build
1504  * commits then they're good.
1505  */
1506  indexIt = true;
1507  tupleIsAlive = true;
1508  break;
1510 
1511  /*
1512  * As with INSERT_IN_PROGRESS case, this is unexpected
1513  * unless it's our own deletion or a system catalog; but
1514  * in anyvisible mode, this tuple is visible.
1515  */
1516  if (anyvisible)
1517  {
1518  indexIt = true;
1519  tupleIsAlive = false;
1520  reltuples += 1;
1521  break;
1522  }
1523 
1524  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1526  {
1527  if (!is_system_catalog)
1528  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1529  RelationGetRelationName(heapRelation));
1530 
1531  /*
1532  * If we are performing uniqueness checks, assuming
1533  * the tuple is dead could lead to missing a
1534  * uniqueness violation. In that case we wait for the
1535  * deleting transaction to finish and check again.
1536  *
1537  * Also, if it's a HOT-updated tuple, we should not
1538  * index it but rather the live tuple at the end of
1539  * the HOT-chain. However, the deleting transaction
1540  * could abort, possibly leaving this tuple as live
1541  * after all, in which case it has to be indexed. The
1542  * only way to know what to do is to wait for the
1543  * deleting transaction to finish and check again.
1544  */
1545  if (checking_uniqueness ||
1546  HeapTupleIsHotUpdated(heapTuple))
1547  {
1548  /*
1549  * Must drop the lock on the buffer before we wait
1550  */
1552  XactLockTableWait(xwait, heapRelation,
1553  &heapTuple->t_self,
1556  goto recheck;
1557  }
1558 
1559  /*
1560  * Otherwise index it but don't check for uniqueness,
1561  * the same as a RECENTLY_DEAD tuple.
1562  */
1563  indexIt = true;
1564 
1565  /*
1566  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1567  * if they were not deleted by the current
1568  * transaction. That's what
1569  * heapam_scan_analyze_next_tuple() does, and we want
1570  * the behavior to be consistent.
1571  */
1572  reltuples += 1;
1573  }
1574  else if (HeapTupleIsHotUpdated(heapTuple))
1575  {
1576  /*
1577  * It's a HOT-updated tuple deleted by our own xact.
1578  * We can assume the deletion will commit (else the
1579  * index contents don't matter), so treat the same as
1580  * RECENTLY_DEAD HOT-updated tuples.
1581  */
1582  indexIt = false;
1583  /* mark the index as unsafe for old snapshots */
1584  indexInfo->ii_BrokenHotChain = true;
1585  }
1586  else
1587  {
1588  /*
1589  * It's a regular tuple deleted by our own xact. Index
1590  * it, but don't check for uniqueness nor count in
1591  * reltuples, the same as a RECENTLY_DEAD tuple.
1592  */
1593  indexIt = true;
1594  }
1595  /* In any case, exclude the tuple from unique-checking */
1596  tupleIsAlive = false;
1597  break;
1598  default:
1599  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1600  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1601  break;
1602  }
1603 
1605 
/* The visibility check above decided this tuple must not be indexed */
1606  if (!indexIt)
1607  continue;
1608  }
1609  else
1610  {
1611  /* heap_getnext did the time qual check */
1612  tupleIsAlive = true;
1613  reltuples += 1;
1614  }
1615 
1617 
1618  /* Set up for predicate or expression evaluation */
1619  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1620 
1621  /*
1622  * In a partial index, discard tuples that don't satisfy the
1623  * predicate.
1624  */
1625  if (predicate != NULL)
1626  {
1627  if (!ExecQual(predicate, econtext))
1628  continue;
1629  }
1630 
1631  /*
1632  * For the current heap tuple, extract all the attributes we use in
1633  * this index, and note which are null. This also performs evaluation
1634  * of any expressions needed.
1635  */
1636  FormIndexDatum(indexInfo,
1637  slot,
1638  estate,
1639  values,
1640  isnull);
1641 
1642  /*
1643  * You'd think we should go ahead and build the index tuple here, but
1644  * some index AMs want to do further processing on the data first. So
1645  * pass the values[] and isnull[] arrays, instead.
1646  */
1647 
1648  if (HeapTupleIsHeapOnly(heapTuple))
1649  {
1650  /*
1651  * For a heap-only tuple, pretend its TID is that of the root. See
1652  * src/backend/access/heap/README.HOT for discussion.
1653  */
1654  ItemPointerData tid;
1655  OffsetNumber offnum;
1656 
1657  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1658 
1659  /*
1660  * If a HOT tuple points to a root that we don't know
1661  * about, obtain root items afresh. If that still fails,
1662  * report it as corruption.
1663  */
1664  if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1665  {
1666  Page page = BufferGetPage(hscan->rs_cbuf);
1667 
1669  heap_get_root_tuples(page, root_offsets);
1671  }
1672 
1673  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1674  ereport(ERROR,
1676  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1677  ItemPointerGetBlockNumber(&heapTuple->t_self),
1678  offnum,
1679  RelationGetRelationName(heapRelation))));
1680 
1681  ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1682  root_offsets[offnum - 1]);
1683 
1684  /* Call the AM's callback routine to process the tuple */
1685  callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1686  callback_state);
1687  }
1688  else
1689  {
1690  /* Call the AM's callback routine to process the tuple */
1691  callback(indexRelation, &heapTuple->t_self, values, isnull,
1692  tupleIsAlive, callback_state);
1693  }
1694  }
1695 
1696  /* Report scan progress one last time. */
1697  if (progress)
1698  {
1699  BlockNumber blks_done;
1700 
1701  if (hscan->rs_base.rs_parallel != NULL)
1702  {
1704 
1706  blks_done = pbscan->phs_nblocks;
1707  }
1708  else
1709  blks_done = hscan->rs_nblocks;
1710 
1712  blks_done);
1713  }
1714 
/* The scan is ended here whether we started it or the caller supplied it */
1715  table_endscan(scan);
1716 
1717  /* we can now forget our snapshot, if set and registered by us */
1718  if (need_unregister_snapshot)
1719  UnregisterSnapshot(snapshot);
1720 
1722 
1723  FreeExecutorState(estate);
1724 
1725  /* These may have been pointing to the now-gone estate */
1726  indexInfo->ii_ExpressionsState = NIL;
1727  indexInfo->ii_PredicateState = NULL;
1728 
1729  return reltuples;
1730 }
1731 
/*
 * Validation heap scan: add index entries for heap tuples that are visible
 * to "snapshot" but whose root TID is not already in the index.
 *
 * The TIDs already present in the index are read, in sorted order, from
 * state->tuplesort and merged against a forward heap scan (syncscan is
 * disabled so that the scan proceeds from block zero, matching the sorted
 * TIDs).  Each heap tuple is first mapped to the TID of its HOT-chain root;
 * if that TID was not seen in the tuplesort output, the tuple is checked
 * against the partial-index predicate and passed to index_insert().
 *
 * state->htups is incremented for every heap tuple scanned and
 * state->tups_inserted for every index entry added.
 *
 * NOTE(review): several lines of this function -- including the line that
 * carries its name and the declaration of the "state" parameter -- are
 * absent from this listing; the gaps show up as jumps in the embedded line
 * numbers.
 */
1732 static void
1734  Relation indexRelation,
1735  IndexInfo *indexInfo,
1736  Snapshot snapshot,
1738 {
1739  TableScanDesc scan;
1740  HeapScanDesc hscan;
1741  HeapTuple heapTuple;
1743  bool isnull[INDEX_MAX_KEYS];
1744  ExprState *predicate;
1745  TupleTableSlot *slot;
1746  EState *estate;
1747  ExprContext *econtext;
1748  BlockNumber root_blkno = InvalidBlockNumber;
1749  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1750  bool in_index[MaxHeapTuplesPerPage];
1751  BlockNumber previous_blkno = InvalidBlockNumber;
1752 
1753  /* state variables for the merge */
1754  ItemPointer indexcursor = NULL;
1755  ItemPointerData decoded;
1756  bool tuplesort_empty = false;
1757 
1758  /*
1759  * sanity checks
1760  */
1761  Assert(OidIsValid(indexRelation->rd_rel->relam));
1762 
1763  /*
1764  * Need an EState for evaluation of index expressions and partial-index
1765  * predicates. Also a slot to hold the current tuple.
1766  */
1767  estate = CreateExecutorState();
1768  econtext = GetPerTupleExprContext(estate);
1769  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1770  &TTSOpsHeapTuple);
1771 
1772  /* Arrange for econtext's scan tuple to be the tuple under test */
1773  econtext->ecxt_scantuple = slot;
1774 
1775  /* Set up execution state for predicate, if any. */
1776  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1777 
1778  /*
1779  * Prepare for scan of the base relation. We need just those tuples
1780  * satisfying the passed-in reference snapshot. We must disable syncscan
1781  * here, because it's critical that we read from block zero forward to
1782  * match the sorted TIDs.
1783  */
1784  scan = table_beginscan_strat(heapRelation, /* relation */
1785  snapshot, /* snapshot */
1786  0, /* number of keys */
1787  NULL, /* scan key */
1788  true, /* buffer access strategy OK */
1789  false); /* syncscan not OK */
1790  hscan = (HeapScanDesc) scan;
1791 
1793  hscan->rs_nblocks);
1794 
1795  /*
1796  * Scan all tuples matching the snapshot.
1797  */
1798  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1799  {
1800  ItemPointer heapcursor = &heapTuple->t_self;
1801  ItemPointerData rootTuple;
1802  OffsetNumber root_offnum;
1803 
1805 
/* Count every heap tuple the scan returns, indexed or not */
1806  state->htups += 1;
1807 
1808  if ((previous_blkno == InvalidBlockNumber) ||
1809  (hscan->rs_cblock != previous_blkno))
1810  {
1812  hscan->rs_cblock);
1813  previous_blkno = hscan->rs_cblock;
1814  }
1815 
1816  /*
1817  * As commented in table_index_build_scan, we should index heap-only
1818  * tuples under the TIDs of their root tuples; so when we advance onto
1819  * a new heap page, build a map of root item offsets on the page.
1820  *
1821  * This complicates merging against the tuplesort output: we will
1822  * visit the live tuples in order by their offsets, but the root
1823  * offsets that we need to compare against the index contents might be
1824  * ordered differently. So we might have to "look back" within the
1825  * tuplesort output, but only within the current page. We handle that
1826  * by keeping a bool array in_index[] showing all the
1827  * already-passed-over tuplesort output TIDs of the current page. We
1828  * clear that array here, when advancing onto a new heap page.
1829  */
1830  if (hscan->rs_cblock != root_blkno)
1831  {
1832  Page page = BufferGetPage(hscan->rs_cbuf);
1833 
1835  heap_get_root_tuples(page, root_offsets);
1837 
1838  memset(in_index, 0, sizeof(in_index));
1839 
1840  root_blkno = hscan->rs_cblock;
1841  }
1842 
1843  /* Convert actual tuple TID to root TID */
1844  rootTuple = *heapcursor;
1845  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1846 
1847  if (HeapTupleIsHeapOnly(heapTuple))
1848  {
1849  root_offnum = root_offsets[root_offnum - 1];
1850  if (!OffsetNumberIsValid(root_offnum))
1851  ereport(ERROR,
1853  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1854  ItemPointerGetBlockNumber(heapcursor),
1855  ItemPointerGetOffsetNumber(heapcursor),
1856  RelationGetRelationName(heapRelation))));
1857  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1858  }
1859 
1860  /*
1861  * "merge" by skipping through the index tuples until we find or pass
1862  * the current root tuple.
1863  */
1864  while (!tuplesort_empty &&
1865  (!indexcursor ||
1866  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1867  {
1868  Datum ts_val;
1869  bool ts_isnull;
1870 
1871  if (indexcursor)
1872  {
1873  /*
1874  * Remember index items seen earlier on the current heap page
1875  */
1876  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1877  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1878  }
1879 
1880  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1881  &ts_val, &ts_isnull, NULL);
1882  Assert(tuplesort_empty || !ts_isnull);
1883  if (!tuplesort_empty)
1884  {
1885  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1886  indexcursor = &decoded;
1887 
1888  /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
1889 #ifndef USE_FLOAT8_BYVAL
1890  pfree(DatumGetPointer(ts_val));
1891 #endif
1892  }
1893  else
1894  {
1895  /* Be tidy */
1896  indexcursor = NULL;
1897  }
1898  }
1899 
1900  /*
1901  * If the tuplesort has overshot *and* we didn't see a match earlier,
1902  * then this tuple is missing from the index, so insert it.
1903  */
1904  if ((tuplesort_empty ||
1905  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1906  !in_index[root_offnum - 1])
1907  {
1909 
1910  /* Set up for predicate or expression evaluation */
1911  ExecStoreHeapTuple(heapTuple, slot, false);
1912 
1913  /*
1914  * In a partial index, discard tuples that don't satisfy the
1915  * predicate.
1916  */
1917  if (predicate != NULL)
1918  {
1919  if (!ExecQual(predicate, econtext))
1920  continue;
1921  }
1922 
1923  /*
1924  * For the current heap tuple, extract all the attributes we use
1925  * in this index, and note which are null. This also performs
1926  * evaluation of any expressions needed.
1927  */
1928  FormIndexDatum(indexInfo,
1929  slot,
1930  estate,
1931  values,
1932  isnull);
1933 
1934  /*
1935  * You'd think we should go ahead and build the index tuple here,
1936  * but some index AMs want to do further processing on the data
1937  * first. So pass the values[] and isnull[] arrays, instead.
1938  */
1939 
1940  /*
1941  * If the tuple is already committed dead, you might think we
1942  * could suppress uniqueness checking, but this is no longer true
1943  * in the presence of HOT, because the insert is actually a proxy
1944  * for a uniqueness check on the whole HOT-chain. That is, the
1945  * tuple we have here could be dead because it was already
1946  * HOT-updated, and if so the updating transaction will not have
1947  * thought it should insert index entries. The index AM will
1948  * check the whole HOT-chain and correctly detect a conflict if
1949  * there is one.
1950  */
1951 
1952  index_insert(indexRelation,
1953  values,
1954  isnull,
1955  &rootTuple,
1956  heapRelation,
1957  indexInfo->ii_Unique ?
1959  indexInfo);
1960 
/* Count only entries actually handed to index_insert() */
1961  state->tups_inserted += 1;
1962  }
1963  }
1964 
1965  table_endscan(scan);
1966 
1968 
1969  FreeExecutorState(estate);
1970 
1971  /* These may have been pointing to the now-gone estate */
1972  indexInfo->ii_ExpressionsState = NIL;
1973  indexInfo->ii_PredicateState = NULL;
1974 }
1975 
1976 /*
1977  * Return the number of blocks that have been read by this scan since
1978  * starting. This is meant for progress reporting and is not guaranteed to
1979  * be fully accurate: in a parallel scan, workers can be concurrently
1980  * reading blocks further ahead than what we report.
 *
 * The result is the distance from the scan's start block to
 * hscan->rs_cblock, allowing for wraparound past the end of the relation.
 * For a parallel scan the start block and relation size are taken from the
 * shared parallel scan descriptor instead of from hscan itself.
 *
 * NOTE(review): the line carrying this function's name and the line that
 * assigns bpscan in the parallel branch are absent from this listing.
1981  */
1982 static BlockNumber
1984 {
1985  ParallelBlockTableScanDesc bpscan = NULL;
1986  BlockNumber startblock;
1987  BlockNumber blocks_done;
1988 
1989  if (hscan->rs_base.rs_parallel != NULL)
1990  {
1992  startblock = bpscan->phs_startblock;
1993  }
1994  else
1995  startblock = hscan->rs_startblock;
1996 
1997  /*
1998  * Might have wrapped around the end of the relation, if startblock was
1999  * not zero.
2000  */
2001  if (hscan->rs_cblock > startblock)
2002  blocks_done = hscan->rs_cblock - startblock;
2003  else
2004  {
2005  BlockNumber nblocks;
2006 
/*
 * Wrapped (or rs_cblock == startblock, which also lands here and
 * yields nblocks): blocks from startblock to the end, plus those
 * from block zero up to rs_cblock.
 */
2007  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2008  blocks_done = nblocks - startblock +
2009  hscan->rs_cblock;
2010  }
2011 
2012  return blocks_done;
2013 }
2014 
2015 
2016 /* ------------------------------------------------------------------------
2017  * Miscellaneous callbacks for the heap AM
2018  * ------------------------------------------------------------------------
2019  */
2020 
2021 /*
2022  * Check to see whether the table needs a TOAST table. It does only if
2023  * (1) there are any toastable attributes, and (2) the maximum length
2024  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2025  * create a toast table for something like "f1 varchar(20)".)
 *
 * The maximum tuple length is estimated by walking rel's tuple descriptor,
 * skipping dropped columns, and summing each column's aligned maximum size.
 * If any variable-length column has no known maximum, the answer is "true"
 * as long as at least one column is toastable.
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * is absent from this listing; the body refers to the parameter as "rel".
2026  */
2027 static bool
2029 {
2030  int32 data_length = 0;
2031  bool maxlength_unknown = false;
2032  bool has_toastable_attrs = false;
2033  TupleDesc tupdesc = rel->rd_att;
2034  int32 tuple_length;
2035  int i;
2036 
2037  for (i = 0; i < tupdesc->natts; i++)
2038  {
2039  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2040 
/* Dropped columns contribute nothing to the estimate */
2041  if (att->attisdropped)
2042  continue;
/* Account for alignment padding in front of this column */
2043  data_length = att_align_nominal(data_length, att->attalign);
2044  if (att->attlen > 0)
2045  {
2046  /* Fixed-length types are never toastable */
2047  data_length += att->attlen;
2048  }
2049  else
2050  {
2051  int32 maxlen = type_maximum_size(att->atttypid,
2052  att->atttypmod);
2053 
/* A negative result is treated as "no known upper bound" */
2054  if (maxlen < 0)
2055  maxlength_unknown = true;
2056  else
2057  data_length += maxlen;
/* Any storage mode other than PLAIN makes the column toastable */
2058  if (att->attstorage != TYPSTORAGE_PLAIN)
2059  has_toastable_attrs = true;
2060  }
2061  }
2062  if (!has_toastable_attrs)
2063  return false; /* nothing to toast? */
2064  if (maxlength_unknown)
2065  return true; /* any unlimited-length attrs? */
/* Header plus null bitmap, and the data area, each rounded up by MAXALIGN */
2066  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2067  BITMAPLEN(tupdesc->natts)) +
2068  MAXALIGN(data_length);
2069  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2070 }
2071 
2072 /*
2073  * TOAST tables for heap relations are just heap relations.
 *
 * Returns the access method OID recorded for rel itself
 * (rel->rd_rel->relam), so the TOAST table uses the same access method as
 * its parent.
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * is absent from this listing.
2074  */
2075 static Oid
2077 {
2078  return rel->rd_rel->relam;
2079 }
2080 
2081 
2082 /* ------------------------------------------------------------------------
2083  * Planner related callbacks for the heap AM
2084  * ------------------------------------------------------------------------
2085  */
2086 
/*
 * Per-tuple overhead: the MAXALIGN'd heap tuple header plus one line
 * pointer (ItemIdData).
 */
2087 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2088  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
/* Bytes of a BLCKSZ-sized page left over after the page header */
2089 #define HEAP_USABLE_BYTES_PER_PAGE \
2090  (BLCKSZ - SizeOfPageHeaderData)
2091 
/*
 * Relation size estimation callback: delegates to
 * table_block_relation_estimate_size(), passing through rel, attr_widths
 * and the pages/tuples/allvisfrac output pointers.
 *
 * NOTE(review): the line carrying this function's name and the trailing
 * arguments of the call are absent from this listing; presumably the two
 * macros defined above are what is passed there -- confirm against the
 * full source.
 */
2092 static void
2094  BlockNumber *pages, double *tuples,
2095  double *allvisfrac)
2096 {
2097  table_block_relation_estimate_size(rel, attr_widths, pages,
2098  tuples, allvisfrac,
2101 }
2102 
2103 
2104 /* ------------------------------------------------------------------------
2105  * Executor related callbacks for the heap AM
2106  * ------------------------------------------------------------------------
2107  */
2108 
/*
 * Bitmap scan: prepare the heap page named by tbmres->blockno.
 *
 * Pins the page (releasing any previously held pin), attempts pruning,
 * and then, under a share lock on the buffer, collects the offsets of all
 * tuples visible to the scan's snapshot into hscan->rs_vistuples.  For an
 * exact (non-lossy) bitmap entry only the listed offsets are checked, each
 * by following its HOT chain; for a lossy entry every normal line pointer
 * on the page is checked.
 *
 * On return hscan->rs_cindex is 0 and hscan->rs_ntuples is the number of
 * offsets collected.  Returns true if at least one visible tuple was found,
 * false otherwise (including when the block lies at or beyond
 * hscan->rs_nblocks).
 *
 * NOTE(review): the line carrying this function's name and first parameter
 * is absent from this listing; the body refers to that parameter as "scan".
 */
2109 static bool
2111  TBMIterateResult *tbmres)
2112 {
2113  HeapScanDesc hscan = (HeapScanDesc) scan;
2114  BlockNumber page = tbmres->blockno;
2115  Buffer buffer;
2116  Snapshot snapshot;
2117  int ntup;
2118 
2119  hscan->rs_cindex = 0;
2120  hscan->rs_ntuples = 0;
2121 
2122  /*
2123  * Ignore any claimed entries past what we think is the end of the
2124  * relation. It may have been extended after the start of our scan (we
2125  * only hold an AccessShareLock, and it could be inserts from this
2126  * backend).
2127  */
2128  if (page >= hscan->rs_nblocks)
2129  return false;
2130 
2131  /*
2132  * Acquire pin on the target heap page, trading in any pin we held before.
2133  */
2134  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2135  scan->rs_rd,
2136  page);
2137  hscan->rs_cblock = page;
2138  buffer = hscan->rs_cbuf;
2139  snapshot = scan->rs_snapshot;
2140 
2141  ntup = 0;
2142 
2143  /*
2144  * Prune and repair fragmentation for the whole page, if possible.
2145  */
2146  heap_page_prune_opt(scan->rs_rd, buffer);
2147 
2148  /*
2149  * We must hold share lock on the buffer content while examining tuple
2150  * visibility. Afterwards, however, the tuples we have found to be
2151  * visible are guaranteed good as long as we hold the buffer pin.
2152  */
2153  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2154 
2155  /*
2156  * We need two separate strategies for lossy and non-lossy cases.
2157  */
2158  if (tbmres->ntuples >= 0)
2159  {
2160  /*
2161  * Bitmap is non-lossy, so we just look through the offsets listed in
2162  * tbmres; but we have to follow any HOT chain starting at each such
2163  * offset.
2164  */
2165  int curslot;
2166 
2167  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2168  {
2169  OffsetNumber offnum = tbmres->offsets[curslot];
2170  ItemPointerData tid;
2171  HeapTupleData heapTuple;
2172 
2173  ItemPointerSet(&tid, page, offnum);
/* Record the offset left in tid by the HOT-chain search, not offnum */
2174  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2175  &heapTuple, NULL, true))
2176  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2177  }
2178  }
2179  else
2180  {
2181  /*
2182  * Bitmap is lossy, so we must examine each line pointer on the page.
2183  * But we can ignore HOT chains, since we'll check each tuple anyway.
2184  */
2185  Page dp = (Page) BufferGetPage(buffer);
2186  OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
2187  OffsetNumber offnum;
2188 
2189  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2190  {
2191  ItemId lp;
2192  HeapTupleData loctup;
2193  bool valid;
2194 
2195  lp = PageGetItemId(dp, offnum);
2196  if (!ItemIdIsNormal(lp))
2197  continue;
/* Build a temporary tuple descriptor for the visibility test */
2198  loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2199  loctup.t_len = ItemIdGetLength(lp);
2200  loctup.t_tableOid = scan->rs_rd->rd_id;
2201  ItemPointerSet(&loctup.t_self, page, offnum);
2202  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2203  if (valid)
2204  {
2205  hscan->rs_vistuples[ntup++] = offnum;
2206  PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
2207  HeapTupleHeaderGetXmin(loctup.t_data));
2208  }
/* Called for every normal tuple, whether or not it was visible */
2209  HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2210  buffer, snapshot);
2211  }
2212  }
2213 
2214  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2215 
2216  Assert(ntup <= MaxHeapTuplesPerPage);
2217  hscan->rs_ntuples = ntup;
2218 
/* Tell the caller whether this page has anything to return */
2219  return ntup > 0;
2220 }
2221 
/*
 * Bitmap scan: return the next tuple of the current page.
 *
 * Uses hscan->rs_cindex as a cursor into hscan->rs_vistuples, fills
 * hscan->rs_ctup from the line pointer at that offset on the current
 * buffer's page, and advances the cursor.  Returns false once the cursor is
 * outside [0, rs_ntuples), true otherwise.
 *
 * NOTE(review): the line carrying this function's name and first parameter,
 * and two lines in the body (one before the "Set up the result slot"
 * comment and the opening of the call whose arguments "slot" and
 * "hscan->rs_cbuf" remain below), are absent from this listing.
 */
2222 static bool
2224  TBMIterateResult *tbmres,
2225  TupleTableSlot *slot)
2226 {
2227  HeapScanDesc hscan = (HeapScanDesc) scan;
2228  OffsetNumber targoffset;
2229  Page dp;
2230  ItemId lp;
2231 
2232  /*
2233  * Out of range? If so, nothing more to look at on this page
2234  */
2235  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2236  return false;
2237 
2238  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2239  dp = (Page) BufferGetPage(hscan->rs_cbuf);
2240  lp = PageGetItemId(dp, targoffset);
2241  Assert(ItemIdIsNormal(lp));
2242 
/* Point the scan's current-tuple descriptor at the on-page tuple */
2243  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2244  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2245  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2246  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2247 
2249 
2250  /*
2251  * Set up the result slot to point to this tuple. Note that the slot
2252  * acquires a pin on the buffer.
2253  */
2255  slot,
2256  hscan->rs_cbuf);
2257 
/* Advance the cursor so the next call returns the following tuple */
2258  hscan->rs_cindex++;
2259 
2260  return true;
2261 }
2262 
2263 static bool
2265 {
2266  HeapScanDesc hscan = (HeapScanDesc) scan;
2267  TsmRoutine *tsm = scanstate->tsmroutine;
2268  BlockNumber blockno;
2269 
2270  /* return false immediately if relation is empty */
2271  if (hscan->rs_nblocks == 0)
2272  return false;
2273 
2274  if (tsm->NextSampleBlock)
2275  {
2276  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2277  hscan->rs_cblock = blockno;
2278  }
2279  else
2280  {
2281  /* scanning table sequentially */
2282 
2283  if (hscan->rs_cblock == InvalidBlockNumber)
2284  {
2285  Assert(!hscan->rs_inited);
2286  blockno = hscan->rs_startblock;
2287  }
2288  else
2289  {
2290  Assert(hscan->rs_inited);
2291 
2292  blockno = hscan->rs_cblock + 1;
2293 
2294  if (blockno >= hscan->rs_nblocks)
2295  {
2296  /* wrap to beginning of rel, might not have started at 0 */
2297  blockno = 0;
2298  }
2299 
2300  /*
2301  * Report our new scan position for synchronization purposes.
2302  *
2303  * Note: we do this before checking for end of scan so that the
2304  * final state of the position hint is back at the start of the
2305  * rel. That's not strictly necessary, but otherwise when you run
2306  * the same query multiple times the starting position would shift
2307  * a little bit backwards on every invocation, which is confusing.
2308  * We don't guarantee any specific ordering in general, though.
2309  */
2310  if (scan->rs_flags & SO_ALLOW_SYNC)
2311  ss_report_location(scan->rs_rd, blockno);
2312 
2313  if (blockno == hscan->rs_startblock)
2314  {
2315  blockno = InvalidBlockNumber;
2316  }
2317  }
2318  }
2319 
2320  if (!BlockNumberIsValid(blockno))
2321  {
2322  if (BufferIsValid(hscan->rs_cbuf))
2323  ReleaseBuffer(hscan->rs_cbuf);
2324  hscan->rs_cbuf = InvalidBuffer;
2325  hscan->rs_cblock = InvalidBlockNumber;
2326  hscan->rs_inited = false;
2327 
2328  return false;
2329  }
2330 
2331  heapgetpage(scan, blockno);
2332  hscan->rs_inited = true;
2333 
2334  return true;
2335 }
2336 
2337 static bool
2339  TupleTableSlot *slot)
2340 {
2341  HeapScanDesc hscan = (HeapScanDesc) scan;
2342  TsmRoutine *tsm = scanstate->tsmroutine;
2343  BlockNumber blockno = hscan->rs_cblock;
2344  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2345 
2346  Page page;
2347  bool all_visible;
2348  OffsetNumber maxoffset;
2349 
2350  /*
2351  * When not using pagemode, we must lock the buffer during tuple
2352  * visibility checks.
2353  */
2354  if (!pagemode)
2356 
2357  page = (Page) BufferGetPage(hscan->rs_cbuf);
2358  all_visible = PageIsAllVisible(page) &&
2360  maxoffset = PageGetMaxOffsetNumber(page);
2361 
2362  for (;;)
2363  {
2364  OffsetNumber tupoffset;
2365 
2367 
2368  /* Ask the tablesample method which tuples to check on this page. */
2369  tupoffset = tsm->NextSampleTuple(scanstate,
2370  blockno,
2371  maxoffset);
2372 
2373  if (OffsetNumberIsValid(tupoffset))
2374  {
2375  ItemId itemid;
2376  bool visible;
2377  HeapTuple tuple = &(hscan->rs_ctup);
2378 
2379  /* Skip invalid tuple pointers. */
2380  itemid = PageGetItemId(page, tupoffset);
2381  if (!ItemIdIsNormal(itemid))
2382  continue;
2383 
2384  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2385  tuple->t_len = ItemIdGetLength(itemid);
2386  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2387 
2388 
2389  if (all_visible)
2390  visible = true;
2391  else
2392  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2393  tuple, tupoffset);
2394 
2395  /* in pagemode, heapgetpage did this for us */
2396  if (!pagemode)
2397  HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2398  hscan->rs_cbuf, scan->rs_snapshot);
2399 
2400  /* Try next tuple from same page. */
2401  if (!visible)
2402  continue;
2403 
2404  /* Found visible tuple, return it. */
2405  if (!pagemode)
2407 
2408  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2409 
2410  /* Count successfully-fetched tuples as heap fetches */
2412 
2413  return true;
2414  }
2415  else
2416  {
2417  /*
2418  * If we get here, it means we've exhausted the items on this page
2419  * and it's time to move to the next.
2420  */
2421  if (!pagemode)
2423 
2424  ExecClearTuple(slot);
2425  return false;
2426  }
2427  }
2428 
2429  Assert(0);
2430 }
2431 
2432 
2433 /* ----------------------------------------------------------------------------
2434  * Helper functions for the above.
2435  * ----------------------------------------------------------------------------
2436  */
2437 
2438 /*
2439  * Reconstruct and rewrite the given tuple
2440  *
2441  * We cannot simply copy the tuple as-is, for several reasons:
2442  *
2443  * 1. We'd like to squeeze out the values of any dropped columns, both
2444  * to save space and to ensure we have no corner-case failures. (It's
2445  * possible for example that the new table hasn't got a TOAST table
2446  * and so is unable to store any large values of dropped cols.)
2447  *
2448  * 2. The tuple might not even be legal for the new table; this is
2449  * currently only known to happen as an after-effect of ALTER TABLE
2450  * SET WITHOUT OIDS.
2451  *
2452  * So, we must reconstruct the tuple from component Datums.
2453  */
2454 static void
2456  Relation OldHeap, Relation NewHeap,
2457  Datum *values, bool *isnull, RewriteState rwstate)
2458 {
2459  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2460  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2461  HeapTuple copiedTuple;
2462  int i;
2463 
2464  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2465 
2466  /* Be sure to null out any dropped columns */
2467  for (i = 0; i < newTupDesc->natts; i++)
2468  {
2469  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2470  isnull[i] = true;
2471  }
2472 
2473  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2474 
2475  /* The heap rewrite module does the rest */
2476  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2477 
2478  heap_freetuple(copiedTuple);
2479 }
2480 
2481 /*
2482  * Check visibility of the tuple.
2483  */
2484 static bool
2486  HeapTuple tuple,
2487  OffsetNumber tupoffset)
2488 {
2489  HeapScanDesc hscan = (HeapScanDesc) scan;
2490 
2491  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2492  {
2493  /*
2494  * In pageatatime mode, heapgetpage() already did visibility checks,
2495  * so just look at the info it left in rs_vistuples[].
2496  *
2497  * We use a binary search over the known-sorted array. Note: we could
2498  * save some effort if we insisted that NextSampleTuple select tuples
2499  * in increasing order, but it's not clear that there would be enough
2500  * gain to justify the restriction.
2501  */
2502  int start = 0,
2503  end = hscan->rs_ntuples - 1;
2504 
2505  while (start <= end)
2506  {
2507  int mid = (start + end) / 2;
2508  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2509 
2510  if (tupoffset == curoffset)
2511  return true;
2512  else if (tupoffset < curoffset)
2513  end = mid - 1;
2514  else
2515  start = mid + 1;
2516  }
2517 
2518  return false;
2519  }
2520  else
2521  {
2522  /* Otherwise, we have to check the tuple individually. */
2523  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2524  buffer);
2525  }
2526 }
2527 
2528 
2529 /* ------------------------------------------------------------------------
2530  * Definition of the heap table access method.
2531  * ------------------------------------------------------------------------
2532  */
2533 
2534 static const TableAmRoutine heapam_methods = {
2536 
2537  .slot_callbacks = heapam_slot_callbacks,
2538 
2539  .scan_begin = heap_beginscan,
2540  .scan_end = heap_endscan,
2541  .scan_rescan = heap_rescan,
2542  .scan_getnextslot = heap_getnextslot,
2543 
2544  .parallelscan_estimate = table_block_parallelscan_estimate,
2545  .parallelscan_initialize = table_block_parallelscan_initialize,
2546  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2547 
2548  .index_fetch_begin = heapam_index_fetch_begin,
2549  .index_fetch_reset = heapam_index_fetch_reset,
2550  .index_fetch_end = heapam_index_fetch_end,
2551  .index_fetch_tuple = heapam_index_fetch_tuple,
2552 
2553  .tuple_insert = heapam_tuple_insert,
2554  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2555  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2556  .multi_insert = heap_multi_insert,
2557  .tuple_delete = heapam_tuple_delete,
2558  .tuple_update = heapam_tuple_update,
2559  .tuple_lock = heapam_tuple_lock,
2560 
2561  .tuple_fetch_row_version = heapam_fetch_row_version,
2562  .tuple_get_latest_tid = heap_get_latest_tid,
2563  .tuple_tid_valid = heapam_tuple_tid_valid,
2564  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2565  .compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
2566 
2567  .relation_set_new_filenode = heapam_relation_set_new_filenode,
2568  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2569  .relation_copy_data = heapam_relation_copy_data,
2570  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2571  .relation_vacuum = heap_vacuum_rel,
2572  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2573  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2574  .index_build_range_scan = heapam_index_build_range_scan,
2575  .index_validate_scan = heapam_index_validate_scan,
2576 
2577  .relation_size = table_block_relation_size,
2578  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2579  .relation_toast_am = heapam_relation_toast_am,
2580  .relation_fetch_toast_slice = heap_fetch_toast_slice,
2581 
2582  .relation_estimate_size = heapam_estimate_rel_size,
2583 
2584  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2585  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2586  .scan_sample_next_block = heapam_scan_sample_next_block,
2587  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2588 };
2589 
2590 
2591 const TableAmRoutine *
2593 {
2594  return &heapam_methods;
2595 }
2596 
2597 Datum
2599 {
2600  PG_RETURN_POINTER(&heapam_methods);
2601 }
TupleTableSlot * table_slot_create(Relation relation, List **reglist)
Definition: tableam.c:91
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:365
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:52
#define ItemPointerIsValid(pointer)
Definition: itemptr.h:82
void FormIndexDatum(IndexInfo *indexInfo, TupleTableSlot *slot, EState *estate, Datum *values, bool *isnull)
Definition: index.c:2755
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:1859
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define NIL
Definition: pg_list.h:65
Oid tts_tableOid
Definition: tuptable.h:131
uint32 CommandId
Definition: c.h:589
ItemPointerData ctid
Definition: tableam.h:125
#define PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP
Definition: progress.h:65
static PgChecksumMode mode
Definition: pg_checksums.c:61
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:96
bool tuplesort_getdatum(Tuplesortstate *state, bool forward, Datum *val, bool *isNull, Datum *abbrev)
Definition: tuplesort.c:2475
TransactionId heap_compute_xid_horizon_for_tuples(Relation rel, ItemPointerData *tids, int nitems)
Definition: heapam.c:6992
#define SizeofHeapTupleHeader
Definition: htup_details.h:184
BlockNumber rs_cblock
Definition: heapam.h:59
LockTupleMode
Definition: lockoptions.h:49
NodeTag type
Definition: tableam.h:166
void tuplesort_performsort(Tuplesortstate *state)
Definition: tuplesort.c:2021
void heap_abort_speculative(Relation relation, ItemPointer tid)
Definition: heapam.c:5566
void end_heap_rewrite(RewriteState state)
Definition: rewriteheap.c:301
HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
Definition: tuplesort.c:2426
void smgrclose(SMgrRelation reln)
Definition: smgr.c:257
void table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:418
#define InitDirtySnapshot(snapshotdata)
Definition: snapmgr.h:75
static bool heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
Definition: smgr.c:334
List * ii_Predicate
Definition: execnodes.h:162
static void heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken, bool succeeded)
#define att_align_nominal(cur_offset, attalign)
Definition: tupmacs.h:148
uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber)
Definition: tableam.c:628
bool IsSystemRelation(Relation relation)
Definition: catalog.c:66
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define PageIsAllVisible(page)
Definition: bufpage.h:385
#define PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
Definition: progress.h:59
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:425
uint32 TransactionId
Definition: c.h:575
Snapshot RegisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:810
static const TupleTableSlotOps * heapam_slot_callbacks(Relation relation)
void heap_endscan(TableScanDesc sscan)
Definition: heapam.c:1256
#define RelationGetDescr(relation)
Definition: rel.h:483
TupleTableSlot * MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops)
Definition: execTuples.c:1208
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:869
static void heapam_index_fetch_end(IndexFetchTableData *scan)
static void reform_and_rewrite_tuple(HeapTuple tuple, Relation OldHeap, Relation NewHeap, Datum *values, bool *isnull, RewriteState rwstate)
static TM_Result heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, bool *update_indexes)
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup)
Definition: combocid.c:104
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:92
bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf)
Definition: heapam.c:1394
struct SMgrRelationData * rd_smgr
Definition: rel.h:57
struct ParallelBlockTableScanDescData * ParallelBlockTableScanDesc
Definition: relscan.h:82
TableScanDescData rs_base
Definition: heapam.h:49
void pgstat_progress_update_param(int index, int64 val)
Definition: pgstat.c:3395
ExprState * ii_PredicateState
Definition: execnodes.h:163
void heap_fetch_toast_slice(Relation toastrel, Oid valueid, int32 attrsize, int32 sliceoffset, int32 slicelength, struct varlena *result)
Definition: heaptoast.c:626
const TupleTableSlotOps TTSOpsBufferHeapTuple
Definition: execTuples.c:86
MemoryContext ecxt_per_tuple_memory
Definition: execnodes.h:233
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:654
CommandId cmax
Definition: tableam.h:127
#define MaxHeapTuplesPerPage
Definition: htup_details.h:574
unsigned char uint8
Definition: c.h:427
#define HeapTupleHeaderSetSpeculativeToken(tup, token)
Definition: htup_details.h:440
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:429
Tuplesortstate * tuplesort_begin_cluster(TupleDesc tupDesc, Relation indexRel, int workMem, SortCoordinate coordinate, bool randomAccess)
Definition: tuplesort.c:952
#define InvalidBuffer
Definition: buf.h:25
void index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int norderbys)
Definition: indexam.c:295
static void heapam_relation_set_new_filenode(Relation rel, const RelFileNode *newrnode, char persistence, TransactionId *freezeXid, MultiXactId *minmulti)
HeapTuple tuple
Definition: tuptable.h:250
int errcode(int sqlerrcode)
Definition: elog.c:691
TransactionId RecentXmin
Definition: snapmgr.c:113
#define PROGRESS_CLUSTER_INDEX_RELID
Definition: progress.h:57
#define PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP
Definition: progress.h:68
static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd)
void MemoryContextReset(MemoryContext context)
Definition: mcxt.c:137
uint32 BlockNumber
Definition: block.h:31
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3513
#define BITMAPLEN(NATTS)
Definition: htup_details.h:547
static void heapam_relation_nontransactional_truncate(Relation rel)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
static bool heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot)
#define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS
Definition: tableam.h:139
bool smgrexists(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:248
static bool table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: tableam.h:904
static Oid heapam_relation_toast_am(Relation rel)
Form_pg_class rd_rel
Definition: rel.h:110
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1338
unsigned int Oid
Definition: postgres_ext.h:31
static bool ExecQual(ExprState *state, ExprContext *econtext)
Definition: executor.h:372
uint32 rs_flags
Definition: relscan.h:43
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
Snapshot GetTransactionSnapshot(void)
Definition: snapmgr.c:250
#define OidIsValid(objectId)
Definition: c.h:706
static IndexFetchTableData * heapam_index_fetch_begin(Relation rel)
#define PageGetMaxOffsetNumber(page)
Definition: bufpage.h:357
#define RelationGetTargetBlock(relation)
Definition: rel.h:542
Size table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan)
Definition: tableam.c:400
static TableScanDesc table_beginscan_strat(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key, bool allow_strat, bool allow_sync)
Definition: tableam.h:779
static void heapam_index_fetch_reset(IndexFetchTableData *scan)
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:37
static bool heapam_scan_bitmap_next_block(TableScanDesc scan, TBMIterateResult *tbmres)
void heapgetpage(TableScanDesc sscan, BlockNumber page)
Definition: heapam.c:353
signed int int32
Definition: c.h:417
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:73
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:712
HeapTupleData rs_ctup
Definition: heapam.h:66
uint16 OffsetNumber
Definition: off.h:24
Size table_block_parallelscan_estimate(Relation rel)
Definition: tableam.c:394
HeapTupleHeader t_data
Definition: htup.h:68
BlockNumber blockno
Definition: tidbitmap.h:42
#define RelationOpenSmgr(relation)
Definition: rel.h:514
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1509
void FreeExecutorState(EState *estate)
Definition: execUtils.c:185
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:676
#define GetPerTupleExprContext(estate)
Definition: executor.h:509
List * ii_ExpressionsState
Definition: execnodes.h:161
static const TableAmRoutine heapam_methods
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
void pfree(void *pointer)
Definition: mcxt.c:1057
SMgrRelation RelationCreateStorage(RelFileNode rnode, char relpersistence)
Definition: storage.c:118
NextSampleTuple_function NextSampleTuple
Definition: tsmapi.h:74
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:3536
TransactionId xmax
Definition: tableam.h:126
TransactionId GetOldestNonRemovableTransactionId(Relation rel)
Definition: procarray.c:1913
#define ERROR
Definition: elog.h:43
static void heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, bool use_sort, TransactionId OldestXmin, TransactionId *xid_cutoff, MultiXactId *multi_cutoff, double *num_tuples, double *tups_vacuumed, double *tups_recently_dead)
static TableScanDesc table_beginscan(Relation rel, Snapshot snapshot, int nkeys, struct ScanKeyData *key)
Definition: tableam.h:755
ItemPointerData t_ctid
Definition: htup_details.h:160
int32 type_maximum_size(Oid type_oid, int32 typemod)
Definition: format_type.c:408
ItemPointerData t_self
Definition: htup.h:65
static void callback(struct sockaddr *addr, struct sockaddr *mask, void *unused)
Definition: test_ifaddrs.c:48
static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer, HeapTuple tuple, OffsetNumber tupoffset)
static bool heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
#define DatumGetInt64(X)
Definition: postgres.h:607
static void heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate)
#define pgstat_count_heap_fetch(rel)
Definition: pgstat.h:1498
Tuplesortstate * tuplesort
Definition: index.h:44
uint32 t_len
Definition: htup.h:64
#define PROGRESS_CLUSTER_PHASE_SORT_TUPLES
Definition: progress.h:67
Buffer xs_cbuf
Definition: heapam.h:82
static char * buf
Definition: pg_test_fsync.c:68
HeapTuple heap_getnext(TableScanDesc sscan, ScanDirection direction)
Definition: heapam.c:1286
void ExecDropSingleTupleTableSlot(TupleTableSlot *slot)
Definition: execTuples.c:1224
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: tidbitmap.h:46
#define HEAP_OVERHEAD_BYTES_PER_TUPLE
#define FirstOffsetNumber
Definition: off.h:27
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:330
#define PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED
Definition: progress.h:58
#define InvalidTransactionId
Definition: transam.h:31
#define RelationGetRelationName(relation)
Definition: rel.h:491
ExprState * ExecPrepareQual(List *qual, EState *estate)
Definition: execExpr.c:519
static TransactionId OldestXmin
Definition: vacuumlazy.c:335
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:193
static bool heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, TupleTableSlot *slot)
bool ii_BrokenHotChain
Definition: execnodes.h:174
unsigned int uint32
Definition: c.h:429
Oid t_tableOid
Definition: htup.h:66
TransactionId xmax
Definition: snapshot.h:158
TransactionId xmin
Definition: snapshot.h:157
static void heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate, uint32 specToken)
bool rs_inited
Definition: heapam.h:58
NextSampleBlock_function NextSampleBlock
Definition: tsmapi.h:73
void index_endscan(IndexScanDesc scan)
Definition: indexam.c:321
#define BufferGetPage(buffer)
Definition: bufmgr.h:169
BlockNumber rs_startblock
Definition: heapam.h:53
static void heapam_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac)
static bool heapam_scan_bitmap_next_tuple(TableScanDesc scan, TBMIterateResult *tbmres, TupleTableSlot *slot)
void heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
Definition: pruneheap.c:883
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1614
Datum heap_tableam_handler(PG_FUNCTION_ARGS)
Oid rd_id
Definition: rel.h:112
SMgrRelation smgropen(RelFileNode rnode, BackendId backend)
Definition: smgr.c:146
ForkNumber
Definition: relpath.h:40
EState * CreateExecutorState(void)
Definition: execUtils.c:89
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:852
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:45
int rs_ntuples
Definition: heapam.h:70
static bool heapam_fetch_row_version(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot)
#define WARNING
Definition: elog.h:40
#define PageGetItemId(page, offsetNumber)
Definition: bufpage.h:235
int progress
Definition: pgbench.c:235
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
#define HEAP_USABLE_BYTES_PER_PAGE
TM_Result
Definition: tableam.h:70
void table_block_relation_estimate_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac, Size overhead_bytes_per_tuple, Size usable_bytes_per_page)
Definition: tableam.c:668
MultiXactId GetOldestMultiXactId(void)
Definition: multixact.c:2501
void * palloc0(Size size)
Definition: mcxt.c:981
#define PROGRESS_SCAN_BLOCKS_DONE
Definition: progress.h:120
void heap_finish_speculative(Relation relation, ItemPointer tid)
Definition: heapam.c:5475
void RelationDropStorage(Relation rel)
Definition: storage.c:195
uintptr_t Datum
Definition: postgres.h:367
#define TTS_IS_BUFFERTUPLE(slot)
Definition: tuptable.h:231
void heap_vacuum_rel(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy)
Definition: vacuumlazy.c:419
static double heapam_index_build_range_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, bool allow_sync, bool anyvisible, bool progress, BlockNumber start_blockno, BlockNumber numblocks, IndexBuildCallback callback, void *callback_state, TableScanDesc scan)
BlockNumber rs_nblocks
Definition: heapam.h:52
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:3752
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2543
#define ItemPointerIndicatesMovedPartitions(pointer)
Definition: itemptr.h:184
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1362
TupleDesc rd_att
Definition: rel.h:111
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:97
#define InvalidOffsetNumber
Definition: off.h:26
static void itemptr_decode(ItemPointer itemptr, int64 encoded)
Definition: index.h:203
void RelationCopyStorage(SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence)
Definition: storage.c:408
#define ereport(elevel,...)
Definition: elog.h:155
int maintenance_work_mem
Definition: globals.c:123
static bool heapam_index_fetch_tuple(struct IndexFetchTableData *scan, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, bool *call_again, bool *all_dead)
#define BlockNumberIsValid(blockNumber)
Definition: block.h:70
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:9018
Buffer rs_cbuf
Definition: heapam.h:60
static void heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
TransactionId MultiXactId
Definition: c.h:585
int errmsg_internal(const char *fmt,...)
Definition: elog.c:989
bool ii_Unique
Definition: execnodes.h:171
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:639
BackendId rd_backend
Definition: rel.h:59
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:685
#define Assert(condition)
Definition: c.h:800
#define PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP
Definition: progress.h:66
TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
Definition: heapam.c:1141
Definition: regguts.h:298
double tups_inserted
Definition: index.h:46
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]
Definition: heapam.h:71
Definition: tableam.h:76
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
bool takenDuringRecovery
Definition: snapshot.h:184
void FlushRelationBuffers(Relation rel)
Definition: bufmgr.c:3254
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:313
void pgstat_progress_update_multi_param(int nparam, const int *index, const int64 *val)
Definition: pgstat.c:3417
#define INDEX_MAX_KEYS
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode)
Definition: heapam.c:2893
#define InvalidBlockNumber
Definition: block.h:33
static TM_Result heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
bool index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot)
Definition: indexam.c:614
#define MAX_FORKNUM
Definition: relpath.h:55
Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum)
Definition: bufmgr.c:1534
TupleTableSlot * ecxt_scantuple
Definition: execnodes.h:225
#define MAXALIGN(LEN)
Definition: c.h:753
#define BufferIsValid(bufnum)
Definition: bufmgr.h:123
#define ItemPointerGetOffsetNumber(pointer)
Definition: itemptr.h:117
#define PROGRESS_SCAN_BLOCKS_TOTAL
Definition: progress.h:119
struct TsmRoutine * tsmroutine
Definition: execnodes.h:1343
#define TUPLE_LOCK_FLAG_FIND_LAST_VERSION
Definition: tableam.h:141
void ss_report_location(Relation rel, BlockNumber location)
Definition: syncscan.c:288
TupleTableSlot * ExecStorePinnedBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1388
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:29
bool ii_Concurrent
Definition: execnodes.h:173
#define SnapshotAny
Definition: snapmgr.h:68
TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
Definition: heapam.c:3973
#define DatumGetPointer(X)
Definition: postgres.h:549
Relation rs_rd
Definition: relscan.h:34
double htups
Definition: index.h:46
#define ItemPointerSetOffsetNumber(pointer, offsetNumber)
Definition: itemptr.h:148
void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc, Datum *values, bool *isnull)
Definition: heaptuple.c:1249
static void table_endscan(TableScanDesc scan)
Definition: tableam.h:863
static Datum values[MAXATTR]
Definition: bootstrap.c:165
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:393
bool rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
Definition: rewriteheap.c:565
bool heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
Definition: heapam.c:1335
Oid * ii_ExclusionOps
Definition: execnodes.h:164
struct SnapshotData * rs_snapshot
Definition: relscan.h:35
void * palloc(Size size)
Definition: mcxt.c:950
int errmsg(const char *fmt,...)
Definition: elog.c:902
#define elog(elevel,...)
Definition: elog.h:228
int i
#define OffsetNumberIsValid(offsetNumber)
Definition: off.h:39
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:46
const TupleTableSlotOps TTSOpsHeapTuple
Definition: execTuples.c:84
void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
Definition: tuplesort.c:1687
void heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:2097
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2442
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:97
static void heapam_index_validate_scan(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo, Snapshot snapshot, ValidateIndexState *state)
void heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
Definition: heapam.c:1219
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PROGRESS_CLUSTER_HEAP_BLKS_SCANNED
Definition: progress.h:61
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
#define PROGRESS_CLUSTER_TOTAL_HEAP_BLKS
Definition: progress.h:60
static bool heapam_relation_needs_toast_table(Relation rel)
HeapTupleTableSlot base
Definition: tuptable.h:259
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:87
#define ItemPointerGetBlockNumber(pointer)
Definition: itemptr.h:98
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:1493
static bool heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot)
#define TransactionIdIsValid(xid)
Definition: transam.h:41
RewriteState begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin, TransactionId freeze_xid, MultiXactId cutoff_multi)
Definition: rewriteheap.c:237
static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan)
void tuplesort_end(Tuplesortstate *state)
Definition: tuplesort.c:1445
bool traversed
Definition: tableam.h:128
HeapTupleData tupdata
Definition: tuptable.h:253
#define PROGRESS_CLUSTER_PHASE
Definition: progress.h:56
ItemPointerData tts_tid
Definition: tuptable.h:130
int Buffer
Definition: buf.h:23
void smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
Definition: smgr.c:643
void rewrite_heap_tuple(RewriteState state, HeapTuple old_tuple, HeapTuple new_tuple)
Definition: rewriteheap.c:363
#define RelationGetRelid(relation)
Definition: rel.h:457
LockWaitPolicy
Definition: lockoptions.h:36
TupleTableSlot * ExecStoreHeapTuple(HeapTuple tuple, TupleTableSlot *slot, bool shouldFree)
Definition: execTuples.c:1322
#define PageGetItem(page, itemId)
Definition: bufpage.h:340
static bool heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy)
Pointer Page
Definition: bufpage.h:78
#define ItemPointerSet(pointer, blockNumber, offNum)
Definition: itemptr.h:127
bool index_insert(Relation indexRelation, Datum *values, bool *isnull, ItemPointer heap_t_ctid, Relation heapRelation, IndexUniqueCheck checkUnique, IndexInfo *indexInfo)
Definition: indexam.c:176
IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys)
Definition: indexam.c:203
#define ItemPointerCopy(fromPointer, toPointer)
Definition: itemptr.h:161
void(* IndexBuildCallback)(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state)
Definition: tableam.h:145
IndexFetchTableData xs_base
Definition: heapam.h:80
bool HeapTupleSatisfiesVisibility(HeapTuple tup, Snapshot snapshot, Buffer buffer)
void heap_get_latest_tid(TableScanDesc sscan, ItemPointer tid)
Definition: heapam.c:1661
void RelationTruncate(Relation rel, BlockNumber nblocks)
Definition: storage.c:277
const TableAmRoutine * GetHeapamTableAmRoutine(void)
void log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum)
Definition: storage.c:175