heapam_handler.c
1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This files wires up the lower level heapam.c et al routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "miscadmin.h"
23 
24 #include "access/genam.h"
25 #include "access/heapam.h"
26 #include "access/heaptoast.h"
27 #include "access/multixact.h"
28 #include "access/rewriteheap.h"
29 #include "access/tableam.h"
30 #include "access/tsmapi.h"
31 #include "access/xact.h"
32 #include "catalog/catalog.h"
33 #include "catalog/index.h"
34 #include "catalog/storage.h"
35 #include "catalog/storage_xlog.h"
36 #include "commands/progress.h"
37 #include "executor/executor.h"
38 #include "pgstat.h"
39 #include "storage/bufmgr.h"
40 #include "storage/bufpage.h"
42 #include "storage/lmgr.h"
43 #include "storage/predicate.h"
44 #include "storage/procarray.h"
45 #include "storage/smgr.h"
46 #include "utils/builtins.h"
47 #include "utils/rel.h"
48 
49 
50 static void reform_and_rewrite_tuple(HeapTuple tuple,
51  Relation OldHeap, Relation NewHeap,
52  Datum *values, bool *isnull, RewriteState rwstate);
53 
54 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
55  HeapTuple tuple,
56  OffsetNumber tupoffset);
57 
58 static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
59 
60 static const TableAmRoutine heapam_methods;
61 
62 
63 /* ------------------------------------------------------------------------
64  * Slot related callbacks for heap AM
65  * ------------------------------------------------------------------------
66  */
67 
68 static const TupleTableSlotOps *
69 heapam_slot_callbacks(Relation relation)
70 {
71  return &TTSOpsBufferHeapTuple;
72 }
73 
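/*
 * Editor's note (illustrative, not part of the upstream file): executor code
 * does not call heapam_slot_callbacks() directly; it goes through the tableam
 * wrappers, roughly:
 *
 *     TupleTableSlot *slot = table_slot_create(rel, NULL);
 *     Assert(TTS_IS_BUFFERTUPLE(slot));
 *
 * Because the heap AM returns TTSOpsBufferHeapTuple, slots created this way
 * can hold a tuple that is still pinned in a shared buffer, which is what the
 * fetch/lock callbacks below rely on.
 */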
74 
75 /* ------------------------------------------------------------------------
76  * Index Scan Callbacks for heap AM
77  * ------------------------------------------------------------------------
78  */
79 
80 static IndexFetchTableData *
81 heapam_index_fetch_begin(Relation rel)
82 {
83  IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
84 
85  hscan->xs_base.rel = rel;
86  hscan->xs_cbuf = InvalidBuffer;
87 
88  return &hscan->xs_base;
89 }
90 
91 static void
92 heapam_index_fetch_reset(IndexFetchTableData *scan)
93 {
94  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
95 
96  if (BufferIsValid(hscan->xs_cbuf))
97  {
98  ReleaseBuffer(hscan->xs_cbuf);
99  hscan->xs_cbuf = InvalidBuffer;
100  }
101 }
102 
103 static void
104 heapam_index_fetch_end(IndexFetchTableData *scan)
105 {
106  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
107 
108  heapam_index_fetch_reset(scan);
109 
110  pfree(hscan);
111 }
112 
113 static bool
114 heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
115  ItemPointer tid,
116  Snapshot snapshot,
117  TupleTableSlot *slot,
118  bool *call_again, bool *all_dead)
119 {
120  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
121  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
122  bool got_heap_tuple;
123 
124  Assert(TTS_IS_BUFFERTUPLE(slot));
125 
126  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
127  if (!*call_again)
128  {
129  /* Switch to correct buffer if we don't have it already */
130  Buffer prev_buf = hscan->xs_cbuf;
131 
132  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
133  hscan->xs_base.rel,
134  ItemPointerGetBlockNumber(tid));
135 
136  /*
137  * Prune page, but only if we weren't already on this page
138  */
139  if (prev_buf != hscan->xs_cbuf)
140  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
141  }
142 
143  /* Obtain share-lock on the buffer so we can examine visibility */
144  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
145  got_heap_tuple = heap_hot_search_buffer(tid,
146  hscan->xs_base.rel,
147  hscan->xs_cbuf,
148  snapshot,
149  &bslot->base.tupdata,
150  all_dead,
151  !*call_again);
152  bslot->base.tupdata.t_self = *tid;
153  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
154 
155  if (got_heap_tuple)
156  {
157  /*
158  * Only in a non-MVCC snapshot can more than one member of the HOT
159  * chain be visible.
160  */
161  *call_again = !IsMVCCSnapshot(snapshot);
162 
163  slot->tts_tableOid = RelationGetRelid(scan->rel);
164  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
165  }
166  else
167  {
168  /* We've reached the end of the HOT chain. */
169  *call_again = false;
170  }
171 
172  return got_heap_tuple;
173 }
174 
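/*
 * Editor's note (summary added for readability, not upstream text): the
 * *call_again protocol above lets an index AM walk a HOT chain one member at
 * a time.  With an MVCC snapshot at most one chain member can be visible, so
 * *call_again stays false; with non-MVCC snapshots the caller is expected to
 * call back until it returns false.  *all_dead reports that every version in
 * the chain is dead, which index AMs use (via kill_prior_tuple) to mark the
 * index entry LP_DEAD.
 */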
175 
176 /* ------------------------------------------------------------------------
177  * Callbacks for non-modifying operations on individual tuples for heap AM
178  * ------------------------------------------------------------------------
179  */
180 
181 static bool
182 heapam_fetch_row_version(Relation relation,
183  ItemPointer tid,
184  Snapshot snapshot,
185  TupleTableSlot *slot)
186 {
187  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
188  Buffer buffer;
189 
190  Assert(TTS_IS_BUFFERTUPLE(slot));
191 
192  bslot->base.tupdata.t_self = *tid;
193  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer))
194  {
195  /* store in slot, transferring existing pin */
196  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
197  slot->tts_tableOid = RelationGetRelid(relation);
198 
199  return true;
200  }
201 
202  return false;
203 }
204 
205 static bool
206 heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
207 {
208  HeapScanDesc hscan = (HeapScanDesc) scan;
209 
210  return ItemPointerIsValid(tid) &&
211  ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
212 }
213 
214 static bool
215 heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
216  Snapshot snapshot)
217 {
218  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
219  bool res;
220 
221  Assert(TTS_IS_BUFFERTUPLE(slot));
222  Assert(BufferIsValid(bslot->buffer));
223 
224  /*
225  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
226  * Caller should be holding pin, but not lock.
227  */
228  LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
229  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
230  bslot->buffer);
231  LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
232 
233  return res;
234 }
235 
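/*
 * Editor's note (illustrative sketch, not upstream code): callers reach the
 * visibility test above through the tableam wrapper, e.g.
 *
 *     if (table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
 *         ... this row version is visible to the snapshot ...
 *
 * The caller must hold a pin (but not a lock) on the slot's buffer; the
 * callback takes and releases the content lock itself.
 */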
236 
237 /* ----------------------------------------------------------------------------
238  * Functions for manipulations of physical tuples for heap AM.
239  * ----------------------------------------------------------------------------
240  */
241 
242 static void
243 heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
244  int options, BulkInsertState bistate)
245 {
246  bool shouldFree = true;
247  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
248 
249  /* Update the tuple with table oid */
250  slot->tts_tableOid = RelationGetRelid(relation);
251  tuple->t_tableOid = slot->tts_tableOid;
252 
253  /* Perform the insertion, and copy the resulting ItemPointer */
254  heap_insert(relation, tuple, cid, options, bistate);
255  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
256 
257  if (shouldFree)
258  pfree(tuple);
259 }
260 
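/*
 * Editor's example (not part of heapam_handler.c; guarded out of the build):
 * a minimal sketch of how executor-level code reaches heapam_tuple_insert()
 * via the tableam wrappers.  The slot and wrapper routines used here are the
 * real ones from tableam.h and the executor; the function itself and the
 * #ifdef guard are hypothetical.
 */
#ifdef HEAPAM_HANDLER_EXAMPLE
static void
example_insert_one_row(Relation rel, Datum *values, bool *isnull)
{
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	int			natts = slot->tts_tupleDescriptor->natts;

	/* build a virtual tuple in the slot */
	ExecClearTuple(slot);
	memcpy(slot->tts_values, values, natts * sizeof(Datum));
	memcpy(slot->tts_isnull, isnull, natts * sizeof(bool));
	ExecStoreVirtualTuple(slot);

	/* dispatches to heapam_tuple_insert() through rel->rd_tableam */
	table_tuple_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);

	ExecDropSingleTupleTableSlot(slot);
}
#endif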
261 static void
262 heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
263  CommandId cid, int options,
264  BulkInsertState bistate, uint32 specToken)
265 {
266  bool shouldFree = true;
267  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
268 
269  /* Update the tuple with table oid */
270  slot->tts_tableOid = RelationGetRelid(relation);
271  tuple->t_tableOid = slot->tts_tableOid;
272 
273  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
274  options |= HEAP_INSERT_SPECULATIVE;
275 
276  /* Perform the insertion, and copy the resulting ItemPointer */
277  heap_insert(relation, tuple, cid, options, bistate);
278  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
279 
280  if (shouldFree)
281  pfree(tuple);
282 }
283 
284 static void
285 heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
286  uint32 specToken, bool succeeded)
287 {
288  bool shouldFree = true;
289  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
290 
291  /* adjust the tuple's state accordingly */
292  if (succeeded)
293  heap_finish_speculative(relation, &slot->tts_tid);
294  else
295  heap_abort_speculative(relation, &slot->tts_tid);
296 
297  if (shouldFree)
298  pfree(tuple);
299 }
300 
301 static TM_Result
302 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
303  Snapshot snapshot, Snapshot crosscheck, bool wait,
304  TM_FailureData *tmfd, bool changingPart)
305 {
306  /*
307  * Currently, index tuple deletion is handled at VACUUM time. If the
308  * storage AM cleaned up dead tuples itself, this would also be the
309  * place to trigger the corresponding index tuple deletion.
310  */
311  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
312 }
313 
314 
315 static TM_Result
316 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
317  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
318  bool wait, TM_FailureData *tmfd,
319  LockTupleMode *lockmode, bool *update_indexes)
320 {
321  bool shouldFree = true;
322  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
323  TM_Result result;
324 
325  /* Update the tuple with table oid */
326  slot->tts_tableOid = RelationGetRelid(relation);
327  tuple->t_tableOid = slot->tts_tableOid;
328 
329  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
330  tmfd, lockmode);
331  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
332 
333  /*
334  * Decide whether new index entries are needed for the tuple
335  *
336  * Note: heap_update returns the tid (location) of the new tuple in the
337  * t_self field.
338  *
339  * If it's a HOT update, we mustn't insert new index entries.
340  */
341  *update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);
342 
343  if (shouldFree)
344  pfree(tuple);
345 
346  return result;
347 }
348 
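/*
 * Editor's note (added summary, not upstream text): *update_indexes is the
 * signal the executor uses to decide whether to call ExecInsertIndexTuples()
 * for the new row version.  It is set only when the update succeeded (TM_Ok)
 * and was not a HOT update, since a HOT update leaves the existing index
 * entries pointing at the chain root.
 */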
349 static TM_Result
350 heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
351  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
352  LockWaitPolicy wait_policy, uint8 flags,
353  TM_FailureData *tmfd)
354 {
355  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
356  TM_Result result;
357  Buffer buffer;
358  HeapTuple tuple = &bslot->base.tupdata;
359  bool follow_updates;
360 
361  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
362  tmfd->traversed = false;
363 
364  Assert(TTS_IS_BUFFERTUPLE(slot));
365 
366 tuple_lock_retry:
367  tuple->t_self = *tid;
368  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
369  follow_updates, &buffer, tmfd);
370 
371  if (result == TM_Updated &&
372  (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
373  {
374  ReleaseBuffer(buffer);
375  /* Should not encounter speculative tuple on recheck */
376  Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
377 
378  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
379  {
380  SnapshotData SnapshotDirty;
381  TransactionId priorXmax;
382 
383  /* it was updated, so look at the updated version */
384  *tid = tmfd->ctid;
385  /* updated row should have xmin matching this xmax */
386  priorXmax = tmfd->xmax;
387 
388  /* signal that a tuple later in the chain is getting locked */
389  tmfd->traversed = true;
390 
391  /*
392  * fetch target tuple
393  *
394  * Loop here to deal with updated or busy tuples
395  */
396  InitDirtySnapshot(SnapshotDirty);
397  for (;;)
398  {
399  if (ItemPointerIndicatesMovedPartitions(tid))
400  ereport(ERROR,
401  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
402  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
403 
404  tuple->t_self = *tid;
405  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer))
406  {
407  /*
408  * If xmin isn't what we're expecting, the slot must have
409  * been recycled and reused for an unrelated tuple. This
410  * implies that the latest version of the row was deleted,
411  * so we need do nothing. (Should be safe to examine xmin
412  * without getting buffer's content lock. We assume
413  * reading a TransactionId to be atomic, and Xmin never
414  * changes in an existing tuple, except to invalid or
415  * frozen, and neither of those can match priorXmax.)
416  */
417  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
418  priorXmax))
419  {
420  ReleaseBuffer(buffer);
421  return TM_Deleted;
422  }
423 
424  /* otherwise xmin should not be dirty... */
425  if (TransactionIdIsValid(SnapshotDirty.xmin))
426  ereport(ERROR,
427  (errcode(ERRCODE_DATA_CORRUPTED),
428  errmsg_internal("t_xmin is uncommitted in tuple to be updated")));
429 
430  /*
431  * If tuple is being updated by other transaction then we
432  * have to wait for its commit/abort, or die trying.
433  */
434  if (TransactionIdIsValid(SnapshotDirty.xmax))
435  {
436  ReleaseBuffer(buffer);
437  switch (wait_policy)
438  {
439  case LockWaitBlock:
440  XactLockTableWait(SnapshotDirty.xmax,
441  relation, &tuple->t_self,
442  XLTW_FetchUpdated);
443  break;
444  case LockWaitSkip:
445  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
446  /* skip instead of waiting */
447  return TM_WouldBlock;
448  break;
449  case LockWaitError:
450  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
451  ereport(ERROR,
452  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
453  errmsg("could not obtain lock on row in relation \"%s\"",
454  RelationGetRelationName(relation))));
455  break;
456  }
457  continue; /* loop back to repeat heap_fetch */
458  }
459 
460  /*
461  * If tuple was inserted by our own transaction, we have
462  * to check cmin against cid: cmin >= current CID means
463  * our command cannot see the tuple, so we should ignore
464  * it. Otherwise heap_lock_tuple() will throw an error,
465  * and so would any later attempt to update or delete the
466  * tuple. (We need not check cmax because
467  * HeapTupleSatisfiesDirty will consider a tuple deleted
468  * by our transaction dead, regardless of cmax.) We just
469  * checked that priorXmax == xmin, so we can test that
470  * variable instead of doing HeapTupleHeaderGetXmin again.
471  */
472  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
473  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
474  {
475  tmfd->xmax = priorXmax;
476 
477  /*
478  * Cmin is the problematic value, so store that. See
479  * above.
480  */
481  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
482  ReleaseBuffer(buffer);
483  return TM_SelfModified;
484  }
485 
486  /*
487  * This is a live tuple, so try to lock it again.
488  */
489  ReleaseBuffer(buffer);
490  goto tuple_lock_retry;
491  }
492 
493  /*
494  * If the referenced slot was actually empty, the latest
495  * version of the row must have been deleted, so we need do
496  * nothing.
497  */
498  if (tuple->t_data == NULL)
499  {
500  return TM_Deleted;
501  }
502 
503  /*
504  * As above, if xmin isn't what we're expecting, do nothing.
505  */
506  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
507  priorXmax))
508  {
509  if (BufferIsValid(buffer))
510  ReleaseBuffer(buffer);
511  return TM_Deleted;
512  }
513 
514  /*
515  * If we get here, the tuple was found but failed
516  * SnapshotDirty. Assuming the xmin is either a committed xact
517  * or our own xact (as it certainly should be if we're trying
518  * to modify the tuple), this must mean that the row was
519  * updated or deleted by either a committed xact or our own
520  * xact. If it was deleted, we can ignore it; if it was
521  * updated then chain up to the next version and repeat the
522  * whole process.
523  *
524  * As above, it should be safe to examine xmax and t_ctid
525  * without the buffer content lock, because they can't be
526  * changing.
527  */
528  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
529  {
530  /* deleted, so forget about it */
531  if (BufferIsValid(buffer))
532  ReleaseBuffer(buffer);
533  return TM_Deleted;
534  }
535 
536  /* updated, so look at the updated row */
537  *tid = tuple->t_data->t_ctid;
538  /* updated row should have xmin matching this xmax */
539  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
540  if (BufferIsValid(buffer))
541  ReleaseBuffer(buffer);
542  /* loop back to fetch next in chain */
543  }
544  }
545  else
546  {
547  /* tuple was deleted, so give up */
548  return TM_Deleted;
549  }
550  }
551 
552  slot->tts_tableOid = RelationGetRelid(relation);
553  tuple->t_tableOid = slot->tts_tableOid;
554 
555  /* store in slot, transferring existing pin */
556  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
557 
558  return result;
559 }
560 
561 static void
562 heapam_finish_bulk_insert(Relation relation, int options)
563 {
564  /*
565  * If we skipped writing WAL, then we need to sync the heap (but not
566  * indexes since those use WAL anyway / don't go through tableam)
567  */
568  if (options & HEAP_INSERT_SKIP_WAL)
569  heap_sync(relation);
570 }
571 
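/*
 * Editor's note (added context, hedged): HEAP_INSERT_SKIP_WAL is used by
 * operations such as COPY into a relation created or truncated in the same
 * transaction when wal_level = minimal; since no WAL was written, the heap
 * must be fsync'ed here before commit.  (Later PostgreSQL releases replaced
 * this heap_sync() mechanism with pending-sync tracking of new relfilenodes,
 * so this callback looks different in newer sources.)
 */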
572 
573 /* ------------------------------------------------------------------------
574  * DDL related callbacks for heap AM.
575  * ------------------------------------------------------------------------
576  */
577 
578 static void
579 heapam_relation_set_new_filenode(Relation rel,
580  const RelFileNode *newrnode,
581  char persistence,
582  TransactionId *freezeXid,
583  MultiXactId *minmulti)
584 {
585  SMgrRelation srel;
586 
587  /*
588  * Initialize to the minimum XID that could put tuples in the table. We
589  * know that no xacts older than RecentXmin are still running, so that
590  * will do.
591  */
592  *freezeXid = RecentXmin;
593 
594  /*
595  * Similarly, initialize the minimum Multixact to the first value that
596  * could possibly be stored in tuples in the table. Running transactions
597  * could reuse values from their local cache, so we are careful to
598  * consider all currently running multis.
599  *
600  * XXX this could be refined further, but is it worth the hassle?
601  */
602  *minmulti = GetOldestMultiXactId();
603 
604  srel = RelationCreateStorage(*newrnode, persistence);
605 
606  /*
607  * If required, set up an init fork for an unlogged table so that it can
608  * be correctly reinitialized on restart. An immediate sync is required
609  * even if the page has been logged, because the write did not go through
610  * shared_buffers and therefore a concurrent checkpoint may have moved the
611  * redo pointer past our xlog record. Recovery may as well remove it
612  * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
613  * record. Therefore, logging is necessary even if wal_level=minimal.
614  */
615  if (persistence == RELPERSISTENCE_UNLOGGED)
616  {
617  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
618  rel->rd_rel->relkind == RELKIND_MATVIEW ||
619  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
620  smgrcreate(srel, INIT_FORKNUM, false);
621  log_smgrcreate(newrnode, INIT_FORKNUM);
622  smgrimmedsync(srel, INIT_FORKNUM);
623  }
624 
625  smgrclose(srel);
626 }
627 
628 static void
629 heapam_relation_nontransactional_truncate(Relation rel)
630 {
631  RelationTruncate(rel, 0);
632 }
633 
634 static void
635 heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
636 {
637  SMgrRelation dstrel;
638 
639  dstrel = smgropen(*newrnode, rel->rd_backend);
640  RelationOpenSmgr(rel);
641 
642  /*
643  * Since we copy the file directly without looking at the shared buffers,
644  * we'd better first flush out any pages of the source relation that are
645  * in shared buffers. We assume no new changes will be made while we are
646  * holding exclusive lock on the rel.
647  */
648  FlushRelationBuffers(rel);
649 
650  /*
651  * Create and copy all forks of the relation, and schedule unlinking of
652  * old physical files.
653  *
654  * NOTE: any conflict in relfilenode value will be caught in
655  * RelationCreateStorage().
656  */
657  RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
658 
659  /* copy main fork */
660  RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
661  rel->rd_rel->relpersistence);
662 
663  /* copy those extra forks that exist */
664  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
665  forkNum <= MAX_FORKNUM; forkNum++)
666  {
667  if (smgrexists(rel->rd_smgr, forkNum))
668  {
669  smgrcreate(dstrel, forkNum, false);
670 
671  /*
672  * WAL log creation if the relation is persistent, or this is the
673  * init fork of an unlogged relation.
674  */
675  if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
676  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
677  forkNum == INIT_FORKNUM))
678  log_smgrcreate(newrnode, forkNum);
679  RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
680  rel->rd_rel->relpersistence);
681  }
682  }
683 
684 
685  /* drop old relation, and close new one */
686  RelationDropStorage(rel);
687  smgrclose(dstrel);
688 }
689 
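/*
 * Editor's note (added context): this block-level copy path backs rewrites
 * that move a table's storage without changing its contents, such as
 * ALTER TABLE ... SET TABLESPACE, where tablecmds.c calls the
 * relation_copy_data callback with the new relfilenode.
 */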
690 static void
691 heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
692  Relation OldIndex, bool use_sort,
693  TransactionId OldestXmin,
694  TransactionId *xid_cutoff,
695  MultiXactId *multi_cutoff,
696  double *num_tuples,
697  double *tups_vacuumed,
698  double *tups_recently_dead)
699 {
700  RewriteState rwstate;
701  IndexScanDesc indexScan;
702  TableScanDesc tableScan;
703  HeapScanDesc heapScan;
704  bool use_wal;
705  bool is_system_catalog;
706  Tuplesortstate *tuplesort;
707  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
708  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
709  TupleTableSlot *slot;
710  int natts;
711  Datum *values;
712  bool *isnull;
713  BufferHeapTupleTableSlot *hslot;
714 
715  /* Remember if it's a system catalog */
716  is_system_catalog = IsSystemRelation(OldHeap);
717 
718  /*
719  * We need to log the copied data in WAL iff WAL archiving/streaming is
720  * enabled AND it's a WAL-logged rel.
721  */
722  use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
723 
724  /* use_wal off requires smgr_targblock be initially invalid */
725  Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
726 
727  /* Preallocate values/isnull arrays */
728  natts = newTupDesc->natts;
729  values = (Datum *) palloc(natts * sizeof(Datum));
730  isnull = (bool *) palloc(natts * sizeof(bool));
731 
732  /* Initialize the rewrite operation */
733  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
734  *multi_cutoff, use_wal);
735 
736 
737  /* Set up sorting if wanted */
738  if (use_sort)
739  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
740  maintenance_work_mem,
741  NULL, false);
742  else
743  tuplesort = NULL;
744 
745  /*
746  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
747  * that still need to be copied, we scan with SnapshotAny and use
748  * HeapTupleSatisfiesVacuum for the visibility test.
749  */
750  if (OldIndex != NULL && !use_sort)
751  {
752  const int ci_index[] = {
753  PROGRESS_CLUSTER_PHASE,
754  PROGRESS_CLUSTER_INDEX_RELID
755  };
756  int64 ci_val[2];
757 
758  /* Set phase and OIDOldIndex to columns */
759  ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
760  ci_val[1] = RelationGetRelid(OldIndex);
761  pgstat_progress_update_multi_param(2, ci_index, ci_val);
762 
763  tableScan = NULL;
764  heapScan = NULL;
765  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
766  index_rescan(indexScan, NULL, 0, NULL, 0);
767  }
768  else
769  {
770  /* In scan-and-sort mode and also VACUUM FULL, set phase */
771  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
772  PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
773 
774  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
775  heapScan = (HeapScanDesc) tableScan;
776  indexScan = NULL;
777 
778  /* Set total heap blocks */
779  pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
780  heapScan->rs_nblocks);
781  }
782 
783  slot = table_slot_create(OldHeap, NULL);
784  hslot = (BufferHeapTupleTableSlot *) slot;
785 
786  /*
787  * Scan through the OldHeap, either in OldIndex order or sequentially;
788  * copy each tuple into the NewHeap, or transiently to the tuplesort
789  * module. Note that we don't bother sorting dead tuples (they won't get
790  * to the new table anyway).
791  */
792  for (;;)
793  {
794  HeapTuple tuple;
795  Buffer buf;
796  bool isdead;
797 
798  CHECK_FOR_INTERRUPTS();
799 
800  if (indexScan != NULL)
801  {
802  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
803  break;
804 
805  /* Since we used no scan keys, should never need to recheck */
806  if (indexScan->xs_recheck)
807  elog(ERROR, "CLUSTER does not support lossy index conditions");
808  }
809  else
810  {
811  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
812  break;
813 
814  /*
815  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
816  * scanned
817  */
818  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
819  heapScan->rs_cblock + 1);
820  }
821 
822  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
823  buf = hslot->buffer;
824 
825  LockBuffer(buf, BUFFER_LOCK_SHARE);
826 
827  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
828  {
829  case HEAPTUPLE_DEAD:
830  /* Definitely dead */
831  isdead = true;
832  break;
833  case HEAPTUPLE_RECENTLY_DEAD:
834  *tups_recently_dead += 1;
835  /* fall through */
836  case HEAPTUPLE_LIVE:
837  /* Live or recently dead, must copy it */
838  isdead = false;
839  break;
840  case HEAPTUPLE_INSERT_IN_PROGRESS:
841 
842  /*
843  * Since we hold exclusive lock on the relation, normally the
844  * only way to see this is if it was inserted earlier in our
845  * own transaction. However, it can happen in system
846  * catalogs, since we tend to release write lock before commit
847  * there. Give a warning if neither case applies; but in any
848  * case we had better copy it.
849  */
850  if (!is_system_catalog &&
851  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
852  elog(WARNING, "concurrent insert in progress within table \"%s\"",
853  RelationGetRelationName(OldHeap));
854  /* treat as live */
855  isdead = false;
856  break;
857  case HEAPTUPLE_DELETE_IN_PROGRESS:
858 
859  /*
860  * Similar situation to INSERT_IN_PROGRESS case.
861  */
862  if (!is_system_catalog &&
863  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
864  elog(WARNING, "concurrent delete in progress within table \"%s\"",
865  RelationGetRelationName(OldHeap));
866  /* treat as recently dead */
867  *tups_recently_dead += 1;
868  isdead = false;
869  break;
870  default:
871  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
872  isdead = false; /* keep compiler quiet */
873  break;
874  }
875 
876  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
877 
878  if (isdead)
879  {
880  *tups_vacuumed += 1;
881  /* heap rewrite module still needs to see it... */
882  if (rewrite_heap_dead_tuple(rwstate, tuple))
883  {
884  /* A previous recently-dead tuple is now known dead */
885  *tups_vacuumed += 1;
886  *tups_recently_dead -= 1;
887  }
888  continue;
889  }
890 
891  *num_tuples += 1;
892  if (tuplesort != NULL)
893  {
894  tuplesort_putheaptuple(tuplesort, tuple);
895 
896  /*
897  * In scan-and-sort mode, report increase in number of tuples
898  * scanned
899  */
900  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
901  *num_tuples);
902  }
903  else
904  {
905  const int ct_index[] = {
906  PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
907  PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
908  };
909  int64 ct_val[2];
910 
911  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
912  values, isnull, rwstate);
913 
914  /*
915  * In indexscan mode and also VACUUM FULL, report increase in
916  * number of tuples scanned and written
917  */
918  ct_val[0] = *num_tuples;
919  ct_val[1] = *num_tuples;
920  pgstat_progress_update_multi_param(2, ct_index, ct_val);
921  }
922  }
923 
924  if (indexScan != NULL)
925  index_endscan(indexScan);
926  if (tableScan != NULL)
927  table_endscan(tableScan);
928  if (slot)
929  ExecDropSingleTupleTableSlot(slot);
930 
931  /*
932  * In scan-and-sort mode, complete the sort, then read out all live tuples
933  * from the tuplestore and write them to the new relation.
934  */
935  if (tuplesort != NULL)
936  {
937  double n_tuples = 0;
938 
939  /* Report that we are now sorting tuples */
940  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
941  PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
942 
943  tuplesort_performsort(tuplesort);
944 
945  /* Report that we are now writing new heap */
946  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
947  PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
948 
949  for (;;)
950  {
951  HeapTuple tuple;
952 
953  CHECK_FOR_INTERRUPTS();
954 
955  tuple = tuplesort_getheaptuple(tuplesort, true);
956  if (tuple == NULL)
957  break;
958 
959  n_tuples += 1;
960  reform_and_rewrite_tuple(tuple,
961  OldHeap, NewHeap,
962  values, isnull,
963  rwstate);
964  /* Report n_tuples */
965  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
966  n_tuples);
967  }
968 
969  tuplesort_end(tuplesort);
970  }
971 
972  /* Write out any remaining tuples, and fsync if needed */
973  end_heap_rewrite(rwstate);
974 
975  /* Clean up */
976  pfree(values);
977  pfree(isnull);
978 }
979 
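/*
 * Editor's note (added summary): the function above implements three
 * strategies for CLUSTER / VACUUM FULL rewrites: scan the old heap in index
 * order when an index is given and use_sort is false, seqscan plus tuplesort
 * when use_sort is true, and a plain seqscan for VACUUM FULL, which passes no
 * index.  In all cases tuples are pushed through the rewriteheap.c module so
 * that update chains and freezing are handled consistently.
 */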
980 static bool
981 heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
982  BufferAccessStrategy bstrategy)
983 {
984  HeapScanDesc hscan = (HeapScanDesc) scan;
985 
986  /*
987  * We must maintain a pin on the target page's buffer to ensure that
988  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
989  * under us. Hence, pin the page until we are done looking at it. We
990  * also choose to hold sharelock on the buffer throughout --- we could
991  * release and re-acquire sharelock for each tuple, but since we aren't
992  * doing much work per tuple, the extra lock traffic is probably better
993  * avoided.
994  */
995  hscan->rs_cblock = blockno;
996  hscan->rs_cindex = FirstOffsetNumber;
997  hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
998  blockno, RBM_NORMAL, bstrategy);
999  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1000 
1001  /* in heap all blocks can contain tuples, so always return true */
1002  return true;
1003 }
1004 
1005 static bool
1006 heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1007  double *liverows, double *deadrows,
1008  TupleTableSlot *slot)
1009 {
1010  HeapScanDesc hscan = (HeapScanDesc) scan;
1011  Page targpage;
1012  OffsetNumber maxoffset;
1013  BufferHeapTupleTableSlot *hslot;
1014 
1015  Assert(TTS_IS_BUFFERTUPLE(slot));
1016 
1017  hslot = (BufferHeapTupleTableSlot *) slot;
1018  targpage = BufferGetPage(hscan->rs_cbuf);
1019  maxoffset = PageGetMaxOffsetNumber(targpage);
1020 
1021  /* Inner loop over all tuples on the selected page */
1022  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1023  {
1024  ItemId itemid;
1025  HeapTuple targtuple = &hslot->base.tupdata;
1026  bool sample_it = false;
1027 
1028  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1029 
1030  /*
1031  * We ignore unused and redirect line pointers. DEAD line pointers
1032  * should be counted as dead, because we need vacuum to run to get rid
1033  * of them. Note that this rule agrees with the way that
1034  * heap_page_prune() counts things.
1035  */
1036  if (!ItemIdIsNormal(itemid))
1037  {
1038  if (ItemIdIsDead(itemid))
1039  *deadrows += 1;
1040  continue;
1041  }
1042 
1043  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1044 
1045  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1046  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1047  targtuple->t_len = ItemIdGetLength(itemid);
1048 
1049  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1050  hscan->rs_cbuf))
1051  {
1052  case HEAPTUPLE_LIVE:
1053  sample_it = true;
1054  *liverows += 1;
1055  break;
1056 
1057  case HEAPTUPLE_DEAD:
1058  case HEAPTUPLE_RECENTLY_DEAD:
1059  /* Count dead and recently-dead rows */
1060  *deadrows += 1;
1061  break;
1062 
1063  case HEAPTUPLE_INSERT_IN_PROGRESS:
1064 
1065  /*
1066  * Insert-in-progress rows are not counted. We assume that
1067  * when the inserting transaction commits or aborts, it will
1068  * send a stats message to increment the proper count. This
1069  * works right only if that transaction ends after we finish
1070  * analyzing the table; if things happen in the other order,
1071  * its stats update will be overwritten by ours. However, the
1072  * error will be large only if the other transaction runs long
1073  * enough to insert many tuples, so assuming it will finish
1074  * after us is the safer option.
1075  *
1076  * A special case is that the inserting transaction might be
1077  * our own. In this case we should count and sample the row,
1078  * to accommodate users who load a table and analyze it in one
1079  * transaction. (pgstat_report_analyze has to adjust the
1080  * numbers we send to the stats collector to make this come
1081  * out right.)
1082  */
1083  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
1084  {
1085  sample_it = true;
1086  *liverows += 1;
1087  }
1088  break;
1089 
1090  case HEAPTUPLE_DELETE_IN_PROGRESS:
1091 
1092  /*
1093  * We count and sample delete-in-progress rows the same as
1094  * live ones, so that the stats counters come out right if the
1095  * deleting transaction commits after us, per the same
1096  * reasoning given above.
1097  *
1098  * If the delete was done by our own transaction, however, we
1099  * must count the row as dead to make pgstat_report_analyze's
1100  * stats adjustments come out right. (Note: this works out
1101  * properly when the row was both inserted and deleted in our
1102  * xact.)
1103  *
1104  * The net effect of these choices is that we act as though an
1105  * IN_PROGRESS transaction hasn't happened yet, except if it
1106  * is our own transaction, which we assume has happened.
1107  *
1108  * This approach ensures that we behave sanely if we see both
1109  * the pre-image and post-image rows for a row being updated
1110  * by a concurrent transaction: we will sample the pre-image
1111  * but not the post-image. We also get sane results if the
1112  * concurrent transaction never commits.
1113  */
1114  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
1115  *deadrows += 1;
1116  else
1117  {
1118  sample_it = true;
1119  *liverows += 1;
1120  }
1121  break;
1122 
1123  default:
1124  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1125  break;
1126  }
1127 
1128  if (sample_it)
1129  {
1130  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1131  hscan->rs_cindex++;
1132 
1133  /* note that we leave the buffer locked here! */
1134  return true;
1135  }
1136  }
1137 
1138  /* Now release the lock and pin on the page */
1139  UnlockReleaseBuffer(hscan->rs_cbuf);
1140  hscan->rs_cbuf = InvalidBuffer;
1141 
1142  /* also prevent old slot contents from having pin on page */
1143  ExecClearTuple(slot);
1144 
1145  return false;
1146 }
1147 
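/*
 * Editor's note (illustrative sketch, not upstream code): ANALYZE drives the
 * two callbacks above roughly like this (see acquire_sample_rows() in
 * analyze.c):
 *
 *     while (BlockSampler_HasMore(&bs))
 *     {
 *         BlockNumber targblock = BlockSampler_Next(&bs);
 *
 *         table_scan_analyze_next_block(scan, targblock, vac_strategy);
 *         while (table_scan_analyze_next_tuple(scan, OldestXmin,
 *                                              &liverows, &deadrows, slot))
 *             ... add the slot's tuple to the sample ...
 *     }
 */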
1148 static double
1149 heapam_index_build_range_scan(Relation heapRelation,
1150  Relation indexRelation,
1151  IndexInfo *indexInfo,
1152  bool allow_sync,
1153  bool anyvisible,
1154  bool progress,
1155  BlockNumber start_blockno,
1156  BlockNumber numblocks,
1157  IndexBuildCallback callback,
1158  void *callback_state,
1159  TableScanDesc scan)
1160 {
1161  HeapScanDesc hscan;
1162  bool is_system_catalog;
1163  bool checking_uniqueness;
1164  HeapTuple heapTuple;
1165  Datum values[INDEX_MAX_KEYS];
1166  bool isnull[INDEX_MAX_KEYS];
1167  double reltuples;
1168  ExprState *predicate;
1169  TupleTableSlot *slot;
1170  EState *estate;
1171  ExprContext *econtext;
1172  Snapshot snapshot;
1173  bool need_unregister_snapshot = false;
1174  TransactionId OldestXmin;
1175  BlockNumber previous_blkno = InvalidBlockNumber;
1176  BlockNumber root_blkno = InvalidBlockNumber;
1177  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1178 
1179  /*
1180  * sanity checks
1181  */
1182  Assert(OidIsValid(indexRelation->rd_rel->relam));
1183 
1184  /* Remember if it's a system catalog */
1185  is_system_catalog = IsSystemRelation(heapRelation);
1186 
1187  /* See whether we're verifying uniqueness/exclusion properties */
1188  checking_uniqueness = (indexInfo->ii_Unique ||
1189  indexInfo->ii_ExclusionOps != NULL);
1190 
1191  /*
1192  * "Any visible" mode is not compatible with uniqueness checks; make sure
1193  * only one of those is requested.
1194  */
1195  Assert(!(anyvisible && checking_uniqueness));
1196 
1197  /*
1198  * Need an EState for evaluation of index expressions and partial-index
1199  * predicates. Also a slot to hold the current tuple.
1200  */
1201  estate = CreateExecutorState();
1202  econtext = GetPerTupleExprContext(estate);
1203  slot = table_slot_create(heapRelation, NULL);
1204 
1205  /* Arrange for econtext's scan tuple to be the tuple under test */
1206  econtext->ecxt_scantuple = slot;
1207 
1208  /* Set up execution state for predicate, if any. */
1209  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1210 
1211  /*
1212  * Prepare for scan of the base relation. In a normal index build, we use
1213  * SnapshotAny because we must retrieve all tuples and do our own time
1214  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1215  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1216  * and index whatever's live according to that.
1217  */
1218  OldestXmin = InvalidTransactionId;
1219 
1220  /* okay to ignore lazy VACUUMs here */
1221  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1222  OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
1223 
1224  if (!scan)
1225  {
1226  /*
1227  * Serial index build.
1228  *
1229  * Must begin our own heap scan in this case. We may also need to
1230  * register a snapshot whose lifetime is under our direct control.
1231  */
1232  if (!TransactionIdIsValid(OldestXmin))
1233  {
1234  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1235  need_unregister_snapshot = true;
1236  }
1237  else
1238  snapshot = SnapshotAny;
1239 
1240  scan = table_beginscan_strat(heapRelation, /* relation */
1241  snapshot, /* snapshot */
1242  0, /* number of keys */
1243  NULL, /* scan key */
1244  true, /* buffer access strategy OK */
1245  allow_sync); /* syncscan OK? */
1246  }
1247  else
1248  {
1249  /*
1250  * Parallel index build.
1251  *
1252  * Parallel case never registers/unregisters own snapshot. Snapshot
1253  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1254  * snapshot, based on same criteria as serial case.
1255  */
1256  Assert(!IsBootstrapProcessingMode());
1257  Assert(allow_sync);
1258  snapshot = scan->rs_snapshot;
1259  }
1260 
1261  hscan = (HeapScanDesc) scan;
1262 
1263  /* Publish number of blocks to scan */
1264  if (progress)
1265  {
1266  BlockNumber nblocks;
1267 
1268  if (hscan->rs_base.rs_parallel != NULL)
1269  {
1270  ParallelBlockTableScanDesc pbscan;
1271 
1272  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1273  nblocks = pbscan->phs_nblocks;
1274  }
1275  else
1276  nblocks = hscan->rs_nblocks;
1277 
1278  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1279  nblocks);
1280  }
1281 
1282  /*
1283  * Must call GetOldestXmin() with SnapshotAny. Should never call
1284  * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
1285  * this for parallel builds, since ambuild routines that support parallel
1286  * builds must work these details out for themselves.)
1287  */
1288  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1289  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1290  !TransactionIdIsValid(OldestXmin));
1291  Assert(snapshot == SnapshotAny || !anyvisible);
1292 
1293  /* set our scan endpoints */
1294  if (!allow_sync)
1295  heap_setscanlimits(scan, start_blockno, numblocks);
1296  else
1297  {
1298  /* syncscan can only be requested on whole relation */
1299  Assert(start_blockno == 0);
1300  Assert(numblocks == InvalidBlockNumber);
1301  }
1302 
1303  reltuples = 0;
1304 
1305  /*
1306  * Scan all tuples in the base relation.
1307  */
1308  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1309  {
1310  bool tupleIsAlive;
1311 
1312  CHECK_FOR_INTERRUPTS();
1313 
1314  /* Report scan progress, if asked to. */
1315  if (progress)
1316  {
1317  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1318 
1319  if (blocks_done != previous_blkno)
1320  {
1321  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1322  blocks_done);
1323  previous_blkno = blocks_done;
1324  }
1325  }
1326 
1327  /*
1328  * When dealing with a HOT-chain of updated tuples, we want to index
1329  * the values of the live tuple (if any), but index it under the TID
1330  * of the chain's root tuple. This approach is necessary to preserve
1331  * the HOT-chain structure in the heap. So we need to be able to find
1332  * the root item offset for every tuple that's in a HOT-chain. When
1333  * first reaching a new page of the relation, call
1334  * heap_get_root_tuples() to build a map of root item offsets on the
1335  * page.
1336  *
1337  * It might look unsafe to use this information across buffer
1338  * lock/unlock. However, we hold ShareLock on the table so no
1339  * ordinary insert/update/delete should occur; and we hold pin on the
1340  * buffer continuously while visiting the page, so no pruning
1341  * operation can occur either.
1342  *
1343  * Also, although our opinions about tuple liveness could change while
1344  * we scan the page (due to concurrent transaction commits/aborts),
1345  * the chain root locations won't, so this info doesn't need to be
1346  * rebuilt after waiting for another transaction.
1347  *
1348  * Note the implied assumption that there is no more than one live
1349  * tuple per HOT-chain --- else we could create more than one index
1350  * entry pointing to the same root tuple.
1351  */
1352  if (hscan->rs_cblock != root_blkno)
1353  {
1354  Page page = BufferGetPage(hscan->rs_cbuf);
1355 
1356  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1357  heap_get_root_tuples(page, root_offsets);
1358  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1359 
1360  root_blkno = hscan->rs_cblock;
1361  }
1362 
1363  if (snapshot == SnapshotAny)
1364  {
1365  /* do our own time qual check */
1366  bool indexIt;
1367  TransactionId xwait;
1368 
1369  recheck:
1370 
1371  /*
1372  * We could possibly get away with not locking the buffer here,
1373  * since caller should hold ShareLock on the relation, but let's
1374  * be conservative about it. (This remark is still correct even
1375  * with HOT-pruning: our pin on the buffer prevents pruning.)
1376  */
1377  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1378 
1379  /*
1380  * The criteria for counting a tuple as live in this block need to
1381  * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1382  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1383  * reltuples values, e.g. when there are many recently-dead
1384  * tuples.
1385  */
1386  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1387  hscan->rs_cbuf))
1388  {
1389  case HEAPTUPLE_DEAD:
1390  /* Definitely dead, we can ignore it */
1391  indexIt = false;
1392  tupleIsAlive = false;
1393  break;
1394  case HEAPTUPLE_LIVE:
1395  /* Normal case, index and unique-check it */
1396  indexIt = true;
1397  tupleIsAlive = true;
1398  /* Count it as live, too */
1399  reltuples += 1;
1400  break;
1401  case HEAPTUPLE_RECENTLY_DEAD:
1402 
1403  /*
1404  * If tuple is recently deleted then we must index it
1405  * anyway to preserve MVCC semantics. (Pre-existing
1406  * transactions could try to use the index after we finish
1407  * building it, and may need to see such tuples.)
1408  *
1409  * However, if it was HOT-updated then we must only index
1410  * the live tuple at the end of the HOT-chain. Since this
1411  * breaks semantics for pre-existing snapshots, mark the
1412  * index as unusable for them.
1413  *
1414  * We don't count recently-dead tuples in reltuples, even
1415  * if we index them; see heapam_scan_analyze_next_tuple().
1416  */
1417  if (HeapTupleIsHotUpdated(heapTuple))
1418  {
1419  indexIt = false;
1420  /* mark the index as unsafe for old snapshots */
1421  indexInfo->ii_BrokenHotChain = true;
1422  }
1423  else
1424  indexIt = true;
1425  /* In any case, exclude the tuple from unique-checking */
1426  tupleIsAlive = false;
1427  break;
1428  case HEAPTUPLE_INSERT_IN_PROGRESS:
1429 
1430  /*
1431  * In "anyvisible" mode, this tuple is visible and we
1432  * don't need any further checks.
1433  */
1434  if (anyvisible)
1435  {
1436  indexIt = true;
1437  tupleIsAlive = true;
1438  reltuples += 1;
1439  break;
1440  }
1441 
1442  /*
1443  * Since caller should hold ShareLock or better, normally
1444  * the only way to see this is if it was inserted earlier
1445  * in our own transaction. However, it can happen in
1446  * system catalogs, since we tend to release write lock
1447  * before commit there. Give a warning if neither case
1448  * applies.
1449  */
1450  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1451  if (!TransactionIdIsCurrentTransactionId(xwait))
1452  {
1453  if (!is_system_catalog)
1454  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1455  RelationGetRelationName(heapRelation));
1456 
1457  /*
1458  * If we are performing uniqueness checks, indexing
1459  * such a tuple could lead to a bogus uniqueness
1460  * failure. In that case we wait for the inserting
1461  * transaction to finish and check again.
1462  */
1463  if (checking_uniqueness)
1464  {
1465  /*
1466  * Must drop the lock on the buffer before we wait
1467  */
1468  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1469  XactLockTableWait(xwait, heapRelation,
1470  &heapTuple->t_self,
1471  XLTW_InsertIndexUnique);
1472  CHECK_FOR_INTERRUPTS();
1473  goto recheck;
1474  }
1475  }
1476  else
1477  {
1478  /*
1479  * For consistency with
1480  * heapam_scan_analyze_next_tuple(), count
1481  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1482  * when inserted by our own transaction.
1483  */
1484  reltuples += 1;
1485  }
1486 
1487  /*
1488  * We must index such tuples, since if the index build
1489  * commits then they're good.
1490  */
1491  indexIt = true;
1492  tupleIsAlive = true;
1493  break;
1494  case HEAPTUPLE_DELETE_IN_PROGRESS:
1495 
1496  /*
1497  * As with INSERT_IN_PROGRESS case, this is unexpected
1498  * unless it's our own deletion or a system catalog; but
1499  * in anyvisible mode, this tuple is visible.
1500  */
1501  if (anyvisible)
1502  {
1503  indexIt = true;
1504  tupleIsAlive = false;
1505  reltuples += 1;
1506  break;
1507  }
1508 
1509  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1510  if (!TransactionIdIsCurrentTransactionId(xwait))
1511  {
1512  if (!is_system_catalog)
1513  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1514  RelationGetRelationName(heapRelation));
1515 
1516  /*
1517  * If we are performing uniqueness checks, assuming
1518  * the tuple is dead could lead to missing a
1519  * uniqueness violation. In that case we wait for the
1520  * deleting transaction to finish and check again.
1521  *
1522  * Also, if it's a HOT-updated tuple, we should not
1523  * index it but rather the live tuple at the end of
1524  * the HOT-chain. However, the deleting transaction
1525  * could abort, possibly leaving this tuple as live
1526  * after all, in which case it has to be indexed. The
1527  * only way to know what to do is to wait for the
1528  * deleting transaction to finish and check again.
1529  */
1530  if (checking_uniqueness ||
1531  HeapTupleIsHotUpdated(heapTuple))
1532  {
1533  /*
1534  * Must drop the lock on the buffer before we wait
1535  */
1536  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1537  XactLockTableWait(xwait, heapRelation,
1538  &heapTuple->t_self,
1539  XLTW_RecentlyDeadUpdated);
1540  CHECK_FOR_INTERRUPTS();
1541  goto recheck;
1542  }
1543 
1544  /*
1545  * Otherwise index it but don't check for uniqueness,
1546  * the same as a RECENTLY_DEAD tuple.
1547  */
1548  indexIt = true;
1549 
1550  /*
1551  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1552  * if they were not deleted by the current
1553  * transaction. That's what
1554  * heapam_scan_analyze_next_tuple() does, and we want
1555  * the behavior to be consistent.
1556  */
1557  reltuples += 1;
1558  }
1559  else if (HeapTupleIsHotUpdated(heapTuple))
1560  {
1561  /*
1562  * It's a HOT-updated tuple deleted by our own xact.
1563  * We can assume the deletion will commit (else the
1564  * index contents don't matter), so treat the same as
1565  * RECENTLY_DEAD HOT-updated tuples.
1566  */
1567  indexIt = false;
1568  /* mark the index as unsafe for old snapshots */
1569  indexInfo->ii_BrokenHotChain = true;
1570  }
1571  else
1572  {
1573  /*
1574  * It's a regular tuple deleted by our own xact. Index
1575  * it, but don't check for uniqueness nor count in
1576  * reltuples, the same as a RECENTLY_DEAD tuple.
1577  */
1578  indexIt = true;
1579  }
1580  /* In any case, exclude the tuple from unique-checking */
1581  tupleIsAlive = false;
1582  break;
1583  default:
1584  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1585  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1586  break;
1587  }
1588 
1589  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1590 
1591  if (!indexIt)
1592  continue;
1593  }
1594  else
1595  {
1596  /* heap_getnext did the time qual check */
1597  tupleIsAlive = true;
1598  reltuples += 1;
1599  }
1600 
1601  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1602 
1603  /* Set up for predicate or expression evaluation */
1604  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1605 
1606  /*
1607  * In a partial index, discard tuples that don't satisfy the
1608  * predicate.
1609  */
1610  if (predicate != NULL)
1611  {
1612  if (!ExecQual(predicate, econtext))
1613  continue;
1614  }
1615 
1616  /*
1617  * For the current heap tuple, extract all the attributes we use in
1618  * this index, and note which are null. This also performs evaluation
1619  * of any expressions needed.
1620  */
1621  FormIndexDatum(indexInfo,
1622  slot,
1623  estate,
1624  values,
1625  isnull);
1626 
1627  /*
1628  * You'd think we should go ahead and build the index tuple here, but
1629  * some index AMs want to do further processing on the data first. So
1630  * pass the values[] and isnull[] arrays, instead.
1631  */
1632 
1633  if (HeapTupleIsHeapOnly(heapTuple))
1634  {
1635  /*
1636  * For a heap-only tuple, pretend its TID is that of the root. See
1637  * src/backend/access/heap/README.HOT for discussion.
1638  */
1639  HeapTupleData rootTuple;
1640  OffsetNumber offnum;
1641 
1642  rootTuple = *heapTuple;
1643  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1644 
1645  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1646  ereport(ERROR,
1647  (errcode(ERRCODE_DATA_CORRUPTED),
1648  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1649  ItemPointerGetBlockNumber(&heapTuple->t_self),
1650  offnum,
1651  RelationGetRelationName(heapRelation))));
1652 
1653  ItemPointerSetOffsetNumber(&rootTuple.t_self,
1654  root_offsets[offnum - 1]);
1655 
1656  /* Call the AM's callback routine to process the tuple */
1657  callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
1658  callback_state);
1659  }
1660  else
1661  {
1662  /* Call the AM's callback routine to process the tuple */
1663  callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
1664  callback_state);
1665  }
1666  }
1667 
1668  /* Report scan progress one last time. */
1669  if (progress)
1670  {
1671  BlockNumber blks_done;
1672 
1673  if (hscan->rs_base.rs_parallel != NULL)
1674  {
1676 
1678  blks_done = pbscan->phs_nblocks;
1679  }
1680  else
1681  blks_done = hscan->rs_nblocks;
1682 
1683  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1684  blks_done);
1685  }
1686 
1687  table_endscan(scan);
1688 
1689  /* we can now forget our snapshot, if set and registered by us */
1690  if (need_unregister_snapshot)
1691  UnregisterSnapshot(snapshot);
1692 
1693  ExecDropSingleTupleTableSlot(slot);
1694 
1695  FreeExecutorState(estate);
1696 
1697  /* These may have been pointing to the now-gone estate */
1698  indexInfo->ii_ExpressionsState = NIL;
1699  indexInfo->ii_PredicateState = NULL;
1700 
1701  return reltuples;
1702 }
1703 
1704 static void
1705 heapam_index_validate_scan(Relation heapRelation,
1706  Relation indexRelation,
1707  IndexInfo *indexInfo,
1708  Snapshot snapshot,
1709  ValidateIndexState *state)
1710 {
1711  TableScanDesc scan;
1712  HeapScanDesc hscan;
1713  HeapTuple heapTuple;
1714  Datum values[INDEX_MAX_KEYS];
1715  bool isnull[INDEX_MAX_KEYS];
1716  ExprState *predicate;
1717  TupleTableSlot *slot;
1718  EState *estate;
1719  ExprContext *econtext;
1720  BlockNumber root_blkno = InvalidBlockNumber;
1721  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1722  bool in_index[MaxHeapTuplesPerPage];
1723  BlockNumber previous_blkno = InvalidBlockNumber;
1724 
1725  /* state variables for the merge */
1726  ItemPointer indexcursor = NULL;
1727  ItemPointerData decoded;
1728  bool tuplesort_empty = false;
1729 
1730  /*
1731  * sanity checks
1732  */
1733  Assert(OidIsValid(indexRelation->rd_rel->relam));
1734 
1735  /*
1736  * Need an EState for evaluation of index expressions and partial-index
1737  * predicates. Also a slot to hold the current tuple.
1738  */
1739  estate = CreateExecutorState();
1740  econtext = GetPerTupleExprContext(estate);
1741  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1742  &TTSOpsHeapTuple);
1743 
1744  /* Arrange for econtext's scan tuple to be the tuple under test */
1745  econtext->ecxt_scantuple = slot;
1746 
1747  /* Set up execution state for predicate, if any. */
1748  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1749 
1750  /*
1751  * Prepare for scan of the base relation. We need just those tuples
1752  * satisfying the passed-in reference snapshot. We must disable syncscan
1753  * here, because it's critical that we read from block zero forward to
1754  * match the sorted TIDs.
1755  */
1756  scan = table_beginscan_strat(heapRelation, /* relation */
1757  snapshot, /* snapshot */
1758  0, /* number of keys */
1759  NULL, /* scan key */
1760  true, /* buffer access strategy OK */
1761  false); /* syncscan not OK */
1762  hscan = (HeapScanDesc) scan;
1763 
1764  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1765  hscan->rs_nblocks);
1766 
1767  /*
1768  * Scan all tuples matching the snapshot.
1769  */
1770  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1771  {
1772  ItemPointer heapcursor = &heapTuple->t_self;
1773  ItemPointerData rootTuple;
1774  OffsetNumber root_offnum;
1775 
1776  CHECK_FOR_INTERRUPTS();
1777 
1778  state->htups += 1;
1779 
1780  if ((previous_blkno == InvalidBlockNumber) ||
1781  (hscan->rs_cblock != previous_blkno))
1782  {
1783  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1784  hscan->rs_cblock);
1785  previous_blkno = hscan->rs_cblock;
1786  }
1787 
1788  /*
1789  * As commented in table_index_build_scan, we should index heap-only
1790  * tuples under the TIDs of their root tuples; so when we advance onto
1791  * a new heap page, build a map of root item offsets on the page.
1792  *
1793  * This complicates merging against the tuplesort output: we will
1794  * visit the live tuples in order by their offsets, but the root
1795  * offsets that we need to compare against the index contents might be
1796  * ordered differently. So we might have to "look back" within the
1797  * tuplesort output, but only within the current page. We handle that
1798  * by keeping a bool array in_index[] showing all the
1799  * already-passed-over tuplesort output TIDs of the current page. We
1800  * clear that array here, when advancing onto a new heap page.
1801  */
1802  if (hscan->rs_cblock != root_blkno)
1803  {
1804  Page page = BufferGetPage(hscan->rs_cbuf);
1805 
1806  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1807  heap_get_root_tuples(page, root_offsets);
1808  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1809 
1810  memset(in_index, 0, sizeof(in_index));
1811 
1812  root_blkno = hscan->rs_cblock;
1813  }
1814 
1815  /* Convert actual tuple TID to root TID */
1816  rootTuple = *heapcursor;
1817  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1818 
1819  if (HeapTupleIsHeapOnly(heapTuple))
1820  {
1821  root_offnum = root_offsets[root_offnum - 1];
1822  if (!OffsetNumberIsValid(root_offnum))
1823  ereport(ERROR,
1824  (errcode(ERRCODE_DATA_CORRUPTED),
1825  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1826  ItemPointerGetBlockNumber(heapcursor),
1827  ItemPointerGetOffsetNumber(heapcursor),
1828  RelationGetRelationName(heapRelation))));
1829  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1830  }
1831 
1832  /*
1833  * "merge" by skipping through the index tuples until we find or pass
1834  * the current root tuple.
1835  */
1836  while (!tuplesort_empty &&
1837  (!indexcursor ||
1838  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1839  {
1840  Datum ts_val;
1841  bool ts_isnull;
1842 
1843  if (indexcursor)
1844  {
1845  /*
1846  * Remember index items seen earlier on the current heap page
1847  */
1848  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1849  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1850  }
1851 
1852  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1853  &ts_val, &ts_isnull, NULL);
1854  Assert(tuplesort_empty || !ts_isnull);
1855  if (!tuplesort_empty)
1856  {
1857  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1858  indexcursor = &decoded;
1859 
1860  /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
1861 #ifndef USE_FLOAT8_BYVAL
1862  pfree(DatumGetPointer(ts_val));
1863 #endif
1864  }
1865  else
1866  {
1867  /* Be tidy */
1868  indexcursor = NULL;
1869  }
1870  }
1871 
1872  /*
1873  * If the tuplesort has overshot *and* we didn't see a match earlier,
1874  * then this tuple is missing from the index, so insert it.
1875  */
1876  if ((tuplesort_empty ||
1877  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1878  !in_index[root_offnum - 1])
1879  {
1880  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1881 
1882  /* Set up for predicate or expression evaluation */
1883  ExecStoreHeapTuple(heapTuple, slot, false);
1884 
1885  /*
1886  * In a partial index, discard tuples that don't satisfy the
1887  * predicate.
1888  */
1889  if (predicate != NULL)
1890  {
1891  if (!ExecQual(predicate, econtext))
1892  continue;
1893  }
1894 
1895  /*
1896  * For the current heap tuple, extract all the attributes we use
1897  * in this index, and note which are null. This also performs
1898  * evaluation of any expressions needed.
1899  */
1900  FormIndexDatum(indexInfo,
1901  slot,
1902  estate,
1903  values,
1904  isnull);
1905 
1906  /*
1907  * You'd think we should go ahead and build the index tuple here,
1908  * but some index AMs want to do further processing on the data
1909  * first. So pass the values[] and isnull[] arrays, instead.
1910  */
1911 
1912  /*
1913  * If the tuple is already committed dead, you might think we
1914  * could suppress uniqueness checking, but this is no longer true
1915  * in the presence of HOT, because the insert is actually a proxy
1916  * for a uniqueness check on the whole HOT-chain. That is, the
1917  * tuple we have here could be dead because it was already
1918  * HOT-updated, and if so the updating transaction will not have
1919  * thought it should insert index entries. The index AM will
1920  * check the whole HOT-chain and correctly detect a conflict if
1921  * there is one.
1922  */
1923 
1924  index_insert(indexRelation,
1925  values,
1926  isnull,
1927  &rootTuple,
1928  heapRelation,
1929  indexInfo->ii_Unique ?
1930  UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1931  indexInfo);
1932 
1933  state->tups_inserted += 1;
1934  }
1935  }
1936 
1937  table_endscan(scan);
1938 
1939  ExecDropSingleTupleTableSlot(slot);
1940 
1941  FreeExecutorState(estate);
1942 
1943  /* These may have been pointing to the now-gone estate */
1944  indexInfo->ii_ExpressionsState = NIL;
1945  indexInfo->ii_PredicateState = NULL;
1946 }
1947 
1948 /*
1949  * Return the number of blocks that have been read by this scan since
1950  * starting. This is meant for progress reporting rather than being fully
1951  * accurate: in a parallel scan, workers can be concurrently reading blocks
1952  * further ahead than what we report.
1953  */
1954 static BlockNumber
1955 heapam_scan_get_blocks_done(HeapScanDesc hscan)
1956 {
1957  ParallelBlockTableScanDesc bpscan = NULL;
1958  BlockNumber startblock;
1959  BlockNumber blocks_done;
1960 
1961  if (hscan->rs_base.rs_parallel != NULL)
1962  {
1963  bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1964  startblock = bpscan->phs_startblock;
1965  }
1966  else
1967  startblock = hscan->rs_startblock;
1968 
1969  /*
1970  * Might have wrapped around the end of the relation, if startblock was
1971  * not zero.
1972  */
1973  if (hscan->rs_cblock > startblock)
1974  blocks_done = hscan->rs_cblock - startblock;
1975  else
1976  {
1977  BlockNumber nblocks;
1978 
1979  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
1980  blocks_done = nblocks - startblock +
1981  hscan->rs_cblock;
1982  }
1983 
1984  return blocks_done;
1985 }
1986 
1987 
1988 /* ------------------------------------------------------------------------
1989  * Miscellaneous callbacks for the heap AM
1990  * ------------------------------------------------------------------------
1991  */
1992 
1993 /*
1994  * Check to see whether the table needs a TOAST table. It does only if
1995  * (1) there are any toastable attributes, and (2) the maximum length
1996  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
1997  * create a toast table for something like "f1 varchar(20)".)
1998  */
1999 static bool
2000 heapam_relation_needs_toast_table(Relation rel)
2001 {
2002  int32 data_length = 0;
2003  bool maxlength_unknown = false;
2004  bool has_toastable_attrs = false;
2005  TupleDesc tupdesc = rel->rd_att;
2006  int32 tuple_length;
2007  int i;
2008 
2009  for (i = 0; i < tupdesc->natts; i++)
2010  {
2011  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2012 
2013  if (att->attisdropped)
2014  continue;
2015  data_length = att_align_nominal(data_length, att->attalign);
2016  if (att->attlen > 0)
2017  {
2018  /* Fixed-length types are never toastable */
2019  data_length += att->attlen;
2020  }
2021  else
2022  {
2023  int32 maxlen = type_maximum_size(att->atttypid,
2024  att->atttypmod);
2025 
2026  if (maxlen < 0)
2027  maxlength_unknown = true;
2028  else
2029  data_length += maxlen;
2030  if (att->attstorage != 'p')
2031  has_toastable_attrs = true;
2032  }
2033  }
2034  if (!has_toastable_attrs)
2035  return false; /* nothing to toast? */
2036  if (maxlength_unknown)
2037  return true; /* any unlimited-length attrs? */
2038  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2039  BITMAPLEN(tupdesc->natts)) +
2040  MAXALIGN(data_length);
2041  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2042 }
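
To make the threshold concrete (assuming the default 8 kB block size, where
TOAST_TUPLE_THRESHOLD works out to roughly 2 kB): a lone "f1 varchar(20)"
column has a known, small maximum length, so the function returns false and no
TOAST table is created, whereas a "text" column makes type_maximum_size()
return -1, sets maxlength_unknown, and forces a TOAST table.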
2043 
2044 
2045 /* ------------------------------------------------------------------------
2046  * Planner related callbacks for the heap AM
2047  * ------------------------------------------------------------------------
2048  */
2049 
2050 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2051  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2052 #define HEAP_USABLE_BYTES_PER_PAGE \
2053  (BLCKSZ - SizeOfPageHeaderData)
2054 
2055 static void
2056 heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
2057  BlockNumber *pages, double *tuples,
2058  double *allvisfrac)
2059 {
2060  table_block_relation_estimate_size(rel, attr_widths, pages,
2061  tuples, allvisfrac,
2062  HEAP_OVERHEAD_BYTES_PER_TUPLE,
2063  HEAP_USABLE_BYTES_PER_PAGE);
2064 }
2065 
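For reference, on a typical 64-bit build with 8 kB blocks these macros evaluate
to MAXALIGN(23) + 4 = 28 overhead bytes per tuple and 8192 - 24 = 8168 usable
bytes per page; table_block_relation_estimate_size() combines them with the
estimated datum widths to derive a tuples-per-page density.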
2066 
2067 /* ------------------------------------------------------------------------
2068  * Executor related callbacks for the heap AM
2069  * ------------------------------------------------------------------------
2070  */
2071 
2072 static bool
2073 heapam_scan_bitmap_next_block(TableScanDesc scan,
2074  TBMIterateResult *tbmres)
2075 {
2076  HeapScanDesc hscan = (HeapScanDesc) scan;
2077  BlockNumber page = tbmres->blockno;
2078  Buffer buffer;
2079  Snapshot snapshot;
2080  int ntup;
2081 
2082  hscan->rs_cindex = 0;
2083  hscan->rs_ntuples = 0;
2084 
2085  /*
2086  * Ignore any claimed entries past what we think is the end of the
2087  * relation. It may have been extended after the start of our scan (we
2088  * only hold an AccessShareLock, and the new blocks could even have been
2089  * inserted by this backend).
2090  */
2091  if (page >= hscan->rs_nblocks)
2092  return false;
2093 
2094  /*
2095  * Acquire pin on the target heap page, trading in any pin we held before.
2096  */
2097  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2098  scan->rs_rd,
2099  page);
2100  hscan->rs_cblock = page;
2101  buffer = hscan->rs_cbuf;
2102  snapshot = scan->rs_snapshot;
2103 
2104  ntup = 0;
2105 
2106  /*
2107  * Prune and repair fragmentation for the whole page, if possible.
2108  */
2109  heap_page_prune_opt(scan->rs_rd, buffer);
2110 
2111  /*
2112  * We must hold share lock on the buffer content while examining tuple
2113  * visibility. Afterwards, however, the tuples we have found to be
2114  * visible are guaranteed good as long as we hold the buffer pin.
2115  */
2116  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2117 
2118  /*
2119  * We need two separate strategies for lossy and non-lossy cases.
2120  */
2121  if (tbmres->ntuples >= 0)
2122  {
2123  /*
2124  * Bitmap is non-lossy, so we just look through the offsets listed in
2125  * tbmres; but we have to follow any HOT chain starting at each such
2126  * offset.
2127  */
2128  int curslot;
2129 
2130  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2131  {
2132  OffsetNumber offnum = tbmres->offsets[curslot];
2133  ItemPointerData tid;
2134  HeapTupleData heapTuple;
2135 
2136  ItemPointerSet(&tid, page, offnum);
2137  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2138  &heapTuple, NULL, true))
2139  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2140  }
2141  }
2142  else
2143  {
2144  /*
2145  * Bitmap is lossy, so we must examine each line pointer on the page.
2146  * But we can ignore HOT chains, since we'll check each tuple anyway.
2147  */
2148  Page dp = (Page) BufferGetPage(buffer);
2149  OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
2150  OffsetNumber offnum;
2151 
2152  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2153  {
2154  ItemId lp;
2155  HeapTupleData loctup;
2156  bool valid;
2157 
2158  lp = PageGetItemId(dp, offnum);
2159  if (!ItemIdIsNormal(lp))
2160  continue;
2161  loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2162  loctup.t_len = ItemIdGetLength(lp);
2163  loctup.t_tableOid = scan->rs_rd->rd_id;
2164  ItemPointerSet(&loctup.t_self, page, offnum);
2165  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2166  if (valid)
2167  {
2168  hscan->rs_vistuples[ntup++] = offnum;
2169  PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
2170  }
2171  CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2172  buffer, snapshot);
2173  }
2174  }
2175 
2176  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2177 
2178  Assert(ntup <= MaxHeapTuplesPerPage);
2179  hscan->rs_ntuples = ntup;
2180 
2181  return ntup > 0;
2182 }
2183 
2184 static bool
2185 heapam_scan_bitmap_next_tuple(TableScanDesc scan,
2186  TBMIterateResult *tbmres,
2187  TupleTableSlot *slot)
2188 {
2189  HeapScanDesc hscan = (HeapScanDesc) scan;
2190  OffsetNumber targoffset;
2191  Page dp;
2192  ItemId lp;
2193 
2194  /*
2195  * Out of range? If so, nothing more to look at on this page
2196  */
2197  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2198  return false;
2199 
2200  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2201  dp = (Page) BufferGetPage(hscan->rs_cbuf);
2202  lp = PageGetItemId(dp, targoffset);
2203  Assert(ItemIdIsNormal(lp));
2204 
2205  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2206  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2207  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2208  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2209 
2210  pgstat_count_heap_fetch(scan->rs_rd);
2211 
2212  /*
2213  * Set up the result slot to point to this tuple. Note that the slot
2214  * acquires a pin on the buffer.
2215  */
2216  ExecStoreBufferHeapTuple(&hscan->rs_ctup,
2217  slot,
2218  hscan->rs_cbuf);
2219 
2220  hscan->rs_cindex++;
2221 
2222  return true;
2223 }
2224 
2225 static bool
2226 heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
2227 {
2228  HeapScanDesc hscan = (HeapScanDesc) scan;
2229  TsmRoutine *tsm = scanstate->tsmroutine;
2230  BlockNumber blockno;
2231 
2232  /* return false immediately if relation is empty */
2233  if (hscan->rs_nblocks == 0)
2234  return false;
2235 
2236  if (tsm->NextSampleBlock)
2237  {
2238  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2239  hscan->rs_cblock = blockno;
2240  }
2241  else
2242  {
2243  /* scanning table sequentially */
2244 
2245  if (hscan->rs_cblock == InvalidBlockNumber)
2246  {
2247  Assert(!hscan->rs_inited);
2248  blockno = hscan->rs_startblock;
2249  }
2250  else
2251  {
2252  Assert(hscan->rs_inited);
2253 
2254  blockno = hscan->rs_cblock + 1;
2255 
2256  if (blockno >= hscan->rs_nblocks)
2257  {
2258  /* wrap to beginning of rel, might not have started at 0 */
2259  blockno = 0;
2260  }
2261 
2262  /*
2263  * Report our new scan position for synchronization purposes.
2264  *
2265  * Note: we do this before checking for end of scan so that the
2266  * final state of the position hint is back at the start of the
2267  * rel. That's not strictly necessary, but otherwise when you run
2268  * the same query multiple times the starting position would shift
2269  * a little bit backwards on every invocation, which is confusing.
2270  * We don't guarantee any specific ordering in general, though.
2271  */
2272  if (scan->rs_flags & SO_ALLOW_SYNC)
2273  ss_report_location(scan->rs_rd, blockno);
2274 
2275  if (blockno == hscan->rs_startblock)
2276  {
2277  blockno = InvalidBlockNumber;
2278  }
2279  }
2280  }
2281 
2282  if (!BlockNumberIsValid(blockno))
2283  {
2284  if (BufferIsValid(hscan->rs_cbuf))
2285  ReleaseBuffer(hscan->rs_cbuf);
2286  hscan->rs_cbuf = InvalidBuffer;
2287  hscan->rs_cblock = InvalidBlockNumber;
2288  hscan->rs_inited = false;
2289 
2290  return false;
2291  }
2292 
2293  heapgetpage(scan, blockno);
2294  hscan->rs_inited = true;
2295 
2296  return true;
2297 }
2298 
2299 static bool
2300 heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
2301  TupleTableSlot *slot)
2302 {
2303  HeapScanDesc hscan = (HeapScanDesc) scan;
2304  TsmRoutine *tsm = scanstate->tsmroutine;
2305  BlockNumber blockno = hscan->rs_cblock;
2306  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2307 
2308  Page page;
2309  bool all_visible;
2310  OffsetNumber maxoffset;
2311 
2312  /*
2313  * When not using pagemode, we must lock the buffer during tuple
2314  * visibility checks.
2315  */
2316  if (!pagemode)
2317  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
2318 
2319  page = (Page) BufferGetPage(hscan->rs_cbuf);
2320  all_visible = PageIsAllVisible(page) &&
2321  !scan->rs_snapshot->takenDuringRecovery;
2322  maxoffset = PageGetMaxOffsetNumber(page);
2323 
2324  for (;;)
2325  {
2326  OffsetNumber tupoffset;
2327 
2328  CHECK_FOR_INTERRUPTS();
2329 
2330  /* Ask the tablesample method which tuples to check on this page. */
2331  tupoffset = tsm->NextSampleTuple(scanstate,
2332  blockno,
2333  maxoffset);
2334 
2335  if (OffsetNumberIsValid(tupoffset))
2336  {
2337  ItemId itemid;
2338  bool visible;
2339  HeapTuple tuple = &(hscan->rs_ctup);
2340 
2341  /* Skip invalid tuple pointers. */
2342  itemid = PageGetItemId(page, tupoffset);
2343  if (!ItemIdIsNormal(itemid))
2344  continue;
2345 
2346  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2347  tuple->t_len = ItemIdGetLength(itemid);
2348  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2349 
2350 
2351  if (all_visible)
2352  visible = true;
2353  else
2354  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2355  tuple, tupoffset);
2356 
2357  /* in pagemode, heapgetpage did this for us */
2358  if (!pagemode)
2359  CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2360  hscan->rs_cbuf, scan->rs_snapshot);
2361 
2362  /* Try next tuple from same page. */
2363  if (!visible)
2364  continue;
2365 
2366  /* Found visible tuple, return it. */
2367  if (!pagemode)
2368  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2369 
2370  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2371 
2372  /* Count successfully-fetched tuples as heap fetches */
2373  pgstat_count_heap_fetch(scan->rs_rd);
2374 
2375  return true;
2376  }
2377  else
2378  {
2379  /*
2380  * If we get here, it means we've exhausted the items on this page
2381  * and it's time to move to the next.
2382  */
2383  if (!pagemode)
2384  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2385 
2386  ExecClearTuple(slot);
2387  return false;
2388  }
2389  }
2390 
2391  Assert(0);
2392 }
2393 
2394 
2395 /* ----------------------------------------------------------------------------
2396  * Helper functions for the above.
2397  * ----------------------------------------------------------------------------
2398  */
2399 
2400 /*
2401  * Reconstruct and rewrite the given tuple
2402  *
2403  * We cannot simply copy the tuple as-is, for several reasons:
2404  *
2405  * 1. We'd like to squeeze out the values of any dropped columns, both
2406  * to save space and to ensure we have no corner-case failures. (It's
2407  * possible for example that the new table hasn't got a TOAST table
2408  * and so is unable to store any large values of dropped cols.)
2409  *
2410  * 2. The tuple might not even be legal for the new table; this is
2411  * currently only known to happen as an after-effect of ALTER TABLE
2412  * SET WITHOUT OIDS.
2413  *
2414  * So, we must reconstruct the tuple from component Datums.
2415  */
2416 static void
2417 reform_and_rewrite_tuple(HeapTuple tuple,
2418  Relation OldHeap, Relation NewHeap,
2419  Datum *values, bool *isnull, RewriteState rwstate)
2420 {
2421  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2422  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2423  HeapTuple copiedTuple;
2424  int i;
2425 
2426  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2427 
2428  /* Be sure to null out any dropped columns */
2429  for (i = 0; i < newTupDesc->natts; i++)
2430  {
2431  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2432  isnull[i] = true;
2433  }
2434 
2435  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2436 
2437  /* The heap rewrite module does the rest */
2438  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2439 
2440  heap_freetuple(copiedTuple);
2441 }
2442 
2443 /*
2444  * Check visibility of the tuple.
2445  */
2446 static bool
2447 SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
2448  HeapTuple tuple,
2449  OffsetNumber tupoffset)
2450 {
2451  HeapScanDesc hscan = (HeapScanDesc) scan;
2452 
2453  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2454  {
2455  /*
2456  * In page-at-a-time mode, heapgetpage() already did visibility checks,
2457  * so just look at the info it left in rs_vistuples[].
2458  *
2459  * We use a binary search over the known-sorted array. Note: we could
2460  * save some effort if we insisted that NextSampleTuple select tuples
2461  * in increasing order, but it's not clear that there would be enough
2462  * gain to justify the restriction.
2463  */
2464  int start = 0,
2465  end = hscan->rs_ntuples - 1;
2466 
2467  while (start <= end)
2468  {
2469  int mid = (start + end) / 2;
2470  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2471 
2472  if (tupoffset == curoffset)
2473  return true;
2474  else if (tupoffset < curoffset)
2475  end = mid - 1;
2476  else
2477  start = mid + 1;
2478  }
2479 
2480  return false;
2481  }
2482  else
2483  {
2484  /* Otherwise, we have to check the tuple individually. */
2485  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2486  buffer);
2487  }
2488 }
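
For instance, if heapgetpage() left rs_vistuples[] = {3, 7, 12}, a request for
offset 7 hits the middle probe (mid = 1) and is reported visible without
re-examining the buffer, while offset 8 exhausts the search and returns false.
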
2489 
2490 
2491 /* ------------------------------------------------------------------------
2492  * Definition of the heap table access method.
2493  * ------------------------------------------------------------------------
2494  */
2495 
2496 static const TableAmRoutine heapam_methods = {
2497  .type = T_TableAmRoutine,
2498 
2499  .slot_callbacks = heapam_slot_callbacks,
2500 
2501  .scan_begin = heap_beginscan,
2502  .scan_end = heap_endscan,
2503  .scan_rescan = heap_rescan,
2504  .scan_getnextslot = heap_getnextslot,
2505 
2506  .parallelscan_estimate = table_block_parallelscan_estimate,
2507  .parallelscan_initialize = table_block_parallelscan_initialize,
2508  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2509 
2510  .index_fetch_begin = heapam_index_fetch_begin,
2511  .index_fetch_reset = heapam_index_fetch_reset,
2512  .index_fetch_end = heapam_index_fetch_end,
2513  .index_fetch_tuple = heapam_index_fetch_tuple,
2514 
2515  .tuple_insert = heapam_tuple_insert,
2516  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2517  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2518  .multi_insert = heap_multi_insert,
2519  .tuple_delete = heapam_tuple_delete,
2520  .tuple_update = heapam_tuple_update,
2521  .tuple_lock = heapam_tuple_lock,
2522  .finish_bulk_insert = heapam_finish_bulk_insert,
2523 
2524  .tuple_fetch_row_version = heapam_fetch_row_version,
2525  .tuple_get_latest_tid = heap_get_latest_tid,
2526  .tuple_tid_valid = heapam_tuple_tid_valid,
2527  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2528  .compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
2529 
2530  .relation_set_new_filenode = heapam_relation_set_new_filenode,
2531  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2532  .relation_copy_data = heapam_relation_copy_data,
2533  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2534  .relation_vacuum = heap_vacuum_rel,
2535  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2536  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2537  .index_build_range_scan = heapam_index_build_range_scan,
2538  .index_validate_scan = heapam_index_validate_scan,
2539 
2540  .relation_size = table_block_relation_size,
2541  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2542 
2543  .relation_estimate_size = heapam_estimate_rel_size,
2544 
2545  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2546  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2547  .scan_sample_next_block = heapam_scan_sample_next_block,
2548  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2549 };
2550 
2551 
2552 const TableAmRoutine *
2553 GetHeapamTableAmRoutine(void)
2554 {
2555  return &heapam_methods;
2556 }
2557 
2558 Datum
2559 heap_tableam_handler(PG_FUNCTION_ARGS)
2560 {
2561  PG_RETURN_POINTER(&heapam_methods);
2562 }
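
heap_tableam_handler() above is the pattern every table access method follows:
a handler function returning Datum that hands back a pointer to a statically
allocated TableAmRoutine. As a rough sketch of how an out-of-tree AM would
mirror it (my_methods, my_tableam_handler and the omitted callbacks are
hypothetical illustrations, not part of this file):

    #include "postgres.h"

    #include "access/tableam.h"
    #include "fmgr.h"

    PG_MODULE_MAGIC;

    /*
     * Callback table; a real AM must fill in every callback that the
     * tableam interface requires.
     */
    static const TableAmRoutine my_methods = {
        .type = T_TableAmRoutine,
        /* .slot_callbacks = ..., .scan_begin = ..., and so on */
    };

    PG_FUNCTION_INFO_V1(my_tableam_handler);

    Datum
    my_tableam_handler(PG_FUNCTION_ARGS)
    {
        PG_RETURN_POINTER(&my_methods);
    }

SQL then attaches the handler with CREATE ACCESS METHOD ... TYPE TABLE HANDLER
my_tableam_handler, after which CREATE TABLE ... USING that method routes table
operations through the returned routine, just as the builtin heap uses
heapam_methods.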