heapam_handler.c
1 /*-------------------------------------------------------------------------
2  *
3  * heapam_handler.c
4  * heap table access method code
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/heap/heapam_handler.c
12  *
13  *
14  * NOTES
15  * This file wires up the lower-level heapam.c et al. routines with the
16  * tableam abstraction.
17  *
18  *-------------------------------------------------------------------------
19  */
20 #include "postgres.h"
21 
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/heaptoast.h"
25 #include "access/multixact.h"
26 #include "access/rewriteheap.h"
27 #include "access/tableam.h"
28 #include "access/tsmapi.h"
29 #include "access/xact.h"
30 #include "catalog/catalog.h"
31 #include "catalog/index.h"
32 #include "catalog/storage.h"
33 #include "catalog/storage_xlog.h"
34 #include "commands/progress.h"
35 #include "executor/executor.h"
36 #include "miscadmin.h"
37 #include "pgstat.h"
38 #include "storage/bufmgr.h"
39 #include "storage/bufpage.h"
40 #include "storage/lmgr.h"
41 #include "storage/predicate.h"
42 #include "storage/procarray.h"
43 #include "storage/smgr.h"
44 #include "utils/builtins.h"
45 #include "utils/rel.h"
46 
47 static void reform_and_rewrite_tuple(HeapTuple tuple,
48  Relation OldHeap, Relation NewHeap,
49  Datum *values, bool *isnull, RewriteState rwstate);
50 
51 static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
52  HeapTuple tuple,
53  OffsetNumber tupoffset);
54 
55 static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
56 
57 static const TableAmRoutine heapam_methods;
58 
59 
60 /* ------------------------------------------------------------------------
61  * Slot related callbacks for heap AM
62  * ------------------------------------------------------------------------
63  */
64 
65 static const TupleTableSlotOps *
66 heapam_slot_callbacks(Relation relation)
67 {
68  return &TTSOpsBufferHeapTuple;
69 }
70 
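/*
 * Illustrative sketch (not part of heapam_handler.c): callers never use
 * TTSOpsBufferHeapTuple directly; they call table_slot_create(), which looks
 * up rel->rd_tableam->slot_callbacks and so ends up in heapam_slot_callbacks()
 * for heap tables.  The relation OID below is a hypothetical placeholder and
 * the guard macro is never defined.
 */
#ifdef HEAPAM_HANDLER_EXAMPLE_SKETCHES
static void
example_create_heap_slot(Oid example_relid)
{
	Relation	rel = table_open(example_relid, AccessShareLock);
	TupleTableSlot *slot = table_slot_create(rel, NULL);	/* buffer-heap slot */

	/* ... scan or fetch tuples into the slot here ... */

	ExecDropSingleTupleTableSlot(slot);
	table_close(rel, AccessShareLock);
}
#endif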
71 
72 /* ------------------------------------------------------------------------
73  * Index Scan Callbacks for heap AM
74  * ------------------------------------------------------------------------
75  */
76 
77 static IndexFetchTableData *
78 heapam_index_fetch_begin(Relation rel)
79 {
80  IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
81 
82  hscan->xs_base.rel = rel;
83  hscan->xs_cbuf = InvalidBuffer;
84 
85  return &hscan->xs_base;
86 }
87 
88 static void
89 heapam_index_fetch_reset(IndexFetchTableData *scan)
90 {
91  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
92 
93  if (BufferIsValid(hscan->xs_cbuf))
94  {
95  ReleaseBuffer(hscan->xs_cbuf);
96  hscan->xs_cbuf = InvalidBuffer;
97  }
98 }
99 
100 static void
101 heapam_index_fetch_end(IndexFetchTableData *scan)
102 {
103  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
104 
105  heapam_index_fetch_reset(scan);
106 
107  pfree(hscan);
108 }
109 
110 static bool
111 heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
112  ItemPointer tid,
113  Snapshot snapshot,
114  TupleTableSlot *slot,
115  bool *call_again, bool *all_dead)
116 {
117  IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
118  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
119  bool got_heap_tuple;
120 
121  Assert(TTS_IS_BUFFERTUPLE(slot));
122 
123  /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
124  if (!*call_again)
125  {
126  /* Switch to correct buffer if we don't have it already */
127  Buffer prev_buf = hscan->xs_cbuf;
128 
129  hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
130  hscan->xs_base.rel,
131  ItemPointerGetBlockNumber(tid));
132 
133  /*
134  * Prune page, but only if we weren't already on this page
135  */
136  if (prev_buf != hscan->xs_cbuf)
137  heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
138  }
139 
140  /* Obtain share-lock on the buffer so we can examine visibility */
141  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
142  got_heap_tuple = heap_hot_search_buffer(tid,
143  hscan->xs_base.rel,
144  hscan->xs_cbuf,
145  snapshot,
146  &bslot->base.tupdata,
147  all_dead,
148  !*call_again);
149  bslot->base.tupdata.t_self = *tid;
150  LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
151 
152  if (got_heap_tuple)
153  {
154  /*
155  * Only in a non-MVCC snapshot can more than one member of the HOT
156  * chain be visible.
157  */
158  *call_again = !IsMVCCSnapshot(snapshot);
159 
160  slot->tts_tableOid = RelationGetRelid(scan->rel);
161  ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
162  }
163  else
164  {
165  /* We've reached the end of the HOT chain. */
166  *call_again = false;
167  }
168 
169  return got_heap_tuple;
170 }
171 
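/*
 * Illustrative sketch (not part of heapam_handler.c) of the caller-side
 * protocol for the fetch callback above: with a non-MVCC snapshot more than
 * one member of a HOT chain can be visible, so *call_again asks the caller to
 * come back for the next member, while *all_dead lets an index AM mark the
 * index entry killed.  The helper name is hypothetical and the guard macro is
 * never defined.
 */
#ifdef HEAPAM_HANDLER_EXAMPLE_SKETCHES
static void
example_fetch_hot_chain(IndexFetchTableData *scan, ItemPointer tid,
						Snapshot snapshot, TupleTableSlot *slot)
{
	bool		call_again = false;
	bool		all_dead = false;

	do
	{
		if (!table_index_fetch_tuple(scan, tid, snapshot, slot,
									 &call_again, &all_dead))
			break;				/* no (further) visible chain member */
		/* ... process the tuple now stored in slot ... */
	} while (call_again);
}
#endif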
172 
173 /* ------------------------------------------------------------------------
174  * Callbacks for non-modifying operations on individual tuples for heap AM
175  * ------------------------------------------------------------------------
176  */
177 
178 static bool
179 heapam_fetch_row_version(Relation relation,
180  ItemPointer tid,
181  Snapshot snapshot,
182  TupleTableSlot *slot)
183 {
184  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
185  Buffer buffer;
186 
187  Assert(TTS_IS_BUFFERTUPLE(slot));
188 
189  bslot->base.tupdata.t_self = *tid;
190  if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer))
191  {
192  /* store in slot, transferring existing pin */
193  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
194  slot->tts_tableOid = RelationGetRelid(relation);
195 
196  return true;
197  }
198 
199  return false;
200 }
201 
202 static bool
203 heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
204 {
205  HeapScanDesc hscan = (HeapScanDesc) scan;
206 
207  return ItemPointerIsValid(tid) &&
208  ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
209 }
210 
211 static bool
212 heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
213  Snapshot snapshot)
214 {
215  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
216  bool res;
217 
218  Assert(TTS_IS_BUFFERTUPLE(slot));
219  Assert(BufferIsValid(bslot->buffer));
220 
221  /*
222  * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
223  * Caller should be holding pin, but not lock.
224  */
225  LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
226  res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
227  bslot->buffer);
228  LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
229 
230  return res;
231 }
232 
233 
234 /* ----------------------------------------------------------------------------
235  * Functions for manipulations of physical tuples for heap AM.
236  * ----------------------------------------------------------------------------
237  */
238 
239 static void
240 heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
241  int options, BulkInsertState bistate)
242 {
243  bool shouldFree = true;
244  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
245 
246  /* Update the tuple with table oid */
247  slot->tts_tableOid = RelationGetRelid(relation);
248  tuple->t_tableOid = slot->tts_tableOid;
249 
250  /* Perform the insertion, and copy the resulting ItemPointer */
251  heap_insert(relation, tuple, cid, options, bistate);
252  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
253 
254  if (shouldFree)
255  pfree(tuple);
256 }
257 
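/*
 * Illustrative sketch (not part of heapam_handler.c): row insertion normally
 * arrives here through the tableam wrapper table_tuple_insert(), roughly as
 * below.  The relation OID, single-column layout, and helper name are
 * hypothetical; the guard macro is never defined.
 */
#ifdef HEAPAM_HANDLER_EXAMPLE_SKETCHES
static void
example_insert_one_row(Oid example_relid, Datum value)
{
	Relation	rel = table_open(example_relid, RowExclusiveLock);
	TupleTableSlot *slot = table_slot_create(rel, NULL);

	ExecClearTuple(slot);
	slot->tts_values[0] = value;	/* assumes a one-column table */
	slot->tts_isnull[0] = false;
	ExecStoreVirtualTuple(slot);

	/* dispatches to heapam_tuple_insert() for heap tables */
	table_tuple_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);

	ExecDropSingleTupleTableSlot(slot);
	table_close(rel, RowExclusiveLock);
}
#endif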
258 static void
259 heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
260  CommandId cid, int options,
261  BulkInsertState bistate, uint32 specToken)
262 {
263  bool shouldFree = true;
264  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
265 
266  /* Update the tuple with table oid */
267  slot->tts_tableOid = RelationGetRelid(relation);
268  tuple->t_tableOid = slot->tts_tableOid;
269 
270  HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
271  options |= HEAP_INSERT_SPECULATIVE;
272 
273  /* Perform the insertion, and copy the resulting ItemPointer */
274  heap_insert(relation, tuple, cid, options, bistate);
275  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
276 
277  if (shouldFree)
278  pfree(tuple);
279 }
280 
281 static void
282 heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
283  uint32 specToken, bool succeeded)
284 {
285  bool shouldFree = true;
286  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
287 
288  /* adjust the tuple's state accordingly */
289  if (succeeded)
290  heap_finish_speculative(relation, &slot->tts_tid);
291  else
292  heap_abort_speculative(relation, &slot->tts_tid);
293 
294  if (shouldFree)
295  pfree(tuple);
296 }
297 
298 static TM_Result
299 heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
300  Snapshot snapshot, Snapshot crosscheck, bool wait,
301  TM_FailureData *tmfd, bool changingPart)
302 {
303  /*
304  * Currently, index tuple deletion is handled at VACUUM time. If the
305  * storage itself were to clean up dead tuples on its own, that would
306  * also be the time to delete the corresponding index tuples.
307  */
308  return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
309 }
310 
311 
312 static TM_Result
313 heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
314  CommandId cid, Snapshot snapshot, Snapshot crosscheck,
315  bool wait, TM_FailureData *tmfd,
316  LockTupleMode *lockmode, bool *update_indexes)
317 {
318  bool shouldFree = true;
319  HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
320  TM_Result result;
321 
322  /* Update the tuple with table oid */
323  slot->tts_tableOid = RelationGetRelid(relation);
324  tuple->t_tableOid = slot->tts_tableOid;
325 
326  result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
327  tmfd, lockmode);
328  ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
329 
330  /*
331  * Decide whether new index entries are needed for the tuple
332  *
333  * Note: heap_update returns the tid (location) of the new tuple in the
334  * t_self field.
335  *
336  * If it's a HOT update, we mustn't insert new index entries.
337  */
338  *update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);
339 
340  if (shouldFree)
341  pfree(tuple);
342 
343  return result;
344 }
345 
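/*
 * Illustrative sketch (not part of heapam_handler.c): callers react to the
 * *update_indexes flag set above.  A HOT update leaves the tuple on the same
 * page and reuses the existing index entries, so new index tuples are built
 * only when the flag is true.  Helper names are hypothetical; the guard macro
 * is never defined.
 */
#ifdef HEAPAM_HANDLER_EXAMPLE_SKETCHES
static void
example_update_one_row(Relation rel, ItemPointer otid, TupleTableSlot *newslot)
{
	TM_FailureData tmfd;
	LockTupleMode lockmode;
	bool		update_indexes;
	TM_Result	result;

	result = table_tuple_update(rel, otid, newslot,
								GetCurrentCommandId(true),
								GetActiveSnapshot(), InvalidSnapshot,
								true,	/* wait for concurrent updaters */
								&tmfd, &lockmode, &update_indexes);

	if (result == TM_Ok && update_indexes)
	{
		/* a real caller would run ExecInsertIndexTuples() here */
	}
}
#endif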
346 static TM_Result
347 heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
348  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
349  LockWaitPolicy wait_policy, uint8 flags,
350  TM_FailureData *tmfd)
351 {
352  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
353  TM_Result result;
354  Buffer buffer;
355  HeapTuple tuple = &bslot->base.tupdata;
356  bool follow_updates;
357 
358  follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
359  tmfd->traversed = false;
360 
361  Assert(TTS_IS_BUFFERTUPLE(slot));
362 
363 tuple_lock_retry:
364  tuple->t_self = *tid;
365  result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
366  follow_updates, &buffer, tmfd);
367 
368  if (result == TM_Updated &&
369  (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
370  {
371  ReleaseBuffer(buffer);
372  /* Should not encounter speculative tuple on recheck */
373  Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
374 
375  if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
376  {
377  SnapshotData SnapshotDirty;
378  TransactionId priorXmax;
379 
380  /* it was updated, so look at the updated version */
381  *tid = tmfd->ctid;
382  /* updated row should have xmin matching this xmax */
383  priorXmax = tmfd->xmax;
384 
385  /* signal that a tuple later in the chain is getting locked */
386  tmfd->traversed = true;
387 
388  /*
389  * fetch target tuple
390  *
391  * Loop here to deal with updated or busy tuples
392  */
393  InitDirtySnapshot(SnapshotDirty);
394  for (;;)
395  {
396  if (ItemPointerIndicatesMovedPartitions(tid))
397  ereport(ERROR,
398  (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
399  errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
400 
401  tuple->t_self = *tid;
402  if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer))
403  {
404  /*
405  * If xmin isn't what we're expecting, the slot must have
406  * been recycled and reused for an unrelated tuple. This
407  * implies that the latest version of the row was deleted,
408  * so we need do nothing. (Should be safe to examine xmin
409  * without getting buffer's content lock. We assume
410  * reading a TransactionId to be atomic, and Xmin never
411  * changes in an existing tuple, except to invalid or
412  * frozen, and neither of those can match priorXmax.)
413  */
414  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
415  priorXmax))
416  {
417  ReleaseBuffer(buffer);
418  return TM_Deleted;
419  }
420 
421  /* otherwise xmin should not be dirty... */
422  if (TransactionIdIsValid(SnapshotDirty.xmin))
423  ereport(ERROR,
424  (errcode(ERRCODE_DATA_CORRUPTED),
425  errmsg_internal("t_xmin is uncommitted in tuple to be updated")));
426 
427  /*
428  * If tuple is being updated by other transaction then we
429  * have to wait for its commit/abort, or die trying.
430  */
431  if (TransactionIdIsValid(SnapshotDirty.xmax))
432  {
433  ReleaseBuffer(buffer);
434  switch (wait_policy)
435  {
436  case LockWaitBlock:
437  XactLockTableWait(SnapshotDirty.xmax,
438  relation, &tuple->t_self,
439  XLTW_FetchUpdated);
440  break;
441  case LockWaitSkip:
442  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
443  /* skip instead of waiting */
444  return TM_WouldBlock;
445  break;
446  case LockWaitError:
447  if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
448  ereport(ERROR,
449  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
450  errmsg("could not obtain lock on row in relation \"%s\"",
451  RelationGetRelationName(relation))));
452  break;
453  }
454  continue; /* loop back to repeat heap_fetch */
455  }
456 
457  /*
458  * If tuple was inserted by our own transaction, we have
459  * to check cmin against cid: cmin >= current CID means
460  * our command cannot see the tuple, so we should ignore
461  * it. Otherwise heap_lock_tuple() will throw an error,
462  * and so would any later attempt to update or delete the
463  * tuple. (We need not check cmax because
464  * HeapTupleSatisfiesDirty will consider a tuple deleted
465  * by our transaction dead, regardless of cmax.) We just
466  * checked that priorXmax == xmin, so we can test that
467  * variable instead of doing HeapTupleHeaderGetXmin again.
468  */
469  if (TransactionIdIsCurrentTransactionId(priorXmax) &&
470  HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
471  {
472  tmfd->xmax = priorXmax;
473 
474  /*
475  * Cmin is the problematic value, so store that. See
476  * above.
477  */
478  tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
479  ReleaseBuffer(buffer);
480  return TM_SelfModified;
481  }
482 
483  /*
484  * This is a live tuple, so try to lock it again.
485  */
486  ReleaseBuffer(buffer);
487  goto tuple_lock_retry;
488  }
489 
490  /*
491  * If the referenced slot was actually empty, the latest
492  * version of the row must have been deleted, so we need do
493  * nothing.
494  */
495  if (tuple->t_data == NULL)
496  {
497  return TM_Deleted;
498  }
499 
500  /*
501  * As above, if xmin isn't what we're expecting, do nothing.
502  */
503  if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
504  priorXmax))
505  {
506  if (BufferIsValid(buffer))
507  ReleaseBuffer(buffer);
508  return TM_Deleted;
509  }
510 
511  /*
512  * If we get here, the tuple was found but failed
513  * SnapshotDirty. Assuming the xmin is either a committed xact
514  * or our own xact (as it certainly should be if we're trying
515  * to modify the tuple), this must mean that the row was
516  * updated or deleted by either a committed xact or our own
517  * xact. If it was deleted, we can ignore it; if it was
518  * updated then chain up to the next version and repeat the
519  * whole process.
520  *
521  * As above, it should be safe to examine xmax and t_ctid
522  * without the buffer content lock, because they can't be
523  * changing.
524  */
525  if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
526  {
527  /* deleted, so forget about it */
528  if (BufferIsValid(buffer))
529  ReleaseBuffer(buffer);
530  return TM_Deleted;
531  }
532 
533  /* updated, so look at the updated row */
534  *tid = tuple->t_data->t_ctid;
535  /* updated row should have xmin matching this xmax */
536  priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
537  if (BufferIsValid(buffer))
538  ReleaseBuffer(buffer);
539  /* loop back to fetch next in chain */
540  }
541  }
542  else
543  {
544  /* tuple was deleted, so give up */
545  return TM_Deleted;
546  }
547  }
548 
549  slot->tts_tableOid = RelationGetRelid(relation);
550  tuple->t_tableOid = slot->tts_tableOid;
551 
552  /* store in slot, transferring existing pin */
553  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
554 
555  return result;
556 }
557 
558 static void
559 heapam_finish_bulk_insert(Relation relation, int options)
560 {
561  /*
562  * If we skipped writing WAL, then we need to sync the heap (but not
563  * indexes since those use WAL anyway / don't go through tableam)
564  */
565  if (options & HEAP_INSERT_SKIP_WAL)
566  heap_sync(relation);
567 }
568 
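/*
 * Illustrative sketch (not part of heapam_handler.c) of the pairing this
 * callback depends on: a bulk loader that passes HEAP_INSERT_SKIP_WAL must
 * later force the relation to disk, which is exactly what the heap_sync()
 * call above does.  The helper name and skip_wal parameter are hypothetical;
 * the guard macro is never defined.
 */
#ifdef HEAPAM_HANDLER_EXAMPLE_SKETCHES
static void
example_bulk_load(Relation rel, HeapTuple *tuples, int ntuples, bool skip_wal)
{
	BulkInsertState bistate = GetBulkInsertState();
	int			options = HEAP_INSERT_SKIP_FSM;
	int			i;

	/* skipping WAL is only safe for a relfilenode created in this xact */
	if (skip_wal)
		options |= HEAP_INSERT_SKIP_WAL;

	for (i = 0; i < ntuples; i++)
		heap_insert(rel, tuples[i], GetCurrentCommandId(true),
					options, bistate);

	FreeBulkInsertState(bistate);

	/* mirrors heapam_finish_bulk_insert() above */
	if (skip_wal)
		heap_sync(rel);
}
#endif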
569 
570 /* ------------------------------------------------------------------------
571  * DDL related callbacks for heap AM.
572  * ------------------------------------------------------------------------
573  */
574 
575 static void
576 heapam_relation_set_new_filenode(Relation rel,
577  const RelFileNode *newrnode,
578  char persistence,
579  TransactionId *freezeXid,
580  MultiXactId *minmulti)
581 {
582  SMgrRelation srel;
583 
584  /*
585  * Initialize to the minimum XID that could put tuples in the table. We
586  * know that no xacts older than RecentXmin are still running, so that
587  * will do.
588  */
589  *freezeXid = RecentXmin;
590 
591  /*
592  * Similarly, initialize the minimum Multixact to the first value that
593  * could possibly be stored in tuples in the table. Running transactions
594  * could reuse values from their local cache, so we are careful to
595  * consider all currently running multis.
596  *
597  * XXX this could be refined further, but is it worth the hassle?
598  */
599  *minmulti = GetOldestMultiXactId();
600 
601  srel = RelationCreateStorage(*newrnode, persistence);
602 
603  /*
604  * If required, set up an init fork for an unlogged table so that it can
605  * be correctly reinitialized on restart. An immediate sync is required
606  * even if the page has been logged, because the write did not go through
607  * shared_buffers and therefore a concurrent checkpoint may have moved the
608  * redo pointer past our xlog record. Recovery may as well remove it
609  * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
610  * record. Therefore, logging is necessary even if wal_level=minimal.
611  */
612  if (persistence == RELPERSISTENCE_UNLOGGED)
613  {
614  Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
615  rel->rd_rel->relkind == RELKIND_MATVIEW ||
616  rel->rd_rel->relkind == RELKIND_TOASTVALUE);
617  smgrcreate(srel, INIT_FORKNUM, false);
618  log_smgrcreate(newrnode, INIT_FORKNUM);
619  smgrimmedsync(srel, INIT_FORKNUM);
620  }
621 
622  smgrclose(srel);
623 }
624 
625 static void
626 heapam_relation_nontransactional_truncate(Relation rel)
627 {
628  RelationTruncate(rel, 0);
629 }
630 
631 static void
632 heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
633 {
634  SMgrRelation dstrel;
635 
636  dstrel = smgropen(*newrnode, rel->rd_backend);
637  RelationOpenSmgr(rel);
638 
639  /*
640  * Since we copy the file directly without looking at the shared buffers,
641  * we'd better first flush out any pages of the source relation that are
642  * in shared buffers. We assume no new changes will be made while we are
643  * holding exclusive lock on the rel.
644  */
645  FlushRelationBuffers(rel);
646 
647  /*
648  * Create and copy all forks of the relation, and schedule unlinking of
649  * old physical files.
650  *
651  * NOTE: any conflict in relfilenode value will be caught in
652  * RelationCreateStorage().
653  */
654  RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
655 
656  /* copy main fork */
657  RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
658  rel->rd_rel->relpersistence);
659 
660  /* copy those extra forks that exist */
661  for (ForkNumber forkNum = MAIN_FORKNUM + 1;
662  forkNum <= MAX_FORKNUM; forkNum++)
663  {
664  if (smgrexists(rel->rd_smgr, forkNum))
665  {
666  smgrcreate(dstrel, forkNum, false);
667 
668  /*
669  * WAL log creation if the relation is persistent, or this is the
670  * init fork of an unlogged relation.
671  */
672  if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
673  (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
674  forkNum == INIT_FORKNUM))
675  log_smgrcreate(newrnode, forkNum);
676  RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
677  rel->rd_rel->relpersistence);
678  }
679  }
680 
681 
682  /* drop old relation, and close new one */
683  RelationDropStorage(rel);
684  smgrclose(dstrel);
685 }
686 
687 static void
688 heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
689  Relation OldIndex, bool use_sort,
690  TransactionId OldestXmin,
691  TransactionId *xid_cutoff,
692  MultiXactId *multi_cutoff,
693  double *num_tuples,
694  double *tups_vacuumed,
695  double *tups_recently_dead)
696 {
697  RewriteState rwstate;
698  IndexScanDesc indexScan;
699  TableScanDesc tableScan;
700  HeapScanDesc heapScan;
701  bool use_wal;
702  bool is_system_catalog;
703  Tuplesortstate *tuplesort;
704  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
705  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
706  TupleTableSlot *slot;
707  int natts;
708  Datum *values;
709  bool *isnull;
710  BufferHeapTupleTableSlot *hslot;
711 
712  /* Remember if it's a system catalog */
713  is_system_catalog = IsSystemRelation(OldHeap);
714 
715  /*
716  * We need to log the copied data in WAL iff WAL archiving/streaming is
717  * enabled AND it's a WAL-logged rel.
718  */
719  use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
720 
721  /* use_wal off requires smgr_targblock be initially invalid */
722  Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
723 
724  /* Preallocate values/isnull arrays */
725  natts = newTupDesc->natts;
726  values = (Datum *) palloc(natts * sizeof(Datum));
727  isnull = (bool *) palloc(natts * sizeof(bool));
728 
729  /* Initialize the rewrite operation */
730  rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
731  *multi_cutoff, use_wal);
732 
733 
734  /* Set up sorting if wanted */
735  if (use_sort)
736  tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
737  maintenance_work_mem,
738  NULL, false);
739  else
740  tuplesort = NULL;
741 
742  /*
743  * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
744  * that still need to be copied, we scan with SnapshotAny and use
745  * HeapTupleSatisfiesVacuum for the visibility test.
746  */
747  if (OldIndex != NULL && !use_sort)
748  {
749  const int ci_index[] = {
750  PROGRESS_CLUSTER_PHASE,
751  PROGRESS_CLUSTER_INDEX_RELID
752  };
753  int64 ci_val[2];
754 
755  /* Set phase and OIDOldIndex to columns */
756  ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
757  ci_val[1] = RelationGetRelid(OldIndex);
758  pgstat_progress_update_multi_param(2, ci_index, ci_val);
759 
760  tableScan = NULL;
761  heapScan = NULL;
762  indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
763  index_rescan(indexScan, NULL, 0, NULL, 0);
764  }
765  else
766  {
767  /* In scan-and-sort mode and also VACUUM FULL, set phase */
768  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
769  PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
770 
771  tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
772  heapScan = (HeapScanDesc) tableScan;
773  indexScan = NULL;
774 
775  /* Set total heap blocks */
776  pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
777  heapScan->rs_nblocks);
778  }
779 
780  slot = table_slot_create(OldHeap, NULL);
781  hslot = (BufferHeapTupleTableSlot *) slot;
782 
783  /*
784  * Scan through the OldHeap, either in OldIndex order or sequentially;
785  * copy each tuple into the NewHeap, or transiently to the tuplesort
786  * module. Note that we don't bother sorting dead tuples (they won't get
787  * to the new table anyway).
788  */
789  for (;;)
790  {
791  HeapTuple tuple;
792  Buffer buf;
793  bool isdead;
794 
795  CHECK_FOR_INTERRUPTS();
796 
797  if (indexScan != NULL)
798  {
799  if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
800  break;
801 
802  /* Since we used no scan keys, should never need to recheck */
803  if (indexScan->xs_recheck)
804  elog(ERROR, "CLUSTER does not support lossy index conditions");
805  }
806  else
807  {
808  if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
809  break;
810 
811  /*
812  * In scan-and-sort mode and also VACUUM FULL, set heap blocks
813  * scanned
814  */
815  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
816  heapScan->rs_cblock + 1);
817  }
818 
819  tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
820  buf = hslot->buffer;
821 
822  LockBuffer(buf, BUFFER_LOCK_SHARE);
823 
824  switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
825  {
826  case HEAPTUPLE_DEAD:
827  /* Definitely dead */
828  isdead = true;
829  break;
830  case HEAPTUPLE_RECENTLY_DEAD:
831  *tups_recently_dead += 1;
832  /* fall through */
833  case HEAPTUPLE_LIVE:
834  /* Live or recently dead, must copy it */
835  isdead = false;
836  break;
837  case HEAPTUPLE_INSERT_IN_PROGRESS:
838 
839  /*
840  * Since we hold exclusive lock on the relation, normally the
841  * only way to see this is if it was inserted earlier in our
842  * own transaction. However, it can happen in system
843  * catalogs, since we tend to release write lock before commit
844  * there. Give a warning if neither case applies; but in any
845  * case we had better copy it.
846  */
847  if (!is_system_catalog &&
848  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
849  elog(WARNING, "concurrent insert in progress within table \"%s\"",
850  RelationGetRelationName(OldHeap));
851  /* treat as live */
852  isdead = false;
853  break;
854  case HEAPTUPLE_DELETE_IN_PROGRESS:
855 
856  /*
857  * Similar situation to INSERT_IN_PROGRESS case.
858  */
859  if (!is_system_catalog &&
860  !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
861  elog(WARNING, "concurrent delete in progress within table \"%s\"",
862  RelationGetRelationName(OldHeap));
863  /* treat as recently dead */
864  *tups_recently_dead += 1;
865  isdead = false;
866  break;
867  default:
868  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
869  isdead = false; /* keep compiler quiet */
870  break;
871  }
872 
873  LockBuffer(buf, BUFFER_LOCK_UNLOCK);
874 
875  if (isdead)
876  {
877  *tups_vacuumed += 1;
878  /* heap rewrite module still needs to see it... */
879  if (rewrite_heap_dead_tuple(rwstate, tuple))
880  {
881  /* A previous recently-dead tuple is now known dead */
882  *tups_vacuumed += 1;
883  *tups_recently_dead -= 1;
884  }
885  continue;
886  }
887 
888  *num_tuples += 1;
889  if (tuplesort != NULL)
890  {
891  tuplesort_putheaptuple(tuplesort, tuple);
892 
893  /*
894  * In scan-and-sort mode, report increase in number of tuples
895  * scanned
896  */
897  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
898  *num_tuples);
899  }
900  else
901  {
902  const int ct_index[] = {
903  PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
904  PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
905  };
906  int64 ct_val[2];
907 
908  reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
909  values, isnull, rwstate);
910 
911  /*
912  * In indexscan mode and also VACUUM FULL, report increase in
913  * number of tuples scanned and written
914  */
915  ct_val[0] = *num_tuples;
916  ct_val[1] = *num_tuples;
917  pgstat_progress_update_multi_param(2, ct_index, ct_val);
918  }
919  }
920 
921  if (indexScan != NULL)
922  index_endscan(indexScan);
923  if (tableScan != NULL)
924  table_endscan(tableScan);
925  if (slot)
926  ExecDropSingleTupleTableSlot(slot);
927 
928  /*
929  * In scan-and-sort mode, complete the sort, then read out all live tuples
930  * from the tuplestore and write them to the new relation.
931  */
932  if (tuplesort != NULL)
933  {
934  double n_tuples = 0;
935 
936  /* Report that we are now sorting tuples */
937  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
938  PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
939 
940  tuplesort_performsort(tuplesort);
941 
942  /* Report that we are now writing new heap */
943  pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
944  PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
945 
946  for (;;)
947  {
948  HeapTuple tuple;
949 
950  CHECK_FOR_INTERRUPTS();
951 
952  tuple = tuplesort_getheaptuple(tuplesort, true);
953  if (tuple == NULL)
954  break;
955 
956  n_tuples += 1;
957  reform_and_rewrite_tuple(tuple,
958  OldHeap, NewHeap,
959  values, isnull,
960  rwstate);
961  /* Report n_tuples */
962  pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
963  n_tuples);
964  }
965 
966  tuplesort_end(tuplesort);
967  }
968 
969  /* Write out any remaining tuples, and fsync if needed */
970  end_heap_rewrite(rwstate);
971 
972  /* Clean up */
973  pfree(values);
974  pfree(isnull);
975 }
976 
977 static bool
978 heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
979  BufferAccessStrategy bstrategy)
980 {
981  HeapScanDesc hscan = (HeapScanDesc) scan;
982 
983  /*
984  * We must maintain a pin on the target page's buffer to ensure that
985  * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
986  * under us. Hence, pin the page until we are done looking at it. We
987  * also choose to hold sharelock on the buffer throughout --- we could
988  * release and re-acquire sharelock for each tuple, but since we aren't
989  * doing much work per tuple, the extra lock traffic is probably better
990  * avoided.
991  */
992  hscan->rs_cblock = blockno;
993  hscan->rs_cindex = FirstOffsetNumber;
994  hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
995  blockno, RBM_NORMAL, bstrategy);
996  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
997 
998  /* in heap all blocks can contain tuples, so always return true */
999  return true;
1000 }
1001 
1002 static bool
1003 heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1004  double *liverows, double *deadrows,
1005  TupleTableSlot *slot)
1006 {
1007  HeapScanDesc hscan = (HeapScanDesc) scan;
1008  Page targpage;
1009  OffsetNumber maxoffset;
1010  BufferHeapTupleTableSlot *hslot;
1011 
1012  Assert(TTS_IS_BUFFERTUPLE(slot));
1013 
1014  hslot = (BufferHeapTupleTableSlot *) slot;
1015  targpage = BufferGetPage(hscan->rs_cbuf);
1016  maxoffset = PageGetMaxOffsetNumber(targpage);
1017 
1018  /* Inner loop over all tuples on the selected page */
1019  for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1020  {
1021  ItemId itemid;
1022  HeapTuple targtuple = &hslot->base.tupdata;
1023  bool sample_it = false;
1024 
1025  itemid = PageGetItemId(targpage, hscan->rs_cindex);
1026 
1027  /*
1028  * We ignore unused and redirect line pointers. DEAD line pointers
1029  * should be counted as dead, because we need vacuum to run to get rid
1030  * of them. Note that this rule agrees with the way that
1031  * heap_page_prune() counts things.
1032  */
1033  if (!ItemIdIsNormal(itemid))
1034  {
1035  if (ItemIdIsDead(itemid))
1036  *deadrows += 1;
1037  continue;
1038  }
1039 
1040  ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1041 
1042  targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1043  targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1044  targtuple->t_len = ItemIdGetLength(itemid);
1045 
1046  switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1047  hscan->rs_cbuf))
1048  {
1049  case HEAPTUPLE_LIVE:
1050  sample_it = true;
1051  *liverows += 1;
1052  break;
1053 
1054  case HEAPTUPLE_DEAD:
1055  case HEAPTUPLE_RECENTLY_DEAD:
1056  /* Count dead and recently-dead rows */
1057  *deadrows += 1;
1058  break;
1059 
1060  case HEAPTUPLE_INSERT_IN_PROGRESS:
1061 
1062  /*
1063  * Insert-in-progress rows are not counted. We assume that
1064  * when the inserting transaction commits or aborts, it will
1065  * send a stats message to increment the proper count. This
1066  * works right only if that transaction ends after we finish
1067  * analyzing the table; if things happen in the other order,
1068  * its stats update will be overwritten by ours. However, the
1069  * error will be large only if the other transaction runs long
1070  * enough to insert many tuples, so assuming it will finish
1071  * after us is the safer option.
1072  *
1073  * A special case is that the inserting transaction might be
1074  * our own. In this case we should count and sample the row,
1075  * to accommodate users who load a table and analyze it in one
1076  * transaction. (pgstat_report_analyze has to adjust the
1077  * numbers we send to the stats collector to make this come
1078  * out right.)
1079  */
1080  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
1081  {
1082  sample_it = true;
1083  *liverows += 1;
1084  }
1085  break;
1086 
1087  case HEAPTUPLE_DELETE_IN_PROGRESS:
1088 
1089  /*
1090  * We count and sample delete-in-progress rows the same as
1091  * live ones, so that the stats counters come out right if the
1092  * deleting transaction commits after us, per the same
1093  * reasoning given above.
1094  *
1095  * If the delete was done by our own transaction, however, we
1096  * must count the row as dead to make pgstat_report_analyze's
1097  * stats adjustments come out right. (Note: this works out
1098  * properly when the row was both inserted and deleted in our
1099  * xact.)
1100  *
1101  * The net effect of these choices is that we act as though an
1102  * IN_PROGRESS transaction hasn't happened yet, except if it
1103  * is our own transaction, which we assume has happened.
1104  *
1105  * This approach ensures that we behave sanely if we see both
1106  * the pre-image and post-image rows for a row being updated
1107  * by a concurrent transaction: we will sample the pre-image
1108  * but not the post-image. We also get sane results if the
1109  * concurrent transaction never commits.
1110  */
1111  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
1112  *deadrows += 1;
1113  else
1114  {
1115  sample_it = true;
1116  *liverows += 1;
1117  }
1118  break;
1119 
1120  default:
1121  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1122  break;
1123  }
1124 
1125  if (sample_it)
1126  {
1127  ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1128  hscan->rs_cindex++;
1129 
1130  /* note that we leave the buffer locked here! */
1131  return true;
1132  }
1133  }
1134 
1135  /* Now release the lock and pin on the page */
1136  UnlockReleaseBuffer(hscan->rs_cbuf);
1137  hscan->rs_cbuf = InvalidBuffer;
1138 
1139  /* also prevent old slot contents from having pin on page */
1140  ExecClearTuple(slot);
1141 
1142  return false;
1143 }
1144 
1145 static double
1146 heapam_index_build_range_scan(Relation heapRelation,
1147  Relation indexRelation,
1148  IndexInfo *indexInfo,
1149  bool allow_sync,
1150  bool anyvisible,
1151  bool progress,
1152  BlockNumber start_blockno,
1153  BlockNumber numblocks,
1154  IndexBuildCallback callback,
1155  void *callback_state,
1156  TableScanDesc scan)
1157 {
1158  HeapScanDesc hscan;
1159  bool is_system_catalog;
1160  bool checking_uniqueness;
1161  HeapTuple heapTuple;
1162  Datum values[INDEX_MAX_KEYS];
1163  bool isnull[INDEX_MAX_KEYS];
1164  double reltuples;
1165  ExprState *predicate;
1166  TupleTableSlot *slot;
1167  EState *estate;
1168  ExprContext *econtext;
1169  Snapshot snapshot;
1170  bool need_unregister_snapshot = false;
1171  TransactionId OldestXmin;
1172  BlockNumber previous_blkno = InvalidBlockNumber;
1173  BlockNumber root_blkno = InvalidBlockNumber;
1174  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1175 
1176  /*
1177  * sanity checks
1178  */
1179  Assert(OidIsValid(indexRelation->rd_rel->relam));
1180 
1181  /* Remember if it's a system catalog */
1182  is_system_catalog = IsSystemRelation(heapRelation);
1183 
1184  /* See whether we're verifying uniqueness/exclusion properties */
1185  checking_uniqueness = (indexInfo->ii_Unique ||
1186  indexInfo->ii_ExclusionOps != NULL);
1187 
1188  /*
1189  * "Any visible" mode is not compatible with uniqueness checks; make sure
1190  * only one of those is requested.
1191  */
1192  Assert(!(anyvisible && checking_uniqueness));
1193 
1194  /*
1195  * Need an EState for evaluation of index expressions and partial-index
1196  * predicates. Also a slot to hold the current tuple.
1197  */
1198  estate = CreateExecutorState();
1199  econtext = GetPerTupleExprContext(estate);
1200  slot = table_slot_create(heapRelation, NULL);
1201 
1202  /* Arrange for econtext's scan tuple to be the tuple under test */
1203  econtext->ecxt_scantuple = slot;
1204 
1205  /* Set up execution state for predicate, if any. */
1206  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1207 
1208  /*
1209  * Prepare for scan of the base relation. In a normal index build, we use
1210  * SnapshotAny because we must retrieve all tuples and do our own time
1211  * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1212  * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1213  * and index whatever's live according to that.
1214  */
1215  OldestXmin = InvalidTransactionId;
1216 
1217  /* okay to ignore lazy VACUUMs here */
1218  if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1219  OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
1220 
1221  if (!scan)
1222  {
1223  /*
1224  * Serial index build.
1225  *
1226  * Must begin our own heap scan in this case. We may also need to
1227  * register a snapshot whose lifetime is under our direct control.
1228  */
1229  if (!TransactionIdIsValid(OldestXmin))
1230  {
1231  snapshot = RegisterSnapshot(GetTransactionSnapshot());
1232  need_unregister_snapshot = true;
1233  }
1234  else
1235  snapshot = SnapshotAny;
1236 
1237  scan = table_beginscan_strat(heapRelation, /* relation */
1238  snapshot, /* snapshot */
1239  0, /* number of keys */
1240  NULL, /* scan key */
1241  true, /* buffer access strategy OK */
1242  allow_sync); /* syncscan OK? */
1243  }
1244  else
1245  {
1246  /*
1247  * Parallel index build.
1248  *
1249  * Parallel case never registers/unregisters own snapshot. Snapshot
1250  * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1251  * snapshot, based on same criteria as serial case.
1252  */
1253  Assert(!IsBootstrapProcessingMode());
1254  Assert(allow_sync);
1255  snapshot = scan->rs_snapshot;
1256  }
1257 
1258  hscan = (HeapScanDesc) scan;
1259 
1260  /* Publish number of blocks to scan */
1261  if (progress)
1262  {
1263  BlockNumber nblocks;
1264 
1265  if (hscan->rs_base.rs_parallel != NULL)
1266  {
1267  ParallelBlockTableScanDesc pbscan;
1268 
1269  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1270  nblocks = pbscan->phs_nblocks;
1271  }
1272  else
1273  nblocks = hscan->rs_nblocks;
1274 
1275  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1276  nblocks);
1277  }
1278 
1279  /*
1280  * Must call GetOldestXmin() with SnapshotAny. Should never call
1281  * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
1282  * this for parallel builds, since ambuild routines that support parallel
1283  * builds must work these details out for themselves.)
1284  */
1285  Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1286  Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1287  !TransactionIdIsValid(OldestXmin));
1288  Assert(snapshot == SnapshotAny || !anyvisible);
1289 
1290  /* set our scan endpoints */
1291  if (!allow_sync)
1292  heap_setscanlimits(scan, start_blockno, numblocks);
1293  else
1294  {
1295  /* syncscan can only be requested on whole relation */
1296  Assert(start_blockno == 0);
1297  Assert(numblocks == InvalidBlockNumber);
1298  }
1299 
1300  reltuples = 0;
1301 
1302  /*
1303  * Scan all tuples in the base relation.
1304  */
1305  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1306  {
1307  bool tupleIsAlive;
1308 
1309  CHECK_FOR_INTERRUPTS();
1310 
1311  /* Report scan progress, if asked to. */
1312  if (progress)
1313  {
1314  BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1315 
1316  if (blocks_done != previous_blkno)
1317  {
1318  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1319  blocks_done);
1320  previous_blkno = blocks_done;
1321  }
1322  }
1323 
1324  /*
1325  * When dealing with a HOT-chain of updated tuples, we want to index
1326  * the values of the live tuple (if any), but index it under the TID
1327  * of the chain's root tuple. This approach is necessary to preserve
1328  * the HOT-chain structure in the heap. So we need to be able to find
1329  * the root item offset for every tuple that's in a HOT-chain. When
1330  * first reaching a new page of the relation, call
1331  * heap_get_root_tuples() to build a map of root item offsets on the
1332  * page.
1333  *
1334  * It might look unsafe to use this information across buffer
1335  * lock/unlock. However, we hold ShareLock on the table so no
1336  * ordinary insert/update/delete should occur; and we hold pin on the
1337  * buffer continuously while visiting the page, so no pruning
1338  * operation can occur either.
1339  *
1340  * Also, although our opinions about tuple liveness could change while
1341  * we scan the page (due to concurrent transaction commits/aborts),
1342  * the chain root locations won't, so this info doesn't need to be
1343  * rebuilt after waiting for another transaction.
1344  *
1345  * Note the implied assumption that there is no more than one live
1346  * tuple per HOT-chain --- else we could create more than one index
1347  * entry pointing to the same root tuple.
1348  */
1349  if (hscan->rs_cblock != root_blkno)
1350  {
1351  Page page = BufferGetPage(hscan->rs_cbuf);
1352 
1353  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1354  heap_get_root_tuples(page, root_offsets);
1355  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1356 
1357  root_blkno = hscan->rs_cblock;
1358  }
1359 
1360  if (snapshot == SnapshotAny)
1361  {
1362  /* do our own time qual check */
1363  bool indexIt;
1364  TransactionId xwait;
1365 
1366  recheck:
1367 
1368  /*
1369  * We could possibly get away with not locking the buffer here,
1370  * since caller should hold ShareLock on the relation, but let's
1371  * be conservative about it. (This remark is still correct even
1372  * with HOT-pruning: our pin on the buffer prevents pruning.)
1373  */
1374  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1375 
1376  /*
1377  * The criteria for counting a tuple as live in this block need to
1378  * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1379  * otherwise CREATE INDEX and ANALYZE may produce wildly different
1380  * reltuples values, e.g. when there are many recently-dead
1381  * tuples.
1382  */
1383  switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1384  hscan->rs_cbuf))
1385  {
1386  case HEAPTUPLE_DEAD:
1387  /* Definitely dead, we can ignore it */
1388  indexIt = false;
1389  tupleIsAlive = false;
1390  break;
1391  case HEAPTUPLE_LIVE:
1392  /* Normal case, index and unique-check it */
1393  indexIt = true;
1394  tupleIsAlive = true;
1395  /* Count it as live, too */
1396  reltuples += 1;
1397  break;
1398  case HEAPTUPLE_RECENTLY_DEAD:
1399 
1400  /*
1401  * If tuple is recently deleted then we must index it
1402  * anyway to preserve MVCC semantics. (Pre-existing
1403  * transactions could try to use the index after we finish
1404  * building it, and may need to see such tuples.)
1405  *
1406  * However, if it was HOT-updated then we must only index
1407  * the live tuple at the end of the HOT-chain. Since this
1408  * breaks semantics for pre-existing snapshots, mark the
1409  * index as unusable for them.
1410  *
1411  * We don't count recently-dead tuples in reltuples, even
1412  * if we index them; see heapam_scan_analyze_next_tuple().
1413  */
1414  if (HeapTupleIsHotUpdated(heapTuple))
1415  {
1416  indexIt = false;
1417  /* mark the index as unsafe for old snapshots */
1418  indexInfo->ii_BrokenHotChain = true;
1419  }
1420  else
1421  indexIt = true;
1422  /* In any case, exclude the tuple from unique-checking */
1423  tupleIsAlive = false;
1424  break;
1425  case HEAPTUPLE_INSERT_IN_PROGRESS:
1426 
1427  /*
1428  * In "anyvisible" mode, this tuple is visible and we
1429  * don't need any further checks.
1430  */
1431  if (anyvisible)
1432  {
1433  indexIt = true;
1434  tupleIsAlive = true;
1435  reltuples += 1;
1436  break;
1437  }
1438 
1439  /*
1440  * Since caller should hold ShareLock or better, normally
1441  * the only way to see this is if it was inserted earlier
1442  * in our own transaction. However, it can happen in
1443  * system catalogs, since we tend to release write lock
1444  * before commit there. Give a warning if neither case
1445  * applies.
1446  */
1447  xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1448  if (!TransactionIdIsCurrentTransactionId(xwait))
1449  {
1450  if (!is_system_catalog)
1451  elog(WARNING, "concurrent insert in progress within table \"%s\"",
1452  RelationGetRelationName(heapRelation));
1453 
1454  /*
1455  * If we are performing uniqueness checks, indexing
1456  * such a tuple could lead to a bogus uniqueness
1457  * failure. In that case we wait for the inserting
1458  * transaction to finish and check again.
1459  */
1460  if (checking_uniqueness)
1461  {
1462  /*
1463  * Must drop the lock on the buffer before we wait
1464  */
1465  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1466  XactLockTableWait(xwait, heapRelation,
1467  &heapTuple->t_self,
1468  XLTW_InsertIndexUnique);
1469  CHECK_FOR_INTERRUPTS();
1470  goto recheck;
1471  }
1472  }
1473  else
1474  {
1475  /*
1476  * For consistency with
1477  * heapam_scan_analyze_next_tuple(), count
1478  * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1479  * when inserted by our own transaction.
1480  */
1481  reltuples += 1;
1482  }
1483 
1484  /*
1485  * We must index such tuples, since if the index build
1486  * commits then they're good.
1487  */
1488  indexIt = true;
1489  tupleIsAlive = true;
1490  break;
1491  case HEAPTUPLE_DELETE_IN_PROGRESS:
1492 
1493  /*
1494  * As with INSERT_IN_PROGRESS case, this is unexpected
1495  * unless it's our own deletion or a system catalog; but
1496  * in anyvisible mode, this tuple is visible.
1497  */
1498  if (anyvisible)
1499  {
1500  indexIt = true;
1501  tupleIsAlive = false;
1502  reltuples += 1;
1503  break;
1504  }
1505 
1506  xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1507  if (!TransactionIdIsCurrentTransactionId(xwait))
1508  {
1509  if (!is_system_catalog)
1510  elog(WARNING, "concurrent delete in progress within table \"%s\"",
1511  RelationGetRelationName(heapRelation));
1512 
1513  /*
1514  * If we are performing uniqueness checks, assuming
1515  * the tuple is dead could lead to missing a
1516  * uniqueness violation. In that case we wait for the
1517  * deleting transaction to finish and check again.
1518  *
1519  * Also, if it's a HOT-updated tuple, we should not
1520  * index it but rather the live tuple at the end of
1521  * the HOT-chain. However, the deleting transaction
1522  * could abort, possibly leaving this tuple as live
1523  * after all, in which case it has to be indexed. The
1524  * only way to know what to do is to wait for the
1525  * deleting transaction to finish and check again.
1526  */
1527  if (checking_uniqueness ||
1528  HeapTupleIsHotUpdated(heapTuple))
1529  {
1530  /*
1531  * Must drop the lock on the buffer before we wait
1532  */
1533  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1534  XactLockTableWait(xwait, heapRelation,
1535  &heapTuple->t_self,
1536  XLTW_InsertIndexUnique);
1537  CHECK_FOR_INTERRUPTS();
1538  goto recheck;
1539  }
1540 
1541  /*
1542  * Otherwise index it but don't check for uniqueness,
1543  * the same as a RECENTLY_DEAD tuple.
1544  */
1545  indexIt = true;
1546 
1547  /*
1548  * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1549  * if they were not deleted by the current
1550  * transaction. That's what
1551  * heapam_scan_analyze_next_tuple() does, and we want
1552  * the behavior to be consistent.
1553  */
1554  reltuples += 1;
1555  }
1556  else if (HeapTupleIsHotUpdated(heapTuple))
1557  {
1558  /*
1559  * It's a HOT-updated tuple deleted by our own xact.
1560  * We can assume the deletion will commit (else the
1561  * index contents don't matter), so treat the same as
1562  * RECENTLY_DEAD HOT-updated tuples.
1563  */
1564  indexIt = false;
1565  /* mark the index as unsafe for old snapshots */
1566  indexInfo->ii_BrokenHotChain = true;
1567  }
1568  else
1569  {
1570  /*
1571  * It's a regular tuple deleted by our own xact. Index
1572  * it, but don't check for uniqueness nor count in
1573  * reltuples, the same as a RECENTLY_DEAD tuple.
1574  */
1575  indexIt = true;
1576  }
1577  /* In any case, exclude the tuple from unique-checking */
1578  tupleIsAlive = false;
1579  break;
1580  default:
1581  elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1582  indexIt = tupleIsAlive = false; /* keep compiler quiet */
1583  break;
1584  }
1585 
1586  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1587 
1588  if (!indexIt)
1589  continue;
1590  }
1591  else
1592  {
1593  /* heap_getnext did the time qual check */
1594  tupleIsAlive = true;
1595  reltuples += 1;
1596  }
1597 
1598  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1599 
1600  /* Set up for predicate or expression evaluation */
1601  ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1602 
1603  /*
1604  * In a partial index, discard tuples that don't satisfy the
1605  * predicate.
1606  */
1607  if (predicate != NULL)
1608  {
1609  if (!ExecQual(predicate, econtext))
1610  continue;
1611  }
1612 
1613  /*
1614  * For the current heap tuple, extract all the attributes we use in
1615  * this index, and note which are null. This also performs evaluation
1616  * of any expressions needed.
1617  */
1618  FormIndexDatum(indexInfo,
1619  slot,
1620  estate,
1621  values,
1622  isnull);
1623 
1624  /*
1625  * You'd think we should go ahead and build the index tuple here, but
1626  * some index AMs want to do further processing on the data first. So
1627  * pass the values[] and isnull[] arrays, instead.
1628  */
1629 
1630  if (HeapTupleIsHeapOnly(heapTuple))
1631  {
1632  /*
1633  * For a heap-only tuple, pretend its TID is that of the root. See
1634  * src/backend/access/heap/README.HOT for discussion.
1635  */
1636  ItemPointerData tid;
1637  OffsetNumber offnum;
1638 
1639  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1640 
1641  if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1642  ereport(ERROR,
1643  (errcode(ERRCODE_DATA_CORRUPTED),
1644  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1645  ItemPointerGetBlockNumber(&heapTuple->t_self),
1646  offnum,
1647  RelationGetRelationName(heapRelation))));
1648 
1649  ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1650  root_offsets[offnum - 1]);
1651 
1652  /* Call the AM's callback routine to process the tuple */
1653  callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1654  callback_state);
1655  }
1656  else
1657  {
1658  /* Call the AM's callback routine to process the tuple */
1659  callback(indexRelation, &heapTuple->t_self, values, isnull,
1660  tupleIsAlive, callback_state);
1661  }
1662  }
1663 
1664  /* Report scan progress one last time. */
1665  if (progress)
1666  {
1667  BlockNumber blks_done;
1668 
1669  if (hscan->rs_base.rs_parallel != NULL)
1670  {
1671  ParallelBlockTableScanDesc pbscan;
1672 
1673  pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1674  blks_done = pbscan->phs_nblocks;
1675  }
1676  else
1677  blks_done = hscan->rs_nblocks;
1678 
1679  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1680  blks_done);
1681  }
1682 
1683  table_endscan(scan);
1684 
1685  /* we can now forget our snapshot, if set and registered by us */
1686  if (need_unregister_snapshot)
1687  UnregisterSnapshot(snapshot);
1688 
1689  ExecDropSingleTupleTableSlot(slot);
1690 
1691  FreeExecutorState(estate);
1692 
1693  /* These may have been pointing to the now-gone estate */
1694  indexInfo->ii_ExpressionsState = NIL;
1695  indexInfo->ii_PredicateState = NULL;
1696 
1697  return reltuples;
1698 }
1699 
1700 static void
1701 heapam_index_validate_scan(Relation heapRelation,
1702  Relation indexRelation,
1703  IndexInfo *indexInfo,
1704  Snapshot snapshot,
1705  ValidateIndexState *state)
1706 {
1707  TableScanDesc scan;
1708  HeapScanDesc hscan;
1709  HeapTuple heapTuple;
1710  Datum values[INDEX_MAX_KEYS];
1711  bool isnull[INDEX_MAX_KEYS];
1712  ExprState *predicate;
1713  TupleTableSlot *slot;
1714  EState *estate;
1715  ExprContext *econtext;
1716  BlockNumber root_blkno = InvalidBlockNumber;
1717  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1718  bool in_index[MaxHeapTuplesPerPage];
1719  BlockNumber previous_blkno = InvalidBlockNumber;
1720 
1721  /* state variables for the merge */
1722  ItemPointer indexcursor = NULL;
1723  ItemPointerData decoded;
1724  bool tuplesort_empty = false;
1725 
1726  /*
1727  * sanity checks
1728  */
1729  Assert(OidIsValid(indexRelation->rd_rel->relam));
1730 
1731  /*
1732  * Need an EState for evaluation of index expressions and partial-index
1733  * predicates. Also a slot to hold the current tuple.
1734  */
1735  estate = CreateExecutorState();
1736  econtext = GetPerTupleExprContext(estate);
1737  slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1738  &TTSOpsHeapTuple);
1739 
1740  /* Arrange for econtext's scan tuple to be the tuple under test */
1741  econtext->ecxt_scantuple = slot;
1742 
1743  /* Set up execution state for predicate, if any. */
1744  predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1745 
1746  /*
1747  * Prepare for scan of the base relation. We need just those tuples
1748  * satisfying the passed-in reference snapshot. We must disable syncscan
1749  * here, because it's critical that we read from block zero forward to
1750  * match the sorted TIDs.
1751  */
1752  scan = table_beginscan_strat(heapRelation, /* relation */
1753  snapshot, /* snapshot */
1754  0, /* number of keys */
1755  NULL, /* scan key */
1756  true, /* buffer access strategy OK */
1757  false); /* syncscan not OK */
1758  hscan = (HeapScanDesc) scan;
1759 
1760  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1761  hscan->rs_nblocks);
1762 
1763  /*
1764  * Scan all tuples matching the snapshot.
1765  */
1766  while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1767  {
1768  ItemPointer heapcursor = &heapTuple->t_self;
1769  ItemPointerData rootTuple;
1770  OffsetNumber root_offnum;
1771 
1772  CHECK_FOR_INTERRUPTS();
1773 
1774  state->htups += 1;
1775 
1776  if ((previous_blkno == InvalidBlockNumber) ||
1777  (hscan->rs_cblock != previous_blkno))
1778  {
1779  pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1780  hscan->rs_cblock);
1781  previous_blkno = hscan->rs_cblock;
1782  }
1783 
1784  /*
1785  * As commented in table_index_build_scan, we should index heap-only
1786  * tuples under the TIDs of their root tuples; so when we advance onto
1787  * a new heap page, build a map of root item offsets on the page.
1788  *
1789  * This complicates merging against the tuplesort output: we will
1790  * visit the live tuples in order by their offsets, but the root
1791  * offsets that we need to compare against the index contents might be
1792  * ordered differently. So we might have to "look back" within the
1793  * tuplesort output, but only within the current page. We handle that
1794  * by keeping a bool array in_index[] showing all the
1795  * already-passed-over tuplesort output TIDs of the current page. We
1796  * clear that array here, when advancing onto a new heap page.
1797  */
1798  if (hscan->rs_cblock != root_blkno)
1799  {
1800  Page page = BufferGetPage(hscan->rs_cbuf);
1801 
1802  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1803  heap_get_root_tuples(page, root_offsets);
1804  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1805 
1806  memset(in_index, 0, sizeof(in_index));
1807 
1808  root_blkno = hscan->rs_cblock;
1809  }
1810 
1811  /* Convert actual tuple TID to root TID */
1812  rootTuple = *heapcursor;
1813  root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1814 
1815  if (HeapTupleIsHeapOnly(heapTuple))
1816  {
1817  root_offnum = root_offsets[root_offnum - 1];
1818  if (!OffsetNumberIsValid(root_offnum))
1819  ereport(ERROR,
1820  (errcode(ERRCODE_DATA_CORRUPTED),
1821  errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1822  ItemPointerGetBlockNumber(heapcursor),
1823  ItemPointerGetOffsetNumber(heapcursor),
1824  RelationGetRelationName(heapRelation))));
1825  ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1826  }
1827 
1828  /*
1829  * "merge" by skipping through the index tuples until we find or pass
1830  * the current root tuple.
1831  */
1832  while (!tuplesort_empty &&
1833  (!indexcursor ||
1834  ItemPointerCompare(indexcursor, &rootTuple) < 0))
1835  {
1836  Datum ts_val;
1837  bool ts_isnull;
1838 
1839  if (indexcursor)
1840  {
1841  /*
1842  * Remember index items seen earlier on the current heap page
1843  */
1844  if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1845  in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1846  }
1847 
1848  tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1849  &ts_val, &ts_isnull, NULL);
1850  Assert(tuplesort_empty || !ts_isnull);
1851  if (!tuplesort_empty)
1852  {
1853  itemptr_decode(&decoded, DatumGetInt64(ts_val));
1854  indexcursor = &decoded;
1855 
1856  /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
1857 #ifndef USE_FLOAT8_BYVAL
1858  pfree(DatumGetPointer(ts_val));
1859 #endif
1860  }
1861  else
1862  {
1863  /* Be tidy */
1864  indexcursor = NULL;
1865  }
1866  }
1867 
1868  /*
1869  * If the tuplesort has overshot *and* we didn't see a match earlier,
1870  * then this tuple is missing from the index, so insert it.
1871  */
1872  if ((tuplesort_empty ||
1873  ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1874  !in_index[root_offnum - 1])
1875  {
1876  MemoryContextReset(econtext->ecxt_per_tuple_memory);
1877 
1878  /* Set up for predicate or expression evaluation */
1879  ExecStoreHeapTuple(heapTuple, slot, false);
1880 
1881  /*
1882  * In a partial index, discard tuples that don't satisfy the
1883  * predicate.
1884  */
1885  if (predicate != NULL)
1886  {
1887  if (!ExecQual(predicate, econtext))
1888  continue;
1889  }
1890 
1891  /*
1892  * For the current heap tuple, extract all the attributes we use
1893  * in this index, and note which are null. This also performs
1894  * evaluation of any expressions needed.
1895  */
1896  FormIndexDatum(indexInfo,
1897  slot,
1898  estate,
1899  values,
1900  isnull);
1901 
1902  /*
1903  * You'd think we should go ahead and build the index tuple here,
1904  * but some index AMs want to do further processing on the data
1905  * first. So pass the values[] and isnull[] arrays, instead.
1906  */
1907 
1908  /*
1909  * If the tuple is already committed dead, you might think we
1910  * could suppress uniqueness checking, but this is no longer true
1911  * in the presence of HOT, because the insert is actually a proxy
1912  * for a uniqueness check on the whole HOT-chain. That is, the
1913  * tuple we have here could be dead because it was already
1914  * HOT-updated, and if so the updating transaction will not have
1915  * thought it should insert index entries. The index AM will
1916  * check the whole HOT-chain and correctly detect a conflict if
1917  * there is one.
1918  */
1919 
1920  index_insert(indexRelation,
1921  values,
1922  isnull,
1923  &rootTuple,
1924  heapRelation,
1925  indexInfo->ii_Unique ?
1926  UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1927  indexInfo);
1928 
1929  state->tups_inserted += 1;
1930  }
1931  }
1932 
1933  table_endscan(scan);
1934 
1935  ExecDropSingleTupleTableSlot(slot);
1936 
1937  FreeExecutorState(estate);
1938 
1939  /* These may have been pointing to the now-gone estate */
1940  indexInfo->ii_ExpressionsState = NIL;
1941  indexInfo->ii_PredicateState = NULL;
1942 }
1943 
1944 /*
1945  * Return the number of blocks that have been read by this scan since
1946  * starting. This is meant for progress reporting rather than be fully
1947  * accurate: in a parallel scan, workers can be concurrently reading blocks
1948  * further ahead than what we report.
1949  */
1950 static BlockNumber
1951 heapam_scan_get_blocks_done(HeapScanDesc hscan)
1952 {
1953  ParallelBlockTableScanDesc bpscan = NULL;
1954  BlockNumber startblock;
1955  BlockNumber blocks_done;
1956 
1957  if (hscan->rs_base.rs_parallel != NULL)
1958  {
1959  bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1960  startblock = bpscan->phs_startblock;
1961  }
1962  else
1963  startblock = hscan->rs_startblock;
1964 
1965  /*
1966  * Might have wrapped around the end of the relation, if startblock was
1967  * not zero.
1968  */
1969  if (hscan->rs_cblock > startblock)
1970  blocks_done = hscan->rs_cblock - startblock;
1971  else
1972  {
1973  BlockNumber nblocks;
1974 
1975  nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
1976  blocks_done = nblocks - startblock +
1977  hscan->rs_cblock;
1978  }
1979 
1980  return blocks_done;
1981 }
1982 
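/*
 * Worked example for the wrapped-around branch above (illustrative, not part
 * of heapam_handler.c; the guard macro is never defined): a synchronized scan
 * of a 1000-block relation that started at block 900 and is currently on
 * block 99 has read 100 blocks before the wrap and 99 after it.
 */
#ifdef HEAPAM_HANDLER_EXAMPLE_SKETCHES
static void
example_blocks_done_arithmetic(void)
{
	BlockNumber nblocks = 1000;		/* relation size in blocks */
	BlockNumber startblock = 900;	/* where the sync scan started */
	BlockNumber cblock = 99;		/* current block, after wrapping */
	BlockNumber blocks_done;

	blocks_done = nblocks - startblock + cblock;	/* same formula as above */
	Assert(blocks_done == 199);
}
#endif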
1983 
1984 /* ------------------------------------------------------------------------
1985  * Miscellaneous callbacks for the heap AM
1986  * ------------------------------------------------------------------------
1987  */
1988 
1989 /*
1990  * Check to see whether the table needs a TOAST table. It does only if
1991  * (1) there are any toastable attributes, and (2) the maximum length
1992  * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
1993  * create a toast table for something like "f1 varchar(20)".)
1994  */
1995 static bool
1996 heapam_relation_needs_toast_table(Relation rel)
1997 {
1998  int32 data_length = 0;
1999  bool maxlength_unknown = false;
2000  bool has_toastable_attrs = false;
2001  TupleDesc tupdesc = rel->rd_att;
2002  int32 tuple_length;
2003  int i;
2004 
2005  for (i = 0; i < tupdesc->natts; i++)
2006  {
2007  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2008 
2009  if (att->attisdropped)
2010  continue;
2011  data_length = att_align_nominal(data_length, att->attalign);
2012  if (att->attlen > 0)
2013  {
2014  /* Fixed-length types are never toastable */
2015  data_length += att->attlen;
2016  }
2017  else
2018  {
2019  int32 maxlen = type_maximum_size(att->atttypid,
2020  att->atttypmod);
2021 
2022  if (maxlen < 0)
2023  maxlength_unknown = true;
2024  else
2025  data_length += maxlen;
2026  if (att->attstorage != 'p')
2027  has_toastable_attrs = true;
2028  }
2029  }
2030  if (!has_toastable_attrs)
2031  return false; /* nothing to toast? */
2032  if (maxlength_unknown)
2033  return true; /* any unlimited-length attrs? */
2034  tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2035  BITMAPLEN(tupdesc->natts)) +
2036  MAXALIGN(data_length);
2037  return (tuple_length > TOAST_TUPLE_THRESHOLD);
2038 }
2039 
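As a rough illustration of that test, the following sketch mirrors the decision for a table with a single varchar(n) column; the needs_toast_varchar helper, the 2000-byte stand-in threshold and the simplified per-row sizes are hypothetical rather than taken from the headers (the real limit is TOAST_TUPLE_THRESHOLD, roughly a quarter of an 8 kB page):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical simplification of the check above for one varchar(n) column. */
    static bool
    needs_toast_varchar(int n_chars, int threshold)
    {
        int max_data = n_chars * 4 + 4;     /* worst-case multibyte chars + varlena header */
        int tuple_len = 24 + max_data;      /* aligned tuple header + data */

        return tuple_len > threshold;
    }

    int
    main(void)
    {
        printf("varchar(20): %s\n", needs_toast_varchar(20, 2000) ? "toast" : "no toast");
        printf("varchar(4000): %s\n", needs_toast_varchar(4000, 2000) ? "toast" : "no toast");
        return 0;
    }

With the stand-in threshold, varchar(20) stays far below the limit and gets no TOAST table, while varchar(4000) can exceed it and does, which is the same shape of outcome the real check produces.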
2040 /*
2041  * TOAST tables for heap relations are just heap relations.
2042  */
2043 static Oid
2044 heapam_relation_toast_am(Relation rel)
2045 {
2046  return rel->rd_rel->relam;
2047 }
2048 
2049 
2050 /* ------------------------------------------------------------------------
2051  * Planner related callbacks for the heap AM
2052  * ------------------------------------------------------------------------
2053  */
2054 
2055 #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2056  (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2057 #define HEAP_USABLE_BYTES_PER_PAGE \
2058  (BLCKSZ - SizeOfPageHeaderData)
2059 
2060 static void
2061 heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
2062  BlockNumber *pages, double *tuples,
2063  double *allvisfrac)
2064 {
2065  table_block_relation_estimate_size(rel, attr_widths, pages,
2066  tuples, allvisfrac,
2067  HEAP_OVERHEAD_BYTES_PER_TUPLE,
2068  HEAP_USABLE_BYTES_PER_PAGE);
2069 }
2070 
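For reference, on a typical 64-bit build with the default 8 kB block size the two constants above work out roughly as follows (assuming the usual 23-byte heap tuple header, 4-byte line pointer and 24-byte page header):

    HEAP_OVERHEAD_BYTES_PER_TUPLE = MAXALIGN(23) + sizeof(ItemIdData) = 24 + 4 = 28 bytes
    HEAP_USABLE_BYTES_PER_PAGE    = 8192 - 24                          = 8168 bytes

so the planner's size estimate assumes about 28 bytes of per-tuple overhead packed into roughly 8168 usable bytes per page.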
2071 
2072 /* ------------------------------------------------------------------------
2073  * Executor related callbacks for the heap AM
2074  * ------------------------------------------------------------------------
2075  */
2076 
2077 static bool
2078 heapam_scan_bitmap_next_block(TableScanDesc scan,
2079  TBMIterateResult *tbmres)
2080 {
2081  HeapScanDesc hscan = (HeapScanDesc) scan;
2082  BlockNumber page = tbmres->blockno;
2083  Buffer buffer;
2084  Snapshot snapshot;
2085  int ntup;
2086 
2087  hscan->rs_cindex = 0;
2088  hscan->rs_ntuples = 0;
2089 
2090  /*
2091  * Ignore any claimed entries past what we think is the end of the
2092  * relation. It may have been extended after the start of our scan (we
2093  * only hold an AccessShareLock, and it could be inserts from this
2094  * backend).
2095  */
2096  if (page >= hscan->rs_nblocks)
2097  return false;
2098 
2099  /*
2100  * Acquire pin on the target heap page, trading in any pin we held before.
2101  */
2102  hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2103  scan->rs_rd,
2104  page);
2105  hscan->rs_cblock = page;
2106  buffer = hscan->rs_cbuf;
2107  snapshot = scan->rs_snapshot;
2108 
2109  ntup = 0;
2110 
2111  /*
2112  * Prune and repair fragmentation for the whole page, if possible.
2113  */
2114  heap_page_prune_opt(scan->rs_rd, buffer);
2115 
2116  /*
2117  * We must hold share lock on the buffer content while examining tuple
2118  * visibility. Afterwards, however, the tuples we have found to be
2119  * visible are guaranteed good as long as we hold the buffer pin.
2120  */
2121  LockBuffer(buffer, BUFFER_LOCK_SHARE);
2122 
2123  /*
2124  * We need two separate strategies for lossy and non-lossy cases.
2125  */
2126  if (tbmres->ntuples >= 0)
2127  {
2128  /*
2129  * Bitmap is non-lossy, so we just look through the offsets listed in
2130  * tbmres; but we have to follow any HOT chain starting at each such
2131  * offset.
2132  */
2133  int curslot;
2134 
2135  for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2136  {
2137  OffsetNumber offnum = tbmres->offsets[curslot];
2138  ItemPointerData tid;
2139  HeapTupleData heapTuple;
2140 
2141  ItemPointerSet(&tid, page, offnum);
2142  if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2143  &heapTuple, NULL, true))
2144  hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2145  }
2146  }
2147  else
2148  {
2149  /*
2150  * Bitmap is lossy, so we must examine each line pointer on the page.
2151  * But we can ignore HOT chains, since we'll check each tuple anyway.
2152  */
2153  Page dp = (Page) BufferGetPage(buffer);
2154  OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
2155  OffsetNumber offnum;
2156 
2157  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2158  {
2159  ItemId lp;
2160  HeapTupleData loctup;
2161  bool valid;
2162 
2163  lp = PageGetItemId(dp, offnum);
2164  if (!ItemIdIsNormal(lp))
2165  continue;
2166  loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2167  loctup.t_len = ItemIdGetLength(lp);
2168  loctup.t_tableOid = scan->rs_rd->rd_id;
2169  ItemPointerSet(&loctup.t_self, page, offnum);
2170  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2171  if (valid)
2172  {
2173  hscan->rs_vistuples[ntup++] = offnum;
2174  PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
2175  HeapTupleHeaderGetXmin(loctup.t_data));
2176  }
2177  HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2178  buffer, snapshot);
2179  }
2180  }
2181 
2182  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2183 
2184  Assert(ntup <= MaxHeapTuplesPerPage);
2185  hscan->rs_ntuples = ntup;
2186 
2187  return ntup > 0;
2188 }
2189 
2190 static bool
2191 heapam_scan_bitmap_next_tuple(TableScanDesc scan,
2192  TBMIterateResult *tbmres,
2193  TupleTableSlot *slot)
2194 {
2195  HeapScanDesc hscan = (HeapScanDesc) scan;
2196  OffsetNumber targoffset;
2197  Page dp;
2198  ItemId lp;
2199 
2200  /*
2201  * Out of range? If so, nothing more to look at on this page
2202  */
2203  if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
2204  return false;
2205 
2206  targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2207  dp = (Page) BufferGetPage(hscan->rs_cbuf);
2208  lp = PageGetItemId(dp, targoffset);
2209  Assert(ItemIdIsNormal(lp));
2210 
2211  hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
2212  hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2213  hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2214  ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
2215 
2216  pgstat_count_heap_fetch(scan->rs_rd);
2217 
2218  /*
2219  * Set up the result slot to point to this tuple. Note that the slot
2220  * acquires a pin on the buffer.
2221  */
2222  ExecStoreBufferHeapTuple(&hscan->rs_ctup,
2223  slot,
2224  hscan->rs_cbuf);
2225 
2226  hscan->rs_cindex++;
2227 
2228  return true;
2229 }
2230 
2231 static bool
2232 heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
2233 {
2234  HeapScanDesc hscan = (HeapScanDesc) scan;
2235  TsmRoutine *tsm = scanstate->tsmroutine;
2236  BlockNumber blockno;
2237 
2238  /* return false immediately if relation is empty */
2239  if (hscan->rs_nblocks == 0)
2240  return false;
2241 
2242  if (tsm->NextSampleBlock)
2243  {
2244  blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2245  hscan->rs_cblock = blockno;
2246  }
2247  else
2248  {
2249  /* scanning table sequentially */
2250 
2251  if (hscan->rs_cblock == InvalidBlockNumber)
2252  {
2253  Assert(!hscan->rs_inited);
2254  blockno = hscan->rs_startblock;
2255  }
2256  else
2257  {
2258  Assert(hscan->rs_inited);
2259 
2260  blockno = hscan->rs_cblock + 1;
2261 
2262  if (blockno >= hscan->rs_nblocks)
2263  {
2264  /* wrap to beginning of rel, might not have started at 0 */
2265  blockno = 0;
2266  }
2267 
2268  /*
2269  * Report our new scan position for synchronization purposes.
2270  *
2271  * Note: we do this before checking for end of scan so that the
2272  * final state of the position hint is back at the start of the
2273  * rel. That's not strictly necessary, but otherwise when you run
2274  * the same query multiple times the starting position would shift
2275  * a little bit backwards on every invocation, which is confusing.
2276  * We don't guarantee any specific ordering in general, though.
2277  */
2278  if (scan->rs_flags & SO_ALLOW_SYNC)
2279  ss_report_location(scan->rs_rd, blockno);
2280 
2281  if (blockno == hscan->rs_startblock)
2282  {
2283  blockno = InvalidBlockNumber;
2284  }
2285  }
2286  }
2287 
2288  if (!BlockNumberIsValid(blockno))
2289  {
2290  if (BufferIsValid(hscan->rs_cbuf))
2291  ReleaseBuffer(hscan->rs_cbuf);
2292  hscan->rs_cbuf = InvalidBuffer;
2293  hscan->rs_cblock = InvalidBlockNumber;
2294  hscan->rs_inited = false;
2295 
2296  return false;
2297  }
2298 
2299  heapgetpage(scan, blockno);
2300  hscan->rs_inited = true;
2301 
2302  return true;
2303 }
2304 
2305 static bool
2306 heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
2307  TupleTableSlot *slot)
2308 {
2309  HeapScanDesc hscan = (HeapScanDesc) scan;
2310  TsmRoutine *tsm = scanstate->tsmroutine;
2311  BlockNumber blockno = hscan->rs_cblock;
2312  bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2313 
2314  Page page;
2315  bool all_visible;
2316  OffsetNumber maxoffset;
2317 
2318  /*
2319  * When not using pagemode, we must lock the buffer during tuple
2320  * visibility checks.
2321  */
2322  if (!pagemode)
2323  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
2324 
2325  page = (Page) BufferGetPage(hscan->rs_cbuf);
2326  all_visible = PageIsAllVisible(page) &&
2327  !scan->rs_snapshot->takenDuringRecovery;
2328  maxoffset = PageGetMaxOffsetNumber(page);
2329 
2330  for (;;)
2331  {
2332  OffsetNumber tupoffset;
2333 
2334  CHECK_FOR_INTERRUPTS();
2335 
2336  /* Ask the tablesample method which tuples to check on this page. */
2337  tupoffset = tsm->NextSampleTuple(scanstate,
2338  blockno,
2339  maxoffset);
2340 
2341  if (OffsetNumberIsValid(tupoffset))
2342  {
2343  ItemId itemid;
2344  bool visible;
2345  HeapTuple tuple = &(hscan->rs_ctup);
2346 
2347  /* Skip invalid tuple pointers. */
2348  itemid = PageGetItemId(page, tupoffset);
2349  if (!ItemIdIsNormal(itemid))
2350  continue;
2351 
2352  tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2353  tuple->t_len = ItemIdGetLength(itemid);
2354  ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2355 
2356 
2357  if (all_visible)
2358  visible = true;
2359  else
2360  visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2361  tuple, tupoffset);
2362 
2363  /* in pagemode, heapgetpage did this for us */
2364  if (!pagemode)
2365  HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2366  hscan->rs_cbuf, scan->rs_snapshot);
2367 
2368  /* Try next tuple from same page. */
2369  if (!visible)
2370  continue;
2371 
2372  /* Found visible tuple, return it. */
2373  if (!pagemode)
2374  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2375 
2376  ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2377 
2378  /* Count successfully-fetched tuples as heap fetches */
2379  pgstat_count_heap_fetch(scan->rs_rd);
2380 
2381  return true;
2382  }
2383  else
2384  {
2385  /*
2386  * If we get here, it means we've exhausted the items on this page
2387  * and it's time to move to the next.
2388  */
2389  if (!pagemode)
2390  LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2391 
2392  ExecClearTuple(slot);
2393  return false;
2394  }
2395  }
2396 
2397  Assert(0);
2398 }
2399 
2400 
2401 /* ----------------------------------------------------------------------------
2402  * Helper functions for the above.
2403  * ----------------------------------------------------------------------------
2404  */
2405 
2406 /*
2407  * Reconstruct and rewrite the given tuple
2408  *
2409  * We cannot simply copy the tuple as-is, for several reasons:
2410  *
2411  * 1. We'd like to squeeze out the values of any dropped columns, both
2412  * to save space and to ensure we have no corner-case failures. (It's
2413  * possible for example that the new table hasn't got a TOAST table
2414  * and so is unable to store any large values of dropped cols.)
2415  *
2416  * 2. The tuple might not even be legal for the new table; this is
2417  * currently only known to happen as an after-effect of ALTER TABLE
2418  * SET WITHOUT OIDS.
2419  *
2420  * So, we must reconstruct the tuple from component Datums.
2421  */
2422 static void
2423 reform_and_rewrite_tuple(HeapTuple tuple,
2424  Relation OldHeap, Relation NewHeap,
2425  Datum *values, bool *isnull, RewriteState rwstate)
2426 {
2427  TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2428  TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2429  HeapTuple copiedTuple;
2430  int i;
2431 
2432  heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2433 
2434  /* Be sure to null out any dropped columns */
2435  for (i = 0; i < newTupDesc->natts; i++)
2436  {
2437  if (TupleDescAttr(newTupDesc, i)->attisdropped)
2438  isnull[i] = true;
2439  }
2440 
2441  copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2442 
2443  /* The heap rewrite module does the rest */
2444  rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2445 
2446  heap_freetuple(copiedTuple);
2447 }
2448 
2449 /*
2450  * Check visibility of the tuple.
2451  */
2452 static bool
2453 SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
2454  HeapTuple tuple,
2455  OffsetNumber tupoffset)
2456 {
2457  HeapScanDesc hscan = (HeapScanDesc) scan;
2458 
2459  if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2460  {
2461  /*
2462  * In pageatatime mode, heapgetpage() already did visibility checks,
2463  * so just look at the info it left in rs_vistuples[].
2464  *
2465  * We use a binary search over the known-sorted array. Note: we could
2466  * save some effort if we insisted that NextSampleTuple select tuples
2467  * in increasing order, but it's not clear that there would be enough
2468  * gain to justify the restriction.
2469  */
2470  int start = 0,
2471  end = hscan->rs_ntuples - 1;
2472 
2473  while (start <= end)
2474  {
2475  int mid = (start + end) / 2;
2476  OffsetNumber curoffset = hscan->rs_vistuples[mid];
2477 
2478  if (tupoffset == curoffset)
2479  return true;
2480  else if (tupoffset < curoffset)
2481  end = mid - 1;
2482  else
2483  start = mid + 1;
2484  }
2485 
2486  return false;
2487  }
2488  else
2489  {
2490  /* Otherwise, we have to check the tuple individually. */
2491  return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2492  buffer);
2493  }
2494 }
2495 
2496 
2497 /* ------------------------------------------------------------------------
2498  * Definition of the heap table access method.
2499  * ------------------------------------------------------------------------
2500  */
2501 
2502 static const TableAmRoutine heapam_methods = {
2503  .type = T_TableAmRoutine,
2504 
2505  .slot_callbacks = heapam_slot_callbacks,
2506 
2507  .scan_begin = heap_beginscan,
2508  .scan_end = heap_endscan,
2509  .scan_rescan = heap_rescan,
2510  .scan_getnextslot = heap_getnextslot,
2511 
2512  .parallelscan_estimate = table_block_parallelscan_estimate,
2513  .parallelscan_initialize = table_block_parallelscan_initialize,
2514  .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2515 
2516  .index_fetch_begin = heapam_index_fetch_begin,
2517  .index_fetch_reset = heapam_index_fetch_reset,
2518  .index_fetch_end = heapam_index_fetch_end,
2519  .index_fetch_tuple = heapam_index_fetch_tuple,
2520 
2521  .tuple_insert = heapam_tuple_insert,
2522  .tuple_insert_speculative = heapam_tuple_insert_speculative,
2523  .tuple_complete_speculative = heapam_tuple_complete_speculative,
2524  .multi_insert = heap_multi_insert,
2525  .tuple_delete = heapam_tuple_delete,
2526  .tuple_update = heapam_tuple_update,
2527  .tuple_lock = heapam_tuple_lock,
2528  .finish_bulk_insert = heapam_finish_bulk_insert,
2529 
2530  .tuple_fetch_row_version = heapam_fetch_row_version,
2531  .tuple_get_latest_tid = heap_get_latest_tid,
2532  .tuple_tid_valid = heapam_tuple_tid_valid,
2533  .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2534  .compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
2535 
2536  .relation_set_new_filenode = heapam_relation_set_new_filenode,
2537  .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2538  .relation_copy_data = heapam_relation_copy_data,
2539  .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2540  .relation_vacuum = heap_vacuum_rel,
2541  .scan_analyze_next_block = heapam_scan_analyze_next_block,
2542  .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2543  .index_build_range_scan = heapam_index_build_range_scan,
2544  .index_validate_scan = heapam_index_validate_scan,
2545 
2546  .relation_size = table_block_relation_size,
2547  .relation_needs_toast_table = heapam_relation_needs_toast_table,
2548  .relation_toast_am = heapam_relation_toast_am,
2549  .relation_fetch_toast_slice = heap_fetch_toast_slice,
2550 
2551  .relation_estimate_size = heapam_estimate_rel_size,
2552 
2553  .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2554  .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2555  .scan_sample_next_block = heapam_scan_sample_next_block,
2556  .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2557 };
2558 
2559 
2560 const TableAmRoutine *
2561 GetHeapamTableAmRoutine(void)
2562 {
2563  return &heapam_methods;
2564 }
2565 
2566 Datum
2567 heap_tableam_handler(PG_FUNCTION_ARGS)
2568 {
2569  PG_RETURN_POINTER(&heapam_methods);
2570 }
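For context, heap_tableam_handler is the function a CREATE ACCESS METHOD ... TYPE TABLE ... HANDLER clause points at. A minimal, hypothetical out-of-tree handler (the my_tableam_handler name is invented for illustration) could simply hand back the routine table exported by GetHeapamTableAmRoutine():

    #include "postgres.h"

    #include "access/tableam.h"
    #include "fmgr.h"

    PG_MODULE_MAGIC;

    PG_FUNCTION_INFO_V1(my_tableam_handler);

    /*
     * Hypothetical handler that reuses the heap AM's routine table verbatim;
     * a real table AM would return its own TableAmRoutine with its own callbacks.
     */
    Datum
    my_tableam_handler(PG_FUNCTION_ARGS)
    {
        PG_RETURN_POINTER(GetHeapamTableAmRoutine());
    }

On the SQL level such a module would declare a function returning table_am_handler and register it with CREATE ACCESS METHOD ... TYPE TABLE HANDLER my_tableam_handler, after which CREATE TABLE ... USING can select it.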