heapam_handler.c
1/*-------------------------------------------------------------------------
2 *
3 * heapam_handler.c
4 * heap table access method code
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/heap/heapam_handler.c
12 *
13 *
14 * NOTES
15 * This file wires up the lower-level heapam.c et al. routines with the
16 * tableam abstraction.
17 *
18 *-------------------------------------------------------------------------
19 */
20#include "postgres.h"
21
22#include "access/genam.h"
23#include "access/heapam.h"
24#include "access/heaptoast.h"
25#include "access/multixact.h"
26#include "access/rewriteheap.h"
27#include "access/syncscan.h"
28#include "access/tableam.h"
29#include "access/tsmapi.h"
31#include "access/xact.h"
32#include "catalog/catalog.h"
33#include "catalog/index.h"
34#include "catalog/storage.h"
36#include "commands/progress.h"
37#include "executor/executor.h"
38#include "miscadmin.h"
39#include "pgstat.h"
40#include "storage/bufmgr.h"
41#include "storage/bufpage.h"
42#include "storage/lmgr.h"
43#include "storage/predicate.h"
44#include "storage/procarray.h"
45#include "storage/smgr.h"
46#include "utils/builtins.h"
47#include "utils/rel.h"
48
49static void reform_and_rewrite_tuple(HeapTuple tuple,
50 Relation OldHeap, Relation NewHeap,
51 Datum *values, bool *isnull, RewriteState rwstate);
52
53static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
54 HeapTuple tuple,
55 OffsetNumber tupoffset);
56
57static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
58
59static bool BitmapHeapScanNextBlock(TableScanDesc scan,
60 bool *recheck,
61 uint64 *lossy_pages, uint64 *exact_pages);
62
63
64/* ------------------------------------------------------------------------
65 * Slot related callbacks for heap AM
66 * ------------------------------------------------------------------------
67 */
68
69static const TupleTableSlotOps *
70heapam_slot_callbacks(Relation relation)
71{
72 return &TTSOpsBufferHeapTuple;
73}
74
75
76/* ------------------------------------------------------------------------
77 * Index Scan Callbacks for heap AM
78 * ------------------------------------------------------------------------
79 */
80
81static IndexFetchTableData *
82heapam_index_fetch_begin(Relation rel)
83{
84 IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
85
86 hscan->xs_base.rel = rel;
87 hscan->xs_cbuf = InvalidBuffer;
88
89 return &hscan->xs_base;
90}
91
92static void
93heapam_index_fetch_reset(IndexFetchTableData *scan)
94{
95 IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
96
97 if (BufferIsValid(hscan->xs_cbuf))
98 {
99 ReleaseBuffer(hscan->xs_cbuf);
100 hscan->xs_cbuf = InvalidBuffer;
101 }
102}
103
104static void
105heapam_index_fetch_end(IndexFetchTableData *scan)
106{
107 IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
108
109 heapam_index_fetch_reset(scan);
110
111 pfree(hscan);
112}
113
114static bool
115heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
116 ItemPointer tid,
117 Snapshot snapshot,
118 TupleTableSlot *slot,
119 bool *call_again, bool *all_dead)
120{
121 IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
122 BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
123 bool got_heap_tuple;
124
125 Assert(TTS_IS_BUFFERTUPLE(slot));
126
127 /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
128 if (!*call_again)
129 {
130 /* Switch to correct buffer if we don't have it already */
131 Buffer prev_buf = hscan->xs_cbuf;
132
133 hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
134 hscan->xs_base.rel,
135 ItemPointerGetBlockNumber(tid));
136
137 /*
138 * Prune page, but only if we weren't already on this page
139 */
140 if (prev_buf != hscan->xs_cbuf)
141 heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
142 }
143
144 /* Obtain share-lock on the buffer so we can examine visibility */
145 LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
146 got_heap_tuple = heap_hot_search_buffer(tid,
147 hscan->xs_base.rel,
148 hscan->xs_cbuf,
149 snapshot,
150 &bslot->base.tupdata,
151 all_dead,
152 !*call_again);
153 bslot->base.tupdata.t_self = *tid;
154 LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
155
156 if (got_heap_tuple)
157 {
158 /*
159 * Only in a non-MVCC snapshot can more than one member of the HOT
160 * chain be visible.
161 */
162 *call_again = !IsMVCCSnapshot(snapshot);
163
164 slot->tts_tableOid = RelationGetRelid(scan->rel);
165 ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
166 }
167 else
168 {
169 /* We've reached the end of the HOT chain. */
170 *call_again = false;
171 }
172
173 return got_heap_tuple;
174}
175
176
177/* ------------------------------------------------------------------------
178 * Callbacks for non-modifying operations on individual tuples for heap AM
179 * ------------------------------------------------------------------------
180 */
181
182static bool
183heapam_fetch_row_version(Relation relation,
184 ItemPointer tid,
185 Snapshot snapshot,
186 TupleTableSlot *slot)
187{
188 BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
189 Buffer buffer;
190
191 Assert(TTS_IS_BUFFERTUPLE(slot));
192
193 bslot->base.tupdata.t_self = *tid;
194 if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
195 {
196 /* store in slot, transferring existing pin */
197 ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
198 slot->tts_tableOid = RelationGetRelid(relation);
199
200 return true;
201 }
202
203 return false;
204}
205
206static bool
207heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
208{
209 HeapScanDesc hscan = (HeapScanDesc) scan;
210
211 return ItemPointerIsValid(tid) &&
212 ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
213}
214
215static bool
216heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
217 Snapshot snapshot)
218{
219 BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
220 bool res;
221
222 Assert(TTS_IS_BUFFERTUPLE(slot));
223 Assert(BufferIsValid(bslot->buffer));
224
225 /*
226 * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
227 * Caller should be holding pin, but not lock.
228 */
229 LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
230 res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
231 bslot->buffer);
232 LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
233
234 return res;
235}
236
237
238/* ----------------------------------------------------------------------------
239 * Functions for manipulations of physical tuples for heap AM.
240 * ----------------------------------------------------------------------------
241 */
242
243static void
244heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
245 int options, BulkInsertState bistate)
246{
247 bool shouldFree = true;
248 HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
249
250 /* Update the tuple with table oid */
251 slot->tts_tableOid = RelationGetRelid(relation);
252 tuple->t_tableOid = slot->tts_tableOid;
253
254 /* Perform the insertion, and copy the resulting ItemPointer */
255 heap_insert(relation, tuple, cid, options, bistate);
256 ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
257
258 if (shouldFree)
259 pfree(tuple);
260}
261
262static void
263heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
264 CommandId cid, int options,
265 BulkInsertState bistate, uint32 specToken)
266{
267 bool shouldFree = true;
268 HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
269
270 /* Update the tuple with table oid */
271 slot->tts_tableOid = RelationGetRelid(relation);
272 tuple->t_tableOid = slot->tts_tableOid;
273
274 HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
275 options |= HEAP_INSERT_SPECULATIVE;
276
277 /* Perform the insertion, and copy the resulting ItemPointer */
278 heap_insert(relation, tuple, cid, options, bistate);
279 ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
280
281 if (shouldFree)
282 pfree(tuple);
283}
284
285static void
286heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
287 uint32 specToken, bool succeeded)
288{
289 bool shouldFree = true;
290 HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
291
292 /* adjust the tuple's state accordingly */
293 if (succeeded)
294 heap_finish_speculative(relation, &slot->tts_tid);
295 else
296 heap_abort_speculative(relation, &slot->tts_tid);
297
298 if (shouldFree)
299 pfree(tuple);
300}
301
302static TM_Result
303heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
304 Snapshot snapshot, Snapshot crosscheck, bool wait,
305 TM_FailureData *tmfd, bool changingPart)
306{
307 /*
308 * Currently, deletion of index tuples is handled at VACUUM time. If the
309 * table AM were to clean up dead tuples on its own, that would also be
310 * the moment to delete the corresponding index tuples.
311 */
312 return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
313}
314
315
316static TM_Result
317heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
318 CommandId cid, Snapshot snapshot, Snapshot crosscheck,
319 bool wait, TM_FailureData *tmfd,
320 LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
321{
322 bool shouldFree = true;
323 HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
324 TM_Result result;
325
326 /* Update the tuple with table oid */
327 slot->tts_tableOid = RelationGetRelid(relation);
328 tuple->t_tableOid = slot->tts_tableOid;
329
330 result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
331 tmfd, lockmode, update_indexes);
332 ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
333
334 /*
335 * Decide whether new index entries are needed for the tuple
336 *
337 * Note: heap_update returns the tid (location) of the new tuple in the
338 * t_self field.
339 *
340 * If the update is not HOT, we must update all indexes. If the update is
341 * HOT, it could be that we updated summarized columns, so we either
342 * update only summarized indexes, or none at all.
343 */
344 if (result != TM_Ok)
345 {
346 Assert(*update_indexes == TU_None);
347 *update_indexes = TU_None;
348 }
349 else if (!HeapTupleIsHeapOnly(tuple))
350 Assert(*update_indexes == TU_All);
351 else
352 Assert((*update_indexes == TU_Summarizing) ||
353 (*update_indexes == TU_None));
354
355 if (shouldFree)
356 pfree(tuple);
357
358 return result;
359}
360
361static TM_Result
362heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
363 TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
364 LockWaitPolicy wait_policy, uint8 flags,
365 TM_FailureData *tmfd)
366{
367 BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
368 TM_Result result;
369 Buffer buffer;
370 HeapTuple tuple = &bslot->base.tupdata;
371 bool follow_updates;
372
373 follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
374 tmfd->traversed = false;
375
376 Assert(TTS_IS_BUFFERTUPLE(slot));
377
378tuple_lock_retry:
379 tuple->t_self = *tid;
380 result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
381 follow_updates, &buffer, tmfd);
382
383 if (result == TM_Updated &&
384 (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
385 {
386 /* Should not encounter speculative tuple on recheck */
387 Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
388
389 ReleaseBuffer(buffer);
390
391 if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
392 {
393 SnapshotData SnapshotDirty;
394 TransactionId priorXmax;
395
396 /* it was updated, so look at the updated version */
397 *tid = tmfd->ctid;
398 /* updated row should have xmin matching this xmax */
399 priorXmax = tmfd->xmax;
400
401 /* signal that a tuple later in the chain is getting locked */
402 tmfd->traversed = true;
403
404 /*
405 * fetch target tuple
406 *
407 * Loop here to deal with updated or busy tuples
408 */
409 InitDirtySnapshot(SnapshotDirty);
410 for (;;)
411 {
415 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
416
417 tuple->t_self = *tid;
418 if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
419 {
420 /*
421 * If xmin isn't what we're expecting, the slot must have
422 * been recycled and reused for an unrelated tuple. This
423 * implies that the latest version of the row was deleted,
424 * so we need do nothing. (Should be safe to examine xmin
425 * without getting buffer's content lock. We assume
426 * reading a TransactionId to be atomic, and Xmin never
427 * changes in an existing tuple, except to invalid or
428 * frozen, and neither of those can match priorXmax.)
429 */
430 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
431 priorXmax))
432 {
433 ReleaseBuffer(buffer);
434 return TM_Deleted;
435 }
436
437 /* otherwise xmin should not be dirty... */
438 if (TransactionIdIsValid(SnapshotDirty.xmin))
439 ereport(ERROR,
440 (errcode(ERRCODE_DATA_CORRUPTED),
441 errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
442 SnapshotDirty.xmin,
443 ItemPointerGetBlockNumber(&tuple->t_self),
444 ItemPointerGetOffsetNumber(&tuple->t_self),
445 RelationGetRelationName(relation))));
446
447 /*
448 * If tuple is being updated by other transaction then we
449 * have to wait for its commit/abort, or die trying.
450 */
451 if (TransactionIdIsValid(SnapshotDirty.xmax))
452 {
453 ReleaseBuffer(buffer);
454 switch (wait_policy)
455 {
456 case LockWaitBlock:
457 XactLockTableWait(SnapshotDirty.xmax,
458 relation, &tuple->t_self,
459 XLTW_FetchUpdated);
460 break;
461 case LockWaitSkip:
462 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, false))
463 /* skip instead of waiting */
464 return TM_WouldBlock;
465 break;
466 case LockWaitError:
469 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
470 errmsg("could not obtain lock on row in relation \"%s\"",
471 RelationGetRelationName(relation))));
472 break;
473 }
474 continue; /* loop back to repeat heap_fetch */
475 }
476
477 /*
478 * If tuple was inserted by our own transaction, we have
479 * to check cmin against cid: cmin >= current CID means
480 * our command cannot see the tuple, so we should ignore
481 * it. Otherwise heap_lock_tuple() will throw an error,
482 * and so would any later attempt to update or delete the
483 * tuple. (We need not check cmax because
484 * HeapTupleSatisfiesDirty will consider a tuple deleted
485 * by our transaction dead, regardless of cmax.) We just
486 * checked that priorXmax == xmin, so we can test that
487 * variable instead of doing HeapTupleHeaderGetXmin again.
488 */
489 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)) &&
490 HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
491 {
492 tmfd->xmax = priorXmax;
493
494 /*
495 * Cmin is the problematic value, so store that. See
496 * above.
497 */
498 tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
499 ReleaseBuffer(buffer);
500 return TM_SelfModified;
501 }
502
503 /*
504 * This is a live tuple, so try to lock it again.
505 */
506 ReleaseBuffer(buffer);
507 goto tuple_lock_retry;
508 }
509
510 /*
511 * If the referenced slot was actually empty, the latest
512 * version of the row must have been deleted, so we need do
513 * nothing.
514 */
515 if (tuple->t_data == NULL)
516 {
517 Assert(!BufferIsValid(buffer));
518 return TM_Deleted;
519 }
520
521 /*
522 * As above, if xmin isn't what we're expecting, do nothing.
523 */
524 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
525 priorXmax))
526 {
527 ReleaseBuffer(buffer);
528 return TM_Deleted;
529 }
530
531 /*
532 * If we get here, the tuple was found but failed
533 * SnapshotDirty. Assuming the xmin is either a committed xact
534 * or our own xact (as it certainly should be if we're trying
535 * to modify the tuple), this must mean that the row was
536 * updated or deleted by either a committed xact or our own
537 * xact. If it was deleted, we can ignore it; if it was
538 * updated then chain up to the next version and repeat the
539 * whole process.
540 *
541 * As above, it should be safe to examine xmax and t_ctid
542 * without the buffer content lock, because they can't be
543 * changing. We'd better hold a buffer pin though.
544 */
545 if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
546 {
547 /* deleted, so forget about it */
548 ReleaseBuffer(buffer);
549 return TM_Deleted;
550 }
551
552 /* updated, so look at the updated row */
553 *tid = tuple->t_data->t_ctid;
554 /* updated row should have xmin matching this xmax */
555 priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
556 ReleaseBuffer(buffer);
557 /* loop back to fetch next in chain */
558 }
559 }
560 else
561 {
562 /* tuple was deleted, so give up */
563 return TM_Deleted;
564 }
565 }
566
567 slot->tts_tableOid = RelationGetRelid(relation);
568 tuple->t_tableOid = slot->tts_tableOid;
569
570 /* store in slot, transferring existing pin */
571 ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
572
573 return result;
574}
575
576
577/* ------------------------------------------------------------------------
578 * DDL related callbacks for heap AM.
579 * ------------------------------------------------------------------------
580 */
581
582static void
583heapam_relation_set_new_filelocator(Relation rel,
584 const RelFileLocator *newrlocator,
585 char persistence,
586 TransactionId *freezeXid,
587 MultiXactId *minmulti)
588{
589 SMgrRelation srel;
590
591 /*
592 * Initialize to the minimum XID that could put tuples in the table. We
593 * know that no xacts older than RecentXmin are still running, so that
594 * will do.
595 */
596 *freezeXid = RecentXmin;
597
598 /*
599 * Similarly, initialize the minimum Multixact to the first value that
600 * could possibly be stored in tuples in the table. Running transactions
601 * could reuse values from their local cache, so we are careful to
602 * consider all currently running multis.
603 *
604 * XXX this could be refined further, but is it worth the hassle?
605 */
606 *minmulti = GetOldestMultiXactId();
607
608 srel = RelationCreateStorage(*newrlocator, persistence, true);
609
610 /*
611 * If required, set up an init fork for an unlogged table so that it can
612 * be correctly reinitialized on restart.
613 */
614 if (persistence == RELPERSISTENCE_UNLOGGED)
615 {
616 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
617 rel->rd_rel->relkind == RELKIND_TOASTVALUE);
618 smgrcreate(srel, INIT_FORKNUM, false);
619 log_smgrcreate(newrlocator, INIT_FORKNUM);
620 }
621
622 smgrclose(srel);
623}
624
625static void
626heapam_relation_nontransactional_truncate(Relation rel)
627{
628 RelationTruncate(rel, 0);
629}
630
631static void
632heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
633{
634 SMgrRelation dstrel;
635
636 /*
637 * Since we copy the file directly without looking at the shared buffers,
638 * we'd better first flush out any pages of the source relation that are
639 * in shared buffers. We assume no new changes will be made while we are
640 * holding exclusive lock on the rel.
641 */
642 FlushRelationBuffers(rel);
643
644 /*
645 * Create and copy all forks of the relation, and schedule unlinking of
646 * old physical files.
647 *
648 * NOTE: any conflict in relfilenumber value will be caught in
649 * RelationCreateStorage().
650 */
651 dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
652
653 /* copy main fork */
654 RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
655 rel->rd_rel->relpersistence);
656
657 /* copy those extra forks that exist */
658 for (ForkNumber forkNum = MAIN_FORKNUM + 1;
659 forkNum <= MAX_FORKNUM; forkNum++)
660 {
661 if (smgrexists(RelationGetSmgr(rel), forkNum))
662 {
663 smgrcreate(dstrel, forkNum, false);
664
665 /*
666 * WAL log creation if the relation is persistent, or this is the
667 * init fork of an unlogged relation.
668 */
669 if (RelationIsPermanent(rel) ||
670 (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
671 forkNum == INIT_FORKNUM))
672 log_smgrcreate(newrlocator, forkNum);
673 RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
674 rel->rd_rel->relpersistence);
675 }
676 }
677
678
679 /* drop old relation, and close new one */
680 RelationDropStorage(rel);
681 smgrclose(dstrel);
682}
683
684static void
685heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
686 Relation OldIndex, bool use_sort,
687 TransactionId OldestXmin,
688 TransactionId *xid_cutoff,
689 MultiXactId *multi_cutoff,
690 double *num_tuples,
691 double *tups_vacuumed,
692 double *tups_recently_dead)
693{
694 RewriteState rwstate;
695 IndexScanDesc indexScan;
696 TableScanDesc tableScan;
697 HeapScanDesc heapScan;
698 bool is_system_catalog;
699 Tuplesortstate *tuplesort;
700 TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
701 TupleDesc newTupDesc = RelationGetDescr(NewHeap);
702 TupleTableSlot *slot;
703 int natts;
704 Datum *values;
705 bool *isnull;
707 BlockNumber prev_cblock = InvalidBlockNumber;
708
709 /* Remember if it's a system catalog */
710 is_system_catalog = IsSystemRelation(OldHeap);
711
712 /*
713 * Valid smgr_targblock implies something already wrote to the relation.
714 * This may be harmless, but this function hasn't planned for it.
715 */
717
718 /* Preallocate values/isnull arrays */
719 natts = newTupDesc->natts;
720 values = (Datum *) palloc(natts * sizeof(Datum));
721 isnull = (bool *) palloc(natts * sizeof(bool));
722
723 /* Initialize the rewrite operation */
724 rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
725 *multi_cutoff);
726
727
728 /* Set up sorting if wanted */
729 if (use_sort)
730 tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
732 NULL, TUPLESORT_NONE);
733 else
734 tuplesort = NULL;
735
736 /*
737 * Prepare to scan the OldHeap. To ensure we see recently-dead tuples
738 * that still need to be copied, we scan with SnapshotAny and use
739 * HeapTupleSatisfiesVacuum for the visibility test.
740 */
741 if (OldIndex != NULL && !use_sort)
742 {
743 const int ci_index[] = {
746 };
747 int64 ci_val[2];
748
749 /* Set phase and OIDOldIndex to columns */
751 ci_val[1] = RelationGetRelid(OldIndex);
752 pgstat_progress_update_multi_param(2, ci_index, ci_val);
753
754 tableScan = NULL;
755 heapScan = NULL;
756 indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0);
757 index_rescan(indexScan, NULL, 0, NULL, 0);
758 }
759 else
760 {
761 /* In scan-and-sort mode and also VACUUM FULL, set phase */
764
765 tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
766 heapScan = (HeapScanDesc) tableScan;
767 indexScan = NULL;
768
769 /* Set total heap blocks */
771 heapScan->rs_nblocks);
772 }
773
774 slot = table_slot_create(OldHeap, NULL);
775 hslot = (BufferHeapTupleTableSlot *) slot;
776
777 /*
778 * Scan through the OldHeap, either in OldIndex order or sequentially;
779 * copy each tuple into the NewHeap, or transiently to the tuplesort
780 * module. Note that we don't bother sorting dead tuples (they won't get
781 * to the new table anyway).
782 */
783 for (;;)
784 {
785 HeapTuple tuple;
786 Buffer buf;
787 bool isdead;
788
790
791 if (indexScan != NULL)
792 {
793 if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
794 break;
795
796 /* Since we used no scan keys, should never need to recheck */
797 if (indexScan->xs_recheck)
798 elog(ERROR, "CLUSTER does not support lossy index conditions");
799 }
800 else
801 {
802 if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
803 {
804 /*
805 * If the last pages of the scan were empty, we would go to
806 * the next phase while heap_blks_scanned != heap_blks_total.
807 * Instead, to ensure that heap_blks_scanned is equivalent to
808 * heap_blks_total after the table scan phase, this parameter
809 * is manually updated to the correct value when the table
810 * scan finishes.
811 */
813 heapScan->rs_nblocks);
814 break;
815 }
816
817 /*
818 * In scan-and-sort mode and also VACUUM FULL, set heap blocks
819 * scanned
820 *
821 * Note that heapScan may start at an offset and wrap around, i.e.
822 * rs_startblock may be >0, and rs_cblock may end with a number
823 * below rs_startblock. To prevent showing this wraparound to the
824 * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
825 */
826 if (prev_cblock != heapScan->rs_cblock)
827 {
829 (heapScan->rs_cblock +
830 heapScan->rs_nblocks -
831 heapScan->rs_startblock
832 ) % heapScan->rs_nblocks + 1);
833 prev_cblock = heapScan->rs_cblock;
834 }
835 }
836
837 tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
838 buf = hslot->buffer;
839
841
842 switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
843 {
844 case HEAPTUPLE_DEAD:
845 /* Definitely dead */
846 isdead = true;
847 break;
849 *tups_recently_dead += 1;
850 /* fall through */
851 case HEAPTUPLE_LIVE:
852 /* Live or recently dead, must copy it */
853 isdead = false;
854 break;
856
857 /*
858 * Since we hold exclusive lock on the relation, normally the
859 * only way to see this is if it was inserted earlier in our
860 * own transaction. However, it can happen in system
861 * catalogs, since we tend to release write lock before commit
862 * there. Give a warning if neither case applies; but in any
863 * case we had better copy it.
864 */
865 if (!is_system_catalog &&
867 elog(WARNING, "concurrent insert in progress within table \"%s\"",
868 RelationGetRelationName(OldHeap));
869 /* treat as live */
870 isdead = false;
871 break;
873
874 /*
875 * Similar situation to INSERT_IN_PROGRESS case.
876 */
877 if (!is_system_catalog &&
879 elog(WARNING, "concurrent delete in progress within table \"%s\"",
880 RelationGetRelationName(OldHeap));
881 /* treat as recently dead */
882 *tups_recently_dead += 1;
883 isdead = false;
884 break;
885 default:
886 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
887 isdead = false; /* keep compiler quiet */
888 break;
889 }
890
892
893 if (isdead)
894 {
895 *tups_vacuumed += 1;
896 /* heap rewrite module still needs to see it... */
897 if (rewrite_heap_dead_tuple(rwstate, tuple))
898 {
899 /* A previous recently-dead tuple is now known dead */
900 *tups_vacuumed += 1;
901 *tups_recently_dead -= 1;
902 }
903 continue;
904 }
905
906 *num_tuples += 1;
907 if (tuplesort != NULL)
908 {
909 tuplesort_putheaptuple(tuplesort, tuple);
910
911 /*
912 * In scan-and-sort mode, report increase in number of tuples
913 * scanned
914 */
916 *num_tuples);
917 }
918 else
919 {
920 const int ct_index[] = {
923 };
924 int64 ct_val[2];
925
926 reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
927 values, isnull, rwstate);
928
929 /*
930 * In indexscan mode and also VACUUM FULL, report increase in
931 * number of tuples scanned and written
932 */
933 ct_val[0] = *num_tuples;
934 ct_val[1] = *num_tuples;
935 pgstat_progress_update_multi_param(2, ct_index, ct_val);
936 }
937 }
938
939 if (indexScan != NULL)
940 index_endscan(indexScan);
941 if (tableScan != NULL)
942 table_endscan(tableScan);
943 if (slot)
945
946 /*
947 * In scan-and-sort mode, complete the sort, then read out all live tuples
948 * from the tuplestore and write them to the new relation.
949 */
950 if (tuplesort != NULL)
951 {
952 double n_tuples = 0;
953
954 /* Report that we are now sorting tuples */
957
958 tuplesort_performsort(tuplesort);
959
960 /* Report that we are now writing new heap */
963
964 for (;;)
965 {
966 HeapTuple tuple;
967
969
970 tuple = tuplesort_getheaptuple(tuplesort, true);
971 if (tuple == NULL)
972 break;
973
974 n_tuples += 1;
976 OldHeap, NewHeap,
977 values, isnull,
978 rwstate);
979 /* Report n_tuples */
981 n_tuples);
982 }
983
984 tuplesort_end(tuplesort);
985 }
986
987 /* Write out any remaining tuples, and fsync if needed */
988 end_heap_rewrite(rwstate);
989
990 /* Clean up */
991 pfree(values);
992 pfree(isnull);
993}
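/*
 * Illustrative worked example (editorial addition, not part of the original
 * file): the heap_blks_scanned arithmetic described in the progress-reporting
 * comment above. With rs_nblocks = 100 and a synchronized scan starting at
 * rs_startblock = 90, reaching rs_cblock = 4 reports
 * (4 + 100 - 90) % 100 + 1 = 15 blocks scanned, so the progress counter
 * climbs monotonically from 1 to 100 even though the physical block numbers
 * wrap from 99 back to 0.
 */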
994
995/*
996 * Prepare to analyze the next block in the read stream. Returns false if
997 * the stream is exhausted and true otherwise. The scan must have been started
998 * with the SO_TYPE_ANALYZE option.
999 *
1000 * This routine holds a buffer pin and lock on the heap page. They are held
1001 * until heapam_scan_analyze_next_tuple() returns false, that is, until all
1002 * the items of the heap page have been analyzed.
1003 */
1004static bool
1006{
1007 HeapScanDesc hscan = (HeapScanDesc) scan;
1008
1009 /*
1010 * We must maintain a pin on the target page's buffer to ensure that
1011 * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
1012 * under us. It comes from the stream already pinned. We also choose to
1013 * hold sharelock on the buffer throughout --- we could release and
1014 * re-acquire sharelock for each tuple, but since we aren't doing much
1015 * work per tuple, the extra lock traffic is probably better avoided.
1016 */
1017 hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
1018 if (!BufferIsValid(hscan->rs_cbuf))
1019 return false;
1020
1022
1023 hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
1025 return true;
1026}
1027
1028static bool
1030 double *liverows, double *deadrows,
1031 TupleTableSlot *slot)
1032{
1033 HeapScanDesc hscan = (HeapScanDesc) scan;
1034 Page targpage;
1035 OffsetNumber maxoffset;
1037
1039
1040 hslot = (BufferHeapTupleTableSlot *) slot;
1041 targpage = BufferGetPage(hscan->rs_cbuf);
1042 maxoffset = PageGetMaxOffsetNumber(targpage);
1043
1044 /* Inner loop over all tuples on the selected page */
1045 for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1046 {
1047 ItemId itemid;
1048 HeapTuple targtuple = &hslot->base.tupdata;
1049 bool sample_it = false;
1050
1051 itemid = PageGetItemId(targpage, hscan->rs_cindex);
1052
1053 /*
1054 * We ignore unused and redirect line pointers. DEAD line pointers
1055 * should be counted as dead, because we need vacuum to run to get rid
1056 * of them. Note that this rule agrees with the way that
1057 * heap_page_prune_and_freeze() counts things.
1058 */
1059 if (!ItemIdIsNormal(itemid))
1060 {
1061 if (ItemIdIsDead(itemid))
1062 *deadrows += 1;
1063 continue;
1064 }
1065
1066 ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1067
1068 targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1069 targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1070 targtuple->t_len = ItemIdGetLength(itemid);
1071
1072 switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1073 hscan->rs_cbuf))
1074 {
1075 case HEAPTUPLE_LIVE:
1076 sample_it = true;
1077 *liverows += 1;
1078 break;
1079
1080 case HEAPTUPLE_DEAD:
1082 /* Count dead and recently-dead rows */
1083 *deadrows += 1;
1084 break;
1085
1087
1088 /*
1089 * Insert-in-progress rows are not counted. We assume that
1090 * when the inserting transaction commits or aborts, it will
1091 * send a stats message to increment the proper count. This
1092 * works right only if that transaction ends after we finish
1093 * analyzing the table; if things happen in the other order,
1094 * its stats update will be overwritten by ours. However, the
1095 * error will be large only if the other transaction runs long
1096 * enough to insert many tuples, so assuming it will finish
1097 * after us is the safer option.
1098 *
1099 * A special case is that the inserting transaction might be
1100 * our own. In this case we should count and sample the row,
1101 * to accommodate users who load a table and analyze it in one
1102 * transaction. (pgstat_report_analyze has to adjust the
1103 * numbers we report to the cumulative stats system to make
1104 * this come out right.)
1105 */
1107 {
1108 sample_it = true;
1109 *liverows += 1;
1110 }
1111 break;
1112
1114
1115 /*
1116 * We count and sample delete-in-progress rows the same as
1117 * live ones, so that the stats counters come out right if the
1118 * deleting transaction commits after us, per the same
1119 * reasoning given above.
1120 *
1121 * If the delete was done by our own transaction, however, we
1122 * must count the row as dead to make pgstat_report_analyze's
1123 * stats adjustments come out right. (Note: this works out
1124 * properly when the row was both inserted and deleted in our
1125 * xact.)
1126 *
1127 * The net effect of these choices is that we act as though an
1128 * IN_PROGRESS transaction hasn't happened yet, except if it
1129 * is our own transaction, which we assume has happened.
1130 *
1131 * This approach ensures that we behave sanely if we see both
1132 * the pre-image and post-image rows for a row being updated
1133 * by a concurrent transaction: we will sample the pre-image
1134 * but not the post-image. We also get sane results if the
1135 * concurrent transaction never commits.
1136 */
1138 *deadrows += 1;
1139 else
1140 {
1141 sample_it = true;
1142 *liverows += 1;
1143 }
1144 break;
1145
1146 default:
1147 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1148 break;
1149 }
1150
1151 if (sample_it)
1152 {
1153 ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1154 hscan->rs_cindex++;
1155
1156 /* note that we leave the buffer locked here! */
1157 return true;
1158 }
1159 }
1160
1161 /* Now release the lock and pin on the page */
1163 hscan->rs_cbuf = InvalidBuffer;
1164
1165 /* also prevent old slot contents from having pin on page */
1166 ExecClearTuple(slot);
1167
1168 return false;
1169}
1170
1171static double
1173 Relation indexRelation,
1174 IndexInfo *indexInfo,
1175 bool allow_sync,
1176 bool anyvisible,
1177 bool progress,
1178 BlockNumber start_blockno,
1179 BlockNumber numblocks,
1181 void *callback_state,
1182 TableScanDesc scan)
1183{
1184 HeapScanDesc hscan;
1185 bool is_system_catalog;
1186 bool checking_uniqueness;
1187 HeapTuple heapTuple;
1189 bool isnull[INDEX_MAX_KEYS];
1190 double reltuples;
1191 ExprState *predicate;
1192 TupleTableSlot *slot;
1193 EState *estate;
1194 ExprContext *econtext;
1195 Snapshot snapshot;
1196 bool need_unregister_snapshot = false;
1197 TransactionId OldestXmin;
1198 BlockNumber previous_blkno = InvalidBlockNumber;
1199 BlockNumber root_blkno = InvalidBlockNumber;
1200 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1201
1202 /*
1203 * sanity checks
1204 */
1205 Assert(OidIsValid(indexRelation->rd_rel->relam));
1206
1207 /* Remember if it's a system catalog */
1208 is_system_catalog = IsSystemRelation(heapRelation);
1209
1210 /* See whether we're verifying uniqueness/exclusion properties */
1211 checking_uniqueness = (indexInfo->ii_Unique ||
1212 indexInfo->ii_ExclusionOps != NULL);
1213
1214 /*
1215 * "Any visible" mode is not compatible with uniqueness checks; make sure
1216 * only one of those is requested.
1217 */
1218 Assert(!(anyvisible && checking_uniqueness));
1219
1220 /*
1221 * Need an EState for evaluation of index expressions and partial-index
1222 * predicates. Also a slot to hold the current tuple.
1223 */
1224 estate = CreateExecutorState();
1225 econtext = GetPerTupleExprContext(estate);
1226 slot = table_slot_create(heapRelation, NULL);
1227
1228 /* Arrange for econtext's scan tuple to be the tuple under test */
1229 econtext->ecxt_scantuple = slot;
1230
1231 /* Set up execution state for predicate, if any. */
1232 predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1233
1234 /*
1235 * Prepare for scan of the base relation. In a normal index build, we use
1236 * SnapshotAny because we must retrieve all tuples and do our own time
1237 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1238 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1239 * and index whatever's live according to that.
1240 */
1241 OldestXmin = InvalidTransactionId;
1242
1243 /* okay to ignore lazy VACUUMs here */
1244 if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
1245 OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1246
1247 if (!scan)
1248 {
1249 /*
1250 * Serial index build.
1251 *
1252 * Must begin our own heap scan in this case. We may also need to
1253 * register a snapshot whose lifetime is under our direct control.
1254 */
1255 if (!TransactionIdIsValid(OldestXmin))
1256 {
1258 need_unregister_snapshot = true;
1259 }
1260 else
1261 snapshot = SnapshotAny;
1262
1263 scan = table_beginscan_strat(heapRelation, /* relation */
1264 snapshot, /* snapshot */
1265 0, /* number of keys */
1266 NULL, /* scan key */
1267 true, /* buffer access strategy OK */
1268 allow_sync); /* syncscan OK? */
1269 }
1270 else
1271 {
1272 /*
1273 * Parallel index build.
1274 *
1275 * Parallel case never registers/unregisters own snapshot. Snapshot
1276 * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1277 * snapshot, based on same criteria as serial case.
1278 */
1280 Assert(allow_sync);
1281 snapshot = scan->rs_snapshot;
1282 }
1283
1284 hscan = (HeapScanDesc) scan;
1285
1286 /*
1287 * Must have called GetOldestNonRemovableTransactionId() if using
1288 * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1289 * worth checking this for parallel builds, since ambuild routines that
1290 * support parallel builds must work these details out for themselves.)
1291 */
1292 Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1293 Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1294 !TransactionIdIsValid(OldestXmin));
1295 Assert(snapshot == SnapshotAny || !anyvisible);
1296
1297 /* Publish number of blocks to scan */
1298 if (progress)
1299 {
1300 BlockNumber nblocks;
1301
1302 if (hscan->rs_base.rs_parallel != NULL)
1303 {
1305
1307 nblocks = pbscan->phs_nblocks;
1308 }
1309 else
1310 nblocks = hscan->rs_nblocks;
1311
1313 nblocks);
1314 }
1315
1316 /* set our scan endpoints */
1317 if (!allow_sync)
1318 heap_setscanlimits(scan, start_blockno, numblocks);
1319 else
1320 {
1321 /* syncscan can only be requested on whole relation */
1322 Assert(start_blockno == 0);
1323 Assert(numblocks == InvalidBlockNumber);
1324 }
1325
1326 reltuples = 0;
1327
1328 /*
1329 * Scan all tuples in the base relation.
1330 */
1331 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1332 {
1333 bool tupleIsAlive;
1334
1336
1337 /* Report scan progress, if asked to. */
1338 if (progress)
1339 {
1340 BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1341
1342 if (blocks_done != previous_blkno)
1343 {
1345 blocks_done);
1346 previous_blkno = blocks_done;
1347 }
1348 }
1349
1350 /*
1351 * When dealing with a HOT-chain of updated tuples, we want to index
1352 * the values of the live tuple (if any), but index it under the TID
1353 * of the chain's root tuple. This approach is necessary to preserve
1354 * the HOT-chain structure in the heap. So we need to be able to find
1355 * the root item offset for every tuple that's in a HOT-chain. When
1356 * first reaching a new page of the relation, call
1357 * heap_get_root_tuples() to build a map of root item offsets on the
1358 * page.
1359 *
1360 * It might look unsafe to use this information across buffer
1361 * lock/unlock. However, we hold ShareLock on the table so no
1362 * ordinary insert/update/delete should occur; and we hold pin on the
1363 * buffer continuously while visiting the page, so no pruning
1364 * operation can occur either.
1365 *
1366 * In cases with only ShareUpdateExclusiveLock on the table, it's
1367 * possible for some HOT tuples to appear that we didn't know about
1368 * when we first read the page. To handle that case, we re-obtain the
1369 * list of root offsets when a HOT tuple points to a root item that we
1370 * don't know about.
1371 *
1372 * Also, although our opinions about tuple liveness could change while
1373 * we scan the page (due to concurrent transaction commits/aborts),
1374 * the chain root locations won't, so this info doesn't need to be
1375 * rebuilt after waiting for another transaction.
1376 *
1377 * Note the implied assumption that there is no more than one live
1378 * tuple per HOT-chain --- else we could create more than one index
1379 * entry pointing to the same root tuple.
1380 */
1381 if (hscan->rs_cblock != root_blkno)
1382 {
1383 Page page = BufferGetPage(hscan->rs_cbuf);
1384
1386 heap_get_root_tuples(page, root_offsets);
1388
1389 root_blkno = hscan->rs_cblock;
1390 }
1391
1392 if (snapshot == SnapshotAny)
1393 {
1394 /* do our own time qual check */
1395 bool indexIt;
1396 TransactionId xwait;
1397
1398 recheck:
1399
1400 /*
1401 * We could possibly get away with not locking the buffer here,
1402 * since caller should hold ShareLock on the relation, but let's
1403 * be conservative about it. (This remark is still correct even
1404 * with HOT-pruning: our pin on the buffer prevents pruning.)
1405 */
1407
1408 /*
1409 * The criteria for counting a tuple as live in this block need to
1410 * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1411 * otherwise CREATE INDEX and ANALYZE may produce wildly different
1412 * reltuples values, e.g. when there are many recently-dead
1413 * tuples.
1414 */
1415 switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1416 hscan->rs_cbuf))
1417 {
1418 case HEAPTUPLE_DEAD:
1419 /* Definitely dead, we can ignore it */
1420 indexIt = false;
1421 tupleIsAlive = false;
1422 break;
1423 case HEAPTUPLE_LIVE:
1424 /* Normal case, index and unique-check it */
1425 indexIt = true;
1426 tupleIsAlive = true;
1427 /* Count it as live, too */
1428 reltuples += 1;
1429 break;
1431
1432 /*
1433 * If tuple is recently deleted then we must index it
1434 * anyway to preserve MVCC semantics. (Pre-existing
1435 * transactions could try to use the index after we finish
1436 * building it, and may need to see such tuples.)
1437 *
1438 * However, if it was HOT-updated then we must only index
1439 * the live tuple at the end of the HOT-chain. Since this
1440 * breaks semantics for pre-existing snapshots, mark the
1441 * index as unusable for them.
1442 *
1443 * We don't count recently-dead tuples in reltuples, even
1444 * if we index them; see heapam_scan_analyze_next_tuple().
1445 */
1446 if (HeapTupleIsHotUpdated(heapTuple))
1447 {
1448 indexIt = false;
1449 /* mark the index as unsafe for old snapshots */
1450 indexInfo->ii_BrokenHotChain = true;
1451 }
1452 else
1453 indexIt = true;
1454 /* In any case, exclude the tuple from unique-checking */
1455 tupleIsAlive = false;
1456 break;
1458
1459 /*
1460 * In "anyvisible" mode, this tuple is visible and we
1461 * don't need any further checks.
1462 */
1463 if (anyvisible)
1464 {
1465 indexIt = true;
1466 tupleIsAlive = true;
1467 reltuples += 1;
1468 break;
1469 }
1470
1471 /*
1472 * Since caller should hold ShareLock or better, normally
1473 * the only way to see this is if it was inserted earlier
1474 * in our own transaction. However, it can happen in
1475 * system catalogs, since we tend to release write lock
1476 * before commit there. Give a warning if neither case
1477 * applies.
1478 */
1479 xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1481 {
1482 if (!is_system_catalog)
1483 elog(WARNING, "concurrent insert in progress within table \"%s\"",
1484 RelationGetRelationName(heapRelation));
1485
1486 /*
1487 * If we are performing uniqueness checks, indexing
1488 * such a tuple could lead to a bogus uniqueness
1489 * failure. In that case we wait for the inserting
1490 * transaction to finish and check again.
1491 */
1492 if (checking_uniqueness)
1493 {
1494 /*
1495 * Must drop the lock on the buffer before we wait
1496 */
1498 XactLockTableWait(xwait, heapRelation,
1499 &heapTuple->t_self,
1502 goto recheck;
1503 }
1504 }
1505 else
1506 {
1507 /*
1508 * For consistency with
1509 * heapam_scan_analyze_next_tuple(), count
1510 * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1511 * when inserted by our own transaction.
1512 */
1513 reltuples += 1;
1514 }
1515
1516 /*
1517 * We must index such tuples, since if the index build
1518 * commits then they're good.
1519 */
1520 indexIt = true;
1521 tupleIsAlive = true;
1522 break;
1524
1525 /*
1526 * As with INSERT_IN_PROGRESS case, this is unexpected
1527 * unless it's our own deletion or a system catalog; but
1528 * in anyvisible mode, this tuple is visible.
1529 */
1530 if (anyvisible)
1531 {
1532 indexIt = true;
1533 tupleIsAlive = false;
1534 reltuples += 1;
1535 break;
1536 }
1537
1538 xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1540 {
1541 if (!is_system_catalog)
1542 elog(WARNING, "concurrent delete in progress within table \"%s\"",
1543 RelationGetRelationName(heapRelation));
1544
1545 /*
1546 * If we are performing uniqueness checks, assuming
1547 * the tuple is dead could lead to missing a
1548 * uniqueness violation. In that case we wait for the
1549 * deleting transaction to finish and check again.
1550 *
1551 * Also, if it's a HOT-updated tuple, we should not
1552 * index it but rather the live tuple at the end of
1553 * the HOT-chain. However, the deleting transaction
1554 * could abort, possibly leaving this tuple as live
1555 * after all, in which case it has to be indexed. The
1556 * only way to know what to do is to wait for the
1557 * deleting transaction to finish and check again.
1558 */
1559 if (checking_uniqueness ||
1560 HeapTupleIsHotUpdated(heapTuple))
1561 {
1562 /*
1563 * Must drop the lock on the buffer before we wait
1564 */
1566 XactLockTableWait(xwait, heapRelation,
1567 &heapTuple->t_self,
1570 goto recheck;
1571 }
1572
1573 /*
1574 * Otherwise index it but don't check for uniqueness,
1575 * the same as a RECENTLY_DEAD tuple.
1576 */
1577 indexIt = true;
1578
1579 /*
1580 * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1581 * if they were not deleted by the current
1582 * transaction. That's what
1583 * heapam_scan_analyze_next_tuple() does, and we want
1584 * the behavior to be consistent.
1585 */
1586 reltuples += 1;
1587 }
1588 else if (HeapTupleIsHotUpdated(heapTuple))
1589 {
1590 /*
1591 * It's a HOT-updated tuple deleted by our own xact.
1592 * We can assume the deletion will commit (else the
1593 * index contents don't matter), so treat the same as
1594 * RECENTLY_DEAD HOT-updated tuples.
1595 */
1596 indexIt = false;
1597 /* mark the index as unsafe for old snapshots */
1598 indexInfo->ii_BrokenHotChain = true;
1599 }
1600 else
1601 {
1602 /*
1603 * It's a regular tuple deleted by our own xact. Index
1604 * it, but don't check for uniqueness nor count in
1605 * reltuples, the same as a RECENTLY_DEAD tuple.
1606 */
1607 indexIt = true;
1608 }
1609 /* In any case, exclude the tuple from unique-checking */
1610 tupleIsAlive = false;
1611 break;
1612 default:
1613 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1614 indexIt = tupleIsAlive = false; /* keep compiler quiet */
1615 break;
1616 }
1617
1619
1620 if (!indexIt)
1621 continue;
1622 }
1623 else
1624 {
1625 /* heap_getnext did the time qual check */
1626 tupleIsAlive = true;
1627 reltuples += 1;
1628 }
1629
1631
1632 /* Set up for predicate or expression evaluation */
1633 ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1634
1635 /*
1636 * In a partial index, discard tuples that don't satisfy the
1637 * predicate.
1638 */
1639 if (predicate != NULL)
1640 {
1641 if (!ExecQual(predicate, econtext))
1642 continue;
1643 }
1644
1645 /*
1646 * For the current heap tuple, extract all the attributes we use in
1647 * this index, and note which are null. This also performs evaluation
1648 * of any expressions needed.
1649 */
1650 FormIndexDatum(indexInfo,
1651 slot,
1652 estate,
1653 values,
1654 isnull);
1655
1656 /*
1657 * You'd think we should go ahead and build the index tuple here, but
1658 * some index AMs want to do further processing on the data first. So
1659 * pass the values[] and isnull[] arrays, instead.
1660 */
1661
1662 if (HeapTupleIsHeapOnly(heapTuple))
1663 {
1664 /*
1665 * For a heap-only tuple, pretend its TID is that of the root. See
1666 * src/backend/access/heap/README.HOT for discussion.
1667 */
1668 ItemPointerData tid;
1669 OffsetNumber offnum;
1670
1671 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1672
1673 /*
1674 * If a HOT tuple points to a root that we don't know about,
1675 * obtain root items afresh. If that still fails, report it as
1676 * corruption.
1677 */
1678 if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1679 {
1680 Page page = BufferGetPage(hscan->rs_cbuf);
1681
1683 heap_get_root_tuples(page, root_offsets);
1685 }
1686
1687 if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1688 ereport(ERROR,
1690 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1691 ItemPointerGetBlockNumber(&heapTuple->t_self),
1692 offnum,
1693 RelationGetRelationName(heapRelation))));
1694
1696 root_offsets[offnum - 1]);
1697
1698 /* Call the AM's callback routine to process the tuple */
1699 callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1700 callback_state);
1701 }
1702 else
1703 {
1704 /* Call the AM's callback routine to process the tuple */
1705 callback(indexRelation, &heapTuple->t_self, values, isnull,
1706 tupleIsAlive, callback_state);
1707 }
1708 }
1709
1710 /* Report scan progress one last time. */
1711 if (progress)
1712 {
1713 BlockNumber blks_done;
1714
1715 if (hscan->rs_base.rs_parallel != NULL)
1716 {
1718
1720 blks_done = pbscan->phs_nblocks;
1721 }
1722 else
1723 blks_done = hscan->rs_nblocks;
1724
1726 blks_done);
1727 }
1728
1729 table_endscan(scan);
1730
1731 /* we can now forget our snapshot, if set and registered by us */
1732 if (need_unregister_snapshot)
1733 UnregisterSnapshot(snapshot);
1734
1736
1737 FreeExecutorState(estate);
1738
1739 /* These may have been pointing to the now-gone estate */
1740 indexInfo->ii_ExpressionsState = NIL;
1741 indexInfo->ii_PredicateState = NULL;
1742
1743 return reltuples;
1744}
1745
1746static void
1748 Relation indexRelation,
1749 IndexInfo *indexInfo,
1750 Snapshot snapshot,
1752{
1753 TableScanDesc scan;
1754 HeapScanDesc hscan;
1755 HeapTuple heapTuple;
1757 bool isnull[INDEX_MAX_KEYS];
1758 ExprState *predicate;
1759 TupleTableSlot *slot;
1760 EState *estate;
1761 ExprContext *econtext;
1762 BlockNumber root_blkno = InvalidBlockNumber;
1763 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1764 bool in_index[MaxHeapTuplesPerPage];
1765 BlockNumber previous_blkno = InvalidBlockNumber;
1766
1767 /* state variables for the merge */
1768 ItemPointer indexcursor = NULL;
1769 ItemPointerData decoded;
1770 bool tuplesort_empty = false;
1771
1772 /*
1773 * sanity checks
1774 */
1775 Assert(OidIsValid(indexRelation->rd_rel->relam));
1776
1777 /*
1778 * Need an EState for evaluation of index expressions and partial-index
1779 * predicates. Also a slot to hold the current tuple.
1780 */
1781 estate = CreateExecutorState();
1782 econtext = GetPerTupleExprContext(estate);
1783 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1785
1786 /* Arrange for econtext's scan tuple to be the tuple under test */
1787 econtext->ecxt_scantuple = slot;
1788
1789 /* Set up execution state for predicate, if any. */
1790 predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1791
1792 /*
1793 * Prepare for scan of the base relation. We need just those tuples
1794 * satisfying the passed-in reference snapshot. We must disable syncscan
1795 * here, because it's critical that we read from block zero forward to
1796 * match the sorted TIDs.
1797 */
1798 scan = table_beginscan_strat(heapRelation, /* relation */
1799 snapshot, /* snapshot */
1800 0, /* number of keys */
1801 NULL, /* scan key */
1802 true, /* buffer access strategy OK */
1803 false); /* syncscan not OK */
1804 hscan = (HeapScanDesc) scan;
1805
1807 hscan->rs_nblocks);
1808
1809 /*
1810 * Scan all tuples matching the snapshot.
1811 */
1812 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1813 {
1814 ItemPointer heapcursor = &heapTuple->t_self;
1815 ItemPointerData rootTuple;
1816 OffsetNumber root_offnum;
1817
1819
1820 state->htups += 1;
1821
1822 if ((previous_blkno == InvalidBlockNumber) ||
1823 (hscan->rs_cblock != previous_blkno))
1824 {
1826 hscan->rs_cblock);
1827 previous_blkno = hscan->rs_cblock;
1828 }
1829
1830 /*
1831 * As commented in table_index_build_scan, we should index heap-only
1832 * tuples under the TIDs of their root tuples; so when we advance onto
1833 * a new heap page, build a map of root item offsets on the page.
1834 *
1835 * This complicates merging against the tuplesort output: we will
1836 * visit the live tuples in order by their offsets, but the root
1837 * offsets that we need to compare against the index contents might be
1838 * ordered differently. So we might have to "look back" within the
1839 * tuplesort output, but only within the current page. We handle that
1840 * by keeping a bool array in_index[] showing all the
1841 * already-passed-over tuplesort output TIDs of the current page. We
1842 * clear that array here, when advancing onto a new heap page.
1843 */
1844 if (hscan->rs_cblock != root_blkno)
1845 {
1846 Page page = BufferGetPage(hscan->rs_cbuf);
1847
1849 heap_get_root_tuples(page, root_offsets);
1851
1852 memset(in_index, 0, sizeof(in_index));
1853
1854 root_blkno = hscan->rs_cblock;
1855 }
1856
1857 /* Convert actual tuple TID to root TID */
1858 rootTuple = *heapcursor;
1859 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1860
1861 if (HeapTupleIsHeapOnly(heapTuple))
1862 {
1863 root_offnum = root_offsets[root_offnum - 1];
1864 if (!OffsetNumberIsValid(root_offnum))
1865 ereport(ERROR,
1867 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1868 ItemPointerGetBlockNumber(heapcursor),
1869 ItemPointerGetOffsetNumber(heapcursor),
1870 RelationGetRelationName(heapRelation))));
1871 ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1872 }
1873
1874 /*
1875 * "merge" by skipping through the index tuples until we find or pass
1876 * the current root tuple.
1877 */
1878 while (!tuplesort_empty &&
1879 (!indexcursor ||
1880 ItemPointerCompare(indexcursor, &rootTuple) < 0))
1881 {
1882 Datum ts_val;
1883 bool ts_isnull;
1884
1885 if (indexcursor)
1886 {
1887 /*
1888 * Remember index items seen earlier on the current heap page
1889 */
1890 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1891 in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1892 }
1893
1894 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1895 false, &ts_val, &ts_isnull,
1896 NULL);
1897 Assert(tuplesort_empty || !ts_isnull);
1898 if (!tuplesort_empty)
1899 {
1900 itemptr_decode(&decoded, DatumGetInt64(ts_val));
1901 indexcursor = &decoded;
1902 }
1903 else
1904 {
1905 /* Be tidy */
1906 indexcursor = NULL;
1907 }
1908 }
1909
1910 /*
1911 * If the tuplesort has overshot *and* we didn't see a match earlier,
1912 * then this tuple is missing from the index, so insert it.
1913 */
1914 if ((tuplesort_empty ||
1915 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1916 !in_index[root_offnum - 1])
1917 {
1919
1920 /* Set up for predicate or expression evaluation */
1921 ExecStoreHeapTuple(heapTuple, slot, false);
1922
1923 /*
1924 * In a partial index, discard tuples that don't satisfy the
1925 * predicate.
1926 */
1927 if (predicate != NULL)
1928 {
1929 if (!ExecQual(predicate, econtext))
1930 continue;
1931 }
1932
1933 /*
1934 * For the current heap tuple, extract all the attributes we use
1935 * in this index, and note which are null. This also performs
1936 * evaluation of any expressions needed.
1937 */
1938 FormIndexDatum(indexInfo,
1939 slot,
1940 estate,
1941 values,
1942 isnull);
1943
1944 /*
1945 * You'd think we should go ahead and build the index tuple here,
1946 * but some index AMs want to do further processing on the data
1947 * first. So pass the values[] and isnull[] arrays, instead.
1948 */
1949
1950 /*
1951 * If the tuple is already committed dead, you might think we
1952 * could suppress uniqueness checking, but this is no longer true
1953 * in the presence of HOT, because the insert is actually a proxy
1954 * for a uniqueness check on the whole HOT-chain. That is, the
1955 * tuple we have here could be dead because it was already
1956 * HOT-updated, and if so the updating transaction will not have
1957 * thought it should insert index entries. The index AM will
1958 * check the whole HOT-chain and correctly detect a conflict if
1959 * there is one.
1960 */
1961
1962 index_insert(indexRelation,
1963 values,
1964 isnull,
1965 &rootTuple,
1966 heapRelation,
1967 indexInfo->ii_Unique ?
1969 false,
1970 indexInfo);
1971
1972 state->tups_inserted += 1;
1973 }
1974 }
1975
1976 table_endscan(scan);
1977
1979
1980 FreeExecutorState(estate);
1981
1982 /* These may have been pointing to the now-gone estate */
1983 indexInfo->ii_ExpressionsState = NIL;
1984 indexInfo->ii_PredicateState = NULL;
1985}
1986
1987/*
1988 * Return the number of blocks that have been read by this scan since
1989 * starting. This is meant for progress reporting rather than being fully
1990 * accurate: in a parallel scan, workers can be concurrently reading blocks
1991 * further ahead than what we report.
1992 */
1993static BlockNumber
1995{
1996 ParallelBlockTableScanDesc bpscan = NULL;
1997 BlockNumber startblock;
1998 BlockNumber blocks_done;
1999
2000 if (hscan->rs_base.rs_parallel != NULL)
2001 {
2003 startblock = bpscan->phs_startblock;
2004 }
2005 else
2006 startblock = hscan->rs_startblock;
2007
2008 /*
2009 * Might have wrapped around the end of the relation, if startblock was
2010 * not zero.
2011 */
2012 if (hscan->rs_cblock > startblock)
2013 blocks_done = hscan->rs_cblock - startblock;
2014 else
2015 {
2016 BlockNumber nblocks;
2017
2018 nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
2019 blocks_done = nblocks - startblock +
2020 hscan->rs_cblock;
2021 }
2022
2023 return blocks_done;
2024}
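/*
 * Illustrative worked example (editorial addition, not part of the original
 * file): with a 100-block relation and startblock = 90, a scan currently at
 * rs_cblock = 5 has wrapped around, so blocks_done = 100 - 90 + 5 = 15,
 * i.e. the ten blocks read before the wrap plus the five read after it.
 */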
2025
2026
2027/* ------------------------------------------------------------------------
2028 * Miscellaneous callbacks for the heap AM
2029 * ------------------------------------------------------------------------
2030 */
2031
2032/*
2033 * Check to see whether the table needs a TOAST table. It does only if
2034 * (1) there are any toastable attributes, and (2) the maximum length
2035 * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2036 * create a toast table for something like "f1 varchar(20)".)
2037 */
2038static bool
2040{
2041 int32 data_length = 0;
2042 bool maxlength_unknown = false;
2043 bool has_toastable_attrs = false;
2044 TupleDesc tupdesc = rel->rd_att;
2045 int32 tuple_length;
2046 int i;
2047
2048 for (i = 0; i < tupdesc->natts; i++)
2049 {
2050 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
2051
2052 if (att->attisdropped)
2053 continue;
2054 if (att->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
2055 continue;
2056 data_length = att_align_nominal(data_length, att->attalign);
2057 if (att->attlen > 0)
2058 {
2059 /* Fixed-length types are never toastable */
2060 data_length += att->attlen;
2061 }
2062 else
2063 {
2064 int32 maxlen = type_maximum_size(att->atttypid,
2065 att->atttypmod);
2066
2067 if (maxlen < 0)
2068 maxlength_unknown = true;
2069 else
2070 data_length += maxlen;
2071 if (att->attstorage != TYPSTORAGE_PLAIN)
2072 has_toastable_attrs = true;
2073 }
2074 }
2075 if (!has_toastable_attrs)
2076 return false; /* nothing to toast? */
2077 if (maxlength_unknown)
2078 return true; /* any unlimited-length attrs? */
2079 tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2080 BITMAPLEN(tupdesc->natts)) +
2081 MAXALIGN(data_length);
2082 return (tuple_length > TOAST_TUPLE_THRESHOLD);
2083}
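/*
 * Rough worked example of the threshold test above (editorial addition, not
 * part of the original file; assumes 8 kB blocks and a single-byte server
 * encoding): for a table (a int4, b varchar(20)), type_maximum_size() of the
 * varchar is 20 + VARHDRSZ = 24 bytes, so data_length is 4 + 24 = 28 and
 * tuple_length is MAXALIGN(23 + 1) + MAXALIGN(28) = 24 + 32 = 56 bytes --
 * far below TOAST_TUPLE_THRESHOLD (about 2 kB), hence no TOAST table, which
 * is the "f1 varchar(20)" case mentioned in the comment above.
 */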
2084
2085/*
2086 * TOAST tables for heap relations are just heap relations.
2087 */
2088static Oid
2089heapam_relation_toast_am(Relation rel)
2090{
2091 return rel->rd_rel->relam;
2092}
2093
2094
2095/* ------------------------------------------------------------------------
2096 * Planner related callbacks for the heap AM
2097 * ------------------------------------------------------------------------
2098 */
2099
2100#define HEAP_OVERHEAD_BYTES_PER_TUPLE \
2101 (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2102#define HEAP_USABLE_BYTES_PER_PAGE \
2103 (BLCKSZ - SizeOfPageHeaderData)
2104
2105static void
2106heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
 2107 BlockNumber *pages, double *tuples,
 2108 double *allvisfrac)
2109{
 2110 table_block_relation_estimate_size(rel, attr_widths, pages,
 2111 tuples, allvisfrac,
 2112 HEAP_OVERHEAD_BYTES_PER_TUPLE,
 2113 HEAP_USABLE_BYTES_PER_PAGE);
2114}
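
The two macros above are handed to table_block_relation_estimate_size(), which, for a relation that has never been vacuumed or analyzed, estimates tuple density as roughly the usable bytes per page divided by the estimated data width plus per-tuple overhead (integer division; the exact formula lives in tableam.c). A minimal standalone sketch of that arithmetic, assuming 8 kB pages and 8-byte MAXALIGN:

    #include <stdio.h>

    int
    main(void)
    {
        /* Assumed values for an 8 kB page and 8-byte MAXALIGN. */
        const int   usable_bytes_per_page = 8192 - 24;  /* BLCKSZ - page header */
        const int   overhead_bytes_per_tuple = 24 + 4;  /* tuple header + line pointer */
        int         datum_width = 40;   /* example average data width per row */
        int         tuple_width = datum_width + overhead_bytes_per_tuple;
        int         tuples_per_page = usable_bytes_per_page / tuple_width;

        /* ~120 tuples per 8 kB page for 40-byte rows under these assumptions */
        printf("estimated density: %d tuples/page\n", tuples_per_page);
        return 0;
    }
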
2115
2116
2117/* ------------------------------------------------------------------------
2118 * Executor related callbacks for the heap AM
2119 * ------------------------------------------------------------------------
2120 */
2121
2122static bool
2123heapam_scan_bitmap_next_tuple(TableScanDesc scan,
 2124 TupleTableSlot *slot,
 2125 bool *recheck,
 2126 uint64 *lossy_pages,
 2127 uint64 *exact_pages)
2128{
 2129 BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
 2130 HeapScanDesc hscan = (HeapScanDesc) bscan;
2131 OffsetNumber targoffset;
2132 Page page;
2133 ItemId lp;
2134
2135 /*
2136 * Out of range? If so, nothing more to look at on this page
2137 */
2138 while (hscan->rs_cindex >= hscan->rs_ntuples)
2139 {
2140 /*
2141 * Returns false if the bitmap is exhausted and there are no further
2142 * blocks we need to scan.
2143 */
2144 if (!BitmapHeapScanNextBlock(scan, recheck, lossy_pages, exact_pages))
2145 return false;
2146 }
2147
2148 targoffset = hscan->rs_vistuples[hscan->rs_cindex];
2149 page = BufferGetPage(hscan->rs_cbuf);
 2150 lp = PageGetItemId(page, targoffset);
 2151 Assert(ItemIdIsNormal(lp));
 2152
2153 hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2154 hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2155 hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2156 ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
 2157
 2158 pgstat_count_heap_fetch(scan->rs_rd);
 2159
2160 /*
2161 * Set up the result slot to point to this tuple. Note that the slot
2162 * acquires a pin on the buffer.
 2163 */
 2164 ExecStoreBufferHeapTuple(&hscan->rs_ctup,
 2165 slot,
2166 hscan->rs_cbuf);
2167
2168 hscan->rs_cindex++;
2169
2170 return true;
2171}
2172
2173static bool
2174heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
2175{
2176 HeapScanDesc hscan = (HeapScanDesc) scan;
2177 TsmRoutine *tsm = scanstate->tsmroutine;
2178 BlockNumber blockno;
2179
2180 /* return false immediately if relation is empty */
2181 if (hscan->rs_nblocks == 0)
2182 return false;
2183
2184 /* release previous scan buffer, if any */
2185 if (BufferIsValid(hscan->rs_cbuf))
2186 {
2187 ReleaseBuffer(hscan->rs_cbuf);
2188 hscan->rs_cbuf = InvalidBuffer;
2189 }
2190
2191 if (tsm->NextSampleBlock)
2192 blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
2193 else
2194 {
2195 /* scanning table sequentially */
2196
2197 if (hscan->rs_cblock == InvalidBlockNumber)
2198 {
2199 Assert(!hscan->rs_inited);
2200 blockno = hscan->rs_startblock;
2201 }
2202 else
2203 {
2204 Assert(hscan->rs_inited);
2205
2206 blockno = hscan->rs_cblock + 1;
2207
2208 if (blockno >= hscan->rs_nblocks)
2209 {
2210 /* wrap to beginning of rel, might not have started at 0 */
2211 blockno = 0;
2212 }
2213
2214 /*
2215 * Report our new scan position for synchronization purposes.
2216 *
2217 * Note: we do this before checking for end of scan so that the
2218 * final state of the position hint is back at the start of the
2219 * rel. That's not strictly necessary, but otherwise when you run
2220 * the same query multiple times the starting position would shift
2221 * a little bit backwards on every invocation, which is confusing.
2222 * We don't guarantee any specific ordering in general, though.
2223 */
2224 if (scan->rs_flags & SO_ALLOW_SYNC)
2225 ss_report_location(scan->rs_rd, blockno);
2226
2227 if (blockno == hscan->rs_startblock)
2228 {
2229 blockno = InvalidBlockNumber;
2230 }
2231 }
2232 }
2233
2234 hscan->rs_cblock = blockno;
2235
2236 if (!BlockNumberIsValid(blockno))
2237 {
2238 hscan->rs_inited = false;
2239 return false;
2240 }
2241
2242 Assert(hscan->rs_cblock < hscan->rs_nblocks);
2243
2244 /*
2245 * Be sure to check for interrupts at least once per page. Checks at
2246 * higher code levels won't be able to stop a sample scan that encounters
2247 * many pages' worth of consecutive dead tuples.
 2248 */
 2249 CHECK_FOR_INTERRUPTS();
 2250
 2251 /* Read page using selected strategy */
 2252 hscan->rs_cbuf = ReadBufferExtended(hscan->rs_base.rs_rd, MAIN_FORKNUM,
 2253 blockno, RBM_NORMAL, hscan->rs_strategy);
2254
2255 /* in pagemode, prune the page and determine visible tuple offsets */
 2256 if (hscan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
 2257 heap_prepare_pagescan(scan);
 2258
2259 hscan->rs_inited = true;
2260 return true;
2261}
2262
2263static bool
2264heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
 2265 TupleTableSlot *slot)
2266{
2267 HeapScanDesc hscan = (HeapScanDesc) scan;
2268 TsmRoutine *tsm = scanstate->tsmroutine;
2269 BlockNumber blockno = hscan->rs_cblock;
2270 bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2271
2272 Page page;
2273 bool all_visible;
2274 OffsetNumber maxoffset;
2275
2276 /*
2277 * When not using pagemode, we must lock the buffer during tuple
2278 * visibility checks.
2279 */
 2280 if (!pagemode)
 2281 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
 2282
 2283 page = (Page) BufferGetPage(hscan->rs_cbuf);
 2284 all_visible = PageIsAllVisible(page) &&
 2285 !scan->rs_snapshot->takenDuringRecovery;
 2286 maxoffset = PageGetMaxOffsetNumber(page);
2287
2288 for (;;)
2289 {
2290 OffsetNumber tupoffset;
 2291
 2292 CHECK_FOR_INTERRUPTS();
 2293
2294 /* Ask the tablesample method which tuples to check on this page. */
2295 tupoffset = tsm->NextSampleTuple(scanstate,
2296 blockno,
2297 maxoffset);
2298
2299 if (OffsetNumberIsValid(tupoffset))
2300 {
2301 ItemId itemid;
2302 bool visible;
2303 HeapTuple tuple = &(hscan->rs_ctup);
2304
2305 /* Skip invalid tuple pointers. */
2306 itemid = PageGetItemId(page, tupoffset);
2307 if (!ItemIdIsNormal(itemid))
2308 continue;
2309
2310 tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2311 tuple->t_len = ItemIdGetLength(itemid);
2312 ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2313
2314
2315 if (all_visible)
2316 visible = true;
2317 else
2318 visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
2319 tuple, tupoffset);
2320
2321 /* in pagemode, heap_prepare_pagescan did this for us */
2322 if (!pagemode)
2323 HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
2324 hscan->rs_cbuf, scan->rs_snapshot);
2325
2326 /* Try next tuple from same page. */
2327 if (!visible)
2328 continue;
2329
2330 /* Found visible tuple, return it. */
 2331 if (!pagemode)
 2332 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 2333
2334 ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
2335
 2336 /* Count successfully-fetched tuples as heap fetches */
 2337 pgstat_count_heap_getnext(scan->rs_rd);
 2338
2339 return true;
2340 }
2341 else
2342 {
2343 /*
2344 * If we get here, it means we've exhausted the items on this page
2345 * and it's time to move to the next.
2346 */
 2347 if (!pagemode)
 2348 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 2349
2350 ExecClearTuple(slot);
2351 return false;
2352 }
2353 }
2354
2355 Assert(0);
2356}
2357
2358
2359/* ----------------------------------------------------------------------------
2360 * Helper functions for the above.
2361 * ----------------------------------------------------------------------------
2362 */
2363
2364/*
2365 * Reconstruct and rewrite the given tuple
2366 *
2367 * We cannot simply copy the tuple as-is, for several reasons:
2368 *
2369 * 1. We'd like to squeeze out the values of any dropped columns, both
2370 * to save space and to ensure we have no corner-case failures. (It's
2371 * possible for example that the new table hasn't got a TOAST table
2372 * and so is unable to store any large values of dropped cols.)
2373 *
2374 * 2. The tuple might not even be legal for the new table; this is
2375 * currently only known to happen as an after-effect of ALTER TABLE
2376 * SET WITHOUT OIDS.
2377 *
2378 * So, we must reconstruct the tuple from component Datums.
2379 */
2380static void
2381reform_and_rewrite_tuple(HeapTuple tuple,
 2382 Relation OldHeap, Relation NewHeap,
2383 Datum *values, bool *isnull, RewriteState rwstate)
2384{
2385 TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2386 TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2387 HeapTuple copiedTuple;
2388 int i;
2389
2390 heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2391
2392 /* Be sure to null out any dropped columns */
2393 for (i = 0; i < newTupDesc->natts; i++)
2394 {
2395 if (TupleDescCompactAttr(newTupDesc, i)->attisdropped)
2396 isnull[i] = true;
2397 }
2398
2399 copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2400
2401 /* The heap rewrite module does the rest */
2402 rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2403
2404 heap_freetuple(copiedTuple);
2405}
2406
2407/*
2408 * Check visibility of the tuple.
2409 */
2410static bool
2411SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 2412 HeapTuple tuple,
2413 OffsetNumber tupoffset)
2414{
2415 HeapScanDesc hscan = (HeapScanDesc) scan;
2416
2417 if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2418 {
2419 uint32 start = 0,
2420 end = hscan->rs_ntuples;
2421
2422 /*
 2423 * In page-at-a-time mode, heap_prepare_pagescan() already did visibility
2424 * checks, so just look at the info it left in rs_vistuples[].
2425 *
2426 * We use a binary search over the known-sorted array. Note: we could
2427 * save some effort if we insisted that NextSampleTuple select tuples
2428 * in increasing order, but it's not clear that there would be enough
2429 * gain to justify the restriction.
2430 */
2431 while (start < end)
2432 {
2433 uint32 mid = start + (end - start) / 2;
2434 OffsetNumber curoffset = hscan->rs_vistuples[mid];
2435
2436 if (tupoffset == curoffset)
2437 return true;
2438 else if (tupoffset < curoffset)
2439 end = mid;
2440 else
2441 start = mid + 1;
2442 }
2443
2444 return false;
2445 }
2446 else
2447 {
2448 /* Otherwise, we have to check the tuple individually. */
2449 return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
2450 buffer);
2451 }
2452}
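
The page-at-a-time branch above relies on rs_vistuples[] being sorted in offset order, which heap_prepare_pagescan() guarantees. For illustration, the same binary search extracted into a standalone sketch with hypothetical data (not part of the heap AM itself):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef uint16_t OffsetNumber;

    /* Binary search for `target` in a sorted array of visible-tuple offsets. */
    static bool
    offset_is_visible(const OffsetNumber *vistuples, uint32_t ntuples,
                      OffsetNumber target)
    {
        uint32_t    start = 0;
        uint32_t    end = ntuples;

        while (start < end)
        {
            uint32_t    mid = start + (end - start) / 2;

            if (target == vistuples[mid])
                return true;
            else if (target < vistuples[mid])
                end = mid;
            else
                start = mid + 1;
        }
        return false;
    }

    int
    main(void)
    {
        OffsetNumber vis[] = {2, 5, 9, 14};     /* hypothetical sorted offsets */

        assert(offset_is_visible(vis, 4, 9));
        assert(!offset_is_visible(vis, 4, 10));
        return 0;
    }
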
2453
2454/*
 2455 * Helper function to get the next block of a bitmap heap scan. Returns true
 2456 * when it got the next block and saved it in the scan descriptor, and false
 2457 * when the bitmap and/or relation are exhausted.
2458 */
2459static bool
2460BitmapHeapScanNextBlock(TableScanDesc scan,
 2461 bool *recheck,
 2462 uint64 *lossy_pages, uint64 *exact_pages)
2463{
 2464 BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
 2465 HeapScanDesc hscan = (HeapScanDesc) bscan;
2466 BlockNumber block;
2467 void *per_buffer_data;
2468 Buffer buffer;
2469 Snapshot snapshot;
2470 int ntup;
 2471 TBMIterateResult *tbmres;
 2472 OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
 2473 int noffsets = -1;
 2474
 2475 Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
 2476 Assert(hscan->rs_read_stream);
2477
2478 hscan->rs_cindex = 0;
2479 hscan->rs_ntuples = 0;
2480
2481 /* Release buffer containing previous block. */
2482 if (BufferIsValid(hscan->rs_cbuf))
2483 {
2484 ReleaseBuffer(hscan->rs_cbuf);
2485 hscan->rs_cbuf = InvalidBuffer;
2486 }
 2487
 2488 hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream,
 2489 &per_buffer_data);
2490
2491 if (BufferIsInvalid(hscan->rs_cbuf))
2492 {
2493 /* the bitmap is exhausted */
2494 return false;
2495 }
2496
2497 Assert(per_buffer_data);
2498
2499 tbmres = per_buffer_data;
 2500
 2501 Assert(BlockNumberIsValid(tbmres->blockno));
 2502 Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);
2503
2504 /* Exact pages need their tuple offsets extracted. */
2505 if (!tbmres->lossy)
 2506 noffsets = tbm_extract_page_tuple(tbmres, offsets,
 2507 TBM_MAX_TUPLES_PER_PAGE);
 2508
2509 *recheck = tbmres->recheck;
2510
2511 block = hscan->rs_cblock = tbmres->blockno;
2512 buffer = hscan->rs_cbuf;
2513 snapshot = scan->rs_snapshot;
2514
2515 ntup = 0;
2516
2517 /*
2518 * Prune and repair fragmentation for the whole page, if possible.
2519 */
2520 heap_page_prune_opt(scan->rs_rd, buffer);
2521
2522 /*
2523 * We must hold share lock on the buffer content while examining tuple
2524 * visibility. Afterwards, however, the tuples we have found to be
2525 * visible are guaranteed good as long as we hold the buffer pin.
 2526 */
 2527 LockBuffer(buffer, BUFFER_LOCK_SHARE);
 2528
2529 /*
2530 * We need two separate strategies for lossy and non-lossy cases.
2531 */
2532 if (!tbmres->lossy)
2533 {
2534 /*
2535 * Bitmap is non-lossy, so we just look through the offsets listed in
2536 * tbmres; but we have to follow any HOT chain starting at each such
2537 * offset.
2538 */
2539 int curslot;
2540
2541 /* We must have extracted the tuple offsets by now */
2542 Assert(noffsets > -1);
2543
2544 for (curslot = 0; curslot < noffsets; curslot++)
2545 {
2546 OffsetNumber offnum = offsets[curslot];
2547 ItemPointerData tid;
2548 HeapTupleData heapTuple;
2549
2550 ItemPointerSet(&tid, block, offnum);
2551 if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2552 &heapTuple, NULL, true))
2553 hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2554 }
2555 }
2556 else
2557 {
2558 /*
2559 * Bitmap is lossy, so we must examine each line pointer on the page.
2560 * But we can ignore HOT chains, since we'll check each tuple anyway.
2561 */
2562 Page page = BufferGetPage(buffer);
2563 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
2564 OffsetNumber offnum;
2565
2566 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2567 {
2568 ItemId lp;
2569 HeapTupleData loctup;
2570 bool valid;
2571
2572 lp = PageGetItemId(page, offnum);
2573 if (!ItemIdIsNormal(lp))
2574 continue;
2575 loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2576 loctup.t_len = ItemIdGetLength(lp);
2577 loctup.t_tableOid = scan->rs_rd->rd_id;
2578 ItemPointerSet(&loctup.t_self, block, offnum);
2579 valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2580 if (valid)
2581 {
 2582 hscan->rs_vistuples[ntup++] = offnum;
 2583 PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
 2584 HeapTupleHeaderGetXmin(loctup.t_data));
 2585 }
2586 HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
2587 buffer, snapshot);
2588 }
2589 }
 2590
 2591 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 2592
 2593 Assert(ntup <= MaxHeapTuplesPerPage);
 2594 hscan->rs_ntuples = ntup;
2595
2596 if (tbmres->lossy)
2597 (*lossy_pages)++;
2598 else
2599 (*exact_pages)++;
2600
2601 /*
2602 * Return true to indicate that a valid block was found and the bitmap is
2603 * not exhausted. If there are no visible tuples on this page,
 2604 * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
 2605 * return false, returning control to this function to advance to the next
 2606 * block in the bitmap.
2607 */
2608 return true;
2609}
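
The contract described above (the block-level helper may legitimately return true for a page with zero visible tuples, and the tuple-level caller simply loops) can be illustrated with a stripped-down stand-in pair of functions. This is only a sketch of the control flow with hypothetical names, not the actual API:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins: each "block" may contribute 0..n visible tuples. */
    static int  blocks_left = 3;
    static int  tuples_on_block = 0;

    static bool
    next_block(void)
    {
        if (blocks_left-- <= 0)
            return false;           /* bitmap exhausted */
        /* one block has no visible tuples, mimicking rs_ntuples == 0 */
        tuples_on_block = (blocks_left == 1) ? 0 : 2;
        return true;
    }

    static bool
    next_tuple(void)
    {
        /* Loop until the current block yields a tuple or the bitmap ends. */
        while (tuples_on_block == 0)
        {
            if (!next_block())
                return false;
        }
        tuples_on_block--;
        return true;
    }

    int
    main(void)
    {
        int         n = 0;

        while (next_tuple())
            n++;
        printf("fetched %d tuples\n", n);   /* 4: two blocks x two tuples */
        return 0;
    }
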
2610
2611/* ------------------------------------------------------------------------
2612 * Definition of the heap table access method.
2613 * ------------------------------------------------------------------------
2614 */
 2615
2616static const TableAmRoutine heapam_methods = {
 2617 .type = T_TableAmRoutine,
2618
2619 .slot_callbacks = heapam_slot_callbacks,
2620
2621 .scan_begin = heap_beginscan,
2622 .scan_end = heap_endscan,
2623 .scan_rescan = heap_rescan,
2624 .scan_getnextslot = heap_getnextslot,
2625
2626 .scan_set_tidrange = heap_set_tidrange,
2627 .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
2628
2629 .parallelscan_estimate = table_block_parallelscan_estimate,
2630 .parallelscan_initialize = table_block_parallelscan_initialize,
2631 .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2632
2633 .index_fetch_begin = heapam_index_fetch_begin,
2634 .index_fetch_reset = heapam_index_fetch_reset,
2635 .index_fetch_end = heapam_index_fetch_end,
2636 .index_fetch_tuple = heapam_index_fetch_tuple,
2637
2638 .tuple_insert = heapam_tuple_insert,
2639 .tuple_insert_speculative = heapam_tuple_insert_speculative,
2640 .tuple_complete_speculative = heapam_tuple_complete_speculative,
2641 .multi_insert = heap_multi_insert,
2642 .tuple_delete = heapam_tuple_delete,
2643 .tuple_update = heapam_tuple_update,
2644 .tuple_lock = heapam_tuple_lock,
2645
2646 .tuple_fetch_row_version = heapam_fetch_row_version,
2647 .tuple_get_latest_tid = heap_get_latest_tid,
2648 .tuple_tid_valid = heapam_tuple_tid_valid,
2649 .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2650 .index_delete_tuples = heap_index_delete_tuples,
2651
2652 .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
2653 .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2654 .relation_copy_data = heapam_relation_copy_data,
2655 .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2656 .relation_vacuum = heap_vacuum_rel,
2657 .scan_analyze_next_block = heapam_scan_analyze_next_block,
2658 .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2659 .index_build_range_scan = heapam_index_build_range_scan,
2660 .index_validate_scan = heapam_index_validate_scan,
2661
2662 .relation_size = table_block_relation_size,
2663 .relation_needs_toast_table = heapam_relation_needs_toast_table,
2664 .relation_toast_am = heapam_relation_toast_am,
2665 .relation_fetch_toast_slice = heap_fetch_toast_slice,
2666
2667 .relation_estimate_size = heapam_estimate_rel_size,
2668
2669 .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2670 .scan_sample_next_block = heapam_scan_sample_next_block,
2671 .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2672};
2673
2674
2675const TableAmRoutine *
2676GetHeapamTableAmRoutine(void)
2677{
2678 return &heapam_methods;
2679}
2680
2681Datum
2682heap_tableam_handler(PG_FUNCTION_ARGS)
2683{
 2684 PG_RETURN_POINTER(&heapam_methods);
2685}
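
heap_tableam_handler() shows the pattern every table access method follows: a handler function returning a pointer to a statically allocated, fully populated TableAmRoutine. At the SQL level such a handler is declared as RETURNS table_am_handler and registered with CREATE ACCESS METHOD ... TYPE TABLE HANDLER .... A heavily abridged, hypothetical sketch of a third-party handler, with the callback assignments elided (not part of this file):

    #include "postgres.h"

    #include "access/tableam.h"
    #include "fmgr.h"

    PG_MODULE_MAGIC;

    PG_FUNCTION_INFO_V1(example_tableam_handler);

    /*
     * Hypothetical AM: only the node tag is shown here; a real AM must fill
     * in every required callback, as heapam_methods does above.
     */
    static const TableAmRoutine example_methods = {
        .type = T_TableAmRoutine,
        /* .slot_callbacks = ..., .scan_begin = ..., and so on */
    };

    Datum
    example_tableam_handler(PG_FUNCTION_ARGS)
    {
        PG_RETURN_POINTER(&example_methods);
    }

On the SQL side this would be wired up with something along the lines of CREATE FUNCTION example_tableam_handler(internal) RETURNS table_am_handler AS 'MODULE_PATHNAME' LANGUAGE C, followed by CREATE ACCESS METHOD example TYPE TABLE HANDLER example_tableam_handler.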