PostgreSQL Source Code  git master
heapam.h File Reference
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h: (graph omitted)
Included-by graph, showing which files directly or indirectly include this file: (graph omitted)

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneResult PruneResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heapgetpage (TableScanDesc sscan, BlockNumber block)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
void heap_inplace_update (Relation relation, HeapTuple tuple)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_freeze_execute_prepared (Relation rel, Buffer buffer, TransactionId snapshotConflictHorizon, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune (Relation relation, Buffer buffer, struct GlobalVisState *vistest, PruneResult *presult, OffsetNumber *off_loc)
 
void heap_page_prune_execute (Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 108 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 107 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 35 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 34 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 37 of file heapam.h.
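
A hedged usage sketch (the helper insert_frozen_no_fsm is illustrative, not part of PostgreSQL): the HEAP_INSERT_* flags are OR'ed into the options argument of heap_insert(). HEAP_INSERT_FROZEN is only intended for relations created or truncated in the current (sub)transaction, and callers normally reach these flags through the table AM's TABLE_INSERT_* equivalents.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Insert a pre-built tuple frozen, bypassing the free space map. */
static void
insert_frozen_no_fsm(Relation rel, HeapTuple tup)
{
    int         options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;

    /* cid = current command; no bulk-insert state */
    heap_insert(rel, tup, GetCurrentCommandId(true), options, NULL);
}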

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 216 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 43 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

typedef struct BulkInsertStateData* BulkInsertState

Definition at line 39 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 80 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneResult

typedef struct PruneResult PruneResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 94 of file heapam.h.

95 {
96  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
97  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
98  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
99  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
100  HEAPTUPLE_DELETE_IN_PROGRESS /* deleting xact is still in progress */
101 } HTSV_Result;
HTSV_Result
Definition: heapam.h:95
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:98
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:99
@ HEAPTUPLE_LIVE
Definition: heapam.h:97
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:100
@ HEAPTUPLE_DEAD
Definition: heapam.h:96
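
A hedged sketch of how an HTSV_Result is typically consumed (the helper tuple_is_removable is illustrative): HeapTupleSatisfiesVacuum() classifies a tuple relative to an OldestXmin horizon, and only HEAPTUPLE_DEAD means the tuple can be removed right away. The caller must hold at least a share lock on the buffer containing the tuple.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

static bool
tuple_is_removable(HeapTuple tup, TransactionId OldestXmin, Buffer buf)
{
    /* tup must point into buf, and buf must be locked by the caller */
    switch (HeapTupleSatisfiesVacuum(tup, OldestXmin, buf))
    {
        case HEAPTUPLE_DEAD:
            return true;        /* dead to everyone; safe to remove */
        case HEAPTUPLE_RECENTLY_DEAD:
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            return false;       /* still (potentially) visible to someone */
    }
    return false;               /* unreachable; silences compiler warnings */
}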

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1778 of file heapam.c.

1779 {
1780  if (bistate->current_buf != InvalidBuffer)
1781  ReleaseBuffer(bistate->current_buf);
1782  FreeAccessStrategy(bistate->strategy);
1783  pfree(bistate);
1784 }
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4480
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:639
void pfree(void *pointer)
Definition: mcxt.c:1456
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1761 of file heapam.c.

1762 {
1763  BulkInsertState bistate;
1764 
1765  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1766  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1767  bistate->current_buf = InvalidBuffer;
1768  bistate->next_free = InvalidBlockNumber;
1769  bistate->last_free = InvalidBlockNumber;
1770  bistate->already_extended_by = 0;
1771  return bistate;
1772 }
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:37
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:39
void * palloc(Size size)
Definition: mcxt.c:1226
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
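
A hedged sketch of the bulk-load pattern that CopyFrom() and ATRewriteTable() follow (the helper bulk_load is illustrative): allocate one BulkInsertState, pass it to every heap_insert() call so the pinned buffer and BAS_BULKWRITE strategy are reused, and free it when done.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
bulk_load(Relation rel, HeapTuple *tuples, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, 0, bistate);

    FreeBulkInsertState(bistate);
}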

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5722 of file heapam.c.

5723 {
5724  TransactionId xid = GetCurrentTransactionId();
5725  ItemId lp;
5726  HeapTupleData tp;
5727  Page page;
5728  BlockNumber block;
5729  Buffer buffer;
5730  TransactionId prune_xid;
5731 
5732  Assert(ItemPointerIsValid(tid));
5733 
5734  block = ItemPointerGetBlockNumber(tid);
5735  buffer = ReadBuffer(relation, block);
5736  page = BufferGetPage(buffer);
5737 
5738  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5739 
5740  /*
5741  * Page can't be all visible, we just inserted into it, and are still
5742  * running.
5743  */
5744  Assert(!PageIsAllVisible(page));
5745 
5746  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
5747  Assert(ItemIdIsNormal(lp));
5748 
5749  tp.t_tableOid = RelationGetRelid(relation);
5750  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
5751  tp.t_len = ItemIdGetLength(lp);
5752  tp.t_self = *tid;
5753 
5754  /*
5755  * Sanity check that the tuple really is a speculatively inserted tuple,
5756  * inserted by us.
5757  */
5758  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
5759  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
5760  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
5761  elog(ERROR, "attempted to kill a non-speculative tuple");
5763 
5764  /*
5765  * No need to check for serializable conflicts here. There is never a
5766  * need for a combo CID, either. No need to extract replica identity, or
5767  * do anything special with infomask bits.
5768  */
5769 
5770  START_CRIT_SECTION();
5771 
5772  /*
5773  * The tuple will become DEAD immediately. Flag that this page is a
5774  * candidate for pruning by setting xmin to TransactionXmin. While not
5775  * immediately prunable, it is the oldest xid we can cheaply determine
5776  * that's safe against wraparound / being older than the table's
5777  * relfrozenxid. To defend against the unlikely case of a new relation
5778  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
5779  * if so (vacuum can't subsequently move relfrozenxid to beyond
5780  * TransactionXmin, so there's no race here).
5781  */
5783  if (TransactionIdPrecedes(TransactionXmin, relation->rd_rel->relfrozenxid))
5784  prune_xid = relation->rd_rel->relfrozenxid;
5785  else
5786  prune_xid = TransactionXmin;
5787  PageSetPrunable(page, prune_xid);
5788 
5789  /* store transaction information of xact deleting the tuple */
5792 
5793  /*
5794  * Set the tuple header xmin to InvalidTransactionId. This makes the
5795  * tuple immediately invisible to everyone. (In particular, to any
5796  * transactions waiting on the speculative token, woken up later.)
5797  */
5799 
5800  /* Clear the speculative insertion token too */
5801  tp.t_data->t_ctid = tp.t_self;
5802 
5803  MarkBufferDirty(buffer);
5804 
5805  /*
5806  * XLOG stuff
5807  *
5808  * The WAL records generated here match heap_delete(). The same recovery
5809  * routines are used.
5810  */
5811  if (RelationNeedsWAL(relation))
5812  {
5813  xl_heap_delete xlrec;
5814  XLogRecPtr recptr;
5815 
5816  xlrec.flags = XLH_DELETE_IS_SUPER;
5817  xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
5818  tp.t_data->t_infomask2);
5819  xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
5820  xlrec.xmax = xid;
5821 
5822  XLogBeginInsert();
5823  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
5824  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5825 
5826  /* No replica identity & replication origin logged */
5827 
5828  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
5829 
5830  PageSetLSN(page, recptr);
5831  }
5832 
5833  END_CRIT_SECTION();
5834 
5835  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5836 
5837  if (HeapTupleHasExternal(&tp))
5838  {
5839  Assert(!IsToastRelation(relation));
5840  heap_toast_delete(relation, &tp, true);
5841  }
5842 
5843  /*
5844  * Never need to mark tuple for invalidation, since catalogs don't support
5845  * speculative insertion
5846  */
5847 
5848  /* Now we can release the buffer */
5849  ReleaseBuffer(buffer);
5850 
5851  /* count deletion, as we counted the insertion too */
5852  pgstat_count_heap_delete(relation);
5853 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2111
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4715
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:708
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:159
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define PageSetPrunable(page, xid)
Definition: bufpage.h:444
uint32 TransactionId
Definition: c.h:641
bool IsToastRelation(Relation relation)
Definition: catalog.c:147
#define ERROR
Definition: elog.h:39
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2461
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:33
#define SizeOfHeapDelete
Definition: heapam_xlog.h:115
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:99
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:275
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:499
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:315
#define HEAP_XMAX_BITS
Definition: htup_details.h:267
#define HeapTupleHasExternal(tuple)
Definition: htup_details.h:671
#define HEAP_MOVED
Definition: htup_details.h:213
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
Assert(fmt[strlen(fmt) - 1] !='\n')
#define START_CRIT_SECTION()
Definition: miscadmin.h:148
#define END_CRIT_SECTION()
Definition: miscadmin.h:150
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:504
#define RelationNeedsWAL(relation)
Definition: rel.h:629
TransactionId TransactionXmin
Definition: snapmgr.c:104
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
union HeapTupleHeaderData::@45 t_choice
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:109
OffsetNumber offnum
Definition: heapam_xlog.h:110
uint8 infobits_set
Definition: heapam_xlog.h:111
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:445
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:351
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:461
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog(), END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 931 of file heapam.c.

935 {
936  HeapScanDesc scan;
937 
938  /*
939  * increment relation ref count while scanning relation
940  *
941  * This is just to make really sure the relcache entry won't go away while
942  * the scan has a pointer to it. Caller should be holding the rel open
943  * anyway, so this is redundant in all normal scenarios...
944  */
945  RelationIncrementReferenceCount(relation);
946 
947  /*
948  * allocate and initialize scan descriptor
949  */
950  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
951 
952  scan->rs_base.rs_rd = relation;
953  scan->rs_base.rs_snapshot = snapshot;
954  scan->rs_base.rs_nkeys = nkeys;
955  scan->rs_base.rs_flags = flags;
956  scan->rs_base.rs_parallel = parallel_scan;
957  scan->rs_strategy = NULL; /* set in initscan */
958 
959  /*
960  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
961  */
962  if (!(snapshot && IsMVCCSnapshot(snapshot)))
963  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
964 
965  /*
966  * For seqscan and sample scans in a serializable transaction, acquire a
967  * predicate lock on the entire relation. This is required not only to
968  * lock all the matching tuples, but also to conflict with new insertions
969  * into the table. In an indexscan, we take page locks on the index pages
970  * covering the range specified in the scan qual, but in a heap scan there
971  * is nothing more fine-grained to lock. A bitmap scan is a different
972  * story, there we have already scanned the index and locked the index
973  * pages covering the predicate. But in that case we still have to lock
974  * any matching heap tuples. For sample scan we could optimize the locking
975  * to be at least page-level granularity, but we'd need to add per-tuple
976  * locking for that.
977  */
978  if (scan->rs_base.rs_flags & (SO_TYPE_SEQSCAN | SO_TYPE_SAMPLESCAN))
979  {
980  /*
981  * Ensure a missing snapshot is noticed reliably, even if the
982  * isolation mode means predicate locking isn't performed (and
983  * therefore the snapshot isn't used here).
984  */
985  Assert(snapshot);
986  PredicateLockRelation(relation, snapshot);
987  }
988 
989  /* we only need to set this up once */
990  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
991 
992  /*
993  * Allocate memory to keep track of page allocation for parallel workers
994  * when doing a parallel scan.
995  */
996  if (parallel_scan != NULL)
997  scan->rs_parallelworkerdata = palloc(sizeof(ParallelBlockTableScanWorkerData));
998  else
999  scan->rs_parallelworkerdata = NULL;
1000 
1001  /*
1002  * we do this here instead of in initscan() because heap_rescan also calls
1003  * initscan() and we don't want to allocate memory again
1004  */
1005  if (nkeys > 0)
1006  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1007  else
1008  scan->rs_base.rs_key = NULL;
1009 
1010  initscan(scan, key, false);
1011 
1012  return (TableScanDesc) scan;
1013 }
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:233
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:80
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2510
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2127
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
BufferAccessStrategy rs_strategy
Definition: heapam.h:65
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:73
HeapTupleData rs_ctup
Definition: heapam.h:67
TableScanDescData rs_base
Definition: heapam.h:50
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
struct ScanKeyData * rs_key
Definition: relscan.h:37
struct SnapshotData * rs_snapshot
Definition: relscan.h:35
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:49
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:50
@ SO_TYPE_SEQSCAN
Definition: tableam.h:48

References Assert(), initscan(), IsMVCCSnapshot, palloc(), PredicateLockRelation(), RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, and HeapTupleData::t_tableOid.
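
A hedged sketch of a plain sequential scan driven directly through the heap AM (the helper count_visible_tuples is illustrative; callers normally go through the table AM instead): heap_beginscan() sets up the scan, heap_getnext() returns one visible tuple at a time, and heap_endscan() releases everything. The caller is assumed to hold an appropriate lock on the relation and to have an active snapshot.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

static uint64
count_visible_tuples(Relation rel)
{
    TableScanDesc scan;
    HeapTuple   tuple;
    uint64      ntuples = 0;

    scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                          SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        ntuples++;

    heap_endscan(scan);
    return ntuples;
}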

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2506 of file heapam.c.

2509 {
2510  TM_Result result;
2511  TransactionId xid = GetCurrentTransactionId();
2512  ItemId lp;
2513  HeapTupleData tp;
2514  Page page;
2515  BlockNumber block;
2516  Buffer buffer;
2517  Buffer vmbuffer = InvalidBuffer;
2518  TransactionId new_xmax;
2519  uint16 new_infomask,
2520  new_infomask2;
2521  bool have_tuple_lock = false;
2522  bool iscombo;
2523  bool all_visible_cleared = false;
2524  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2525  bool old_key_copied = false;
2526 
2527  Assert(ItemPointerIsValid(tid));
2528 
2529  /*
2530  * Forbid this during a parallel operation, lest it allocate a combo CID.
2531  * Other workers might need that combo CID for visibility checks, and we
2532  * have no provision for broadcasting it to them.
2533  */
2534  if (IsInParallelMode())
2535  ereport(ERROR,
2536  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2537  errmsg("cannot delete tuples during a parallel operation")));
2538 
2539  block = ItemPointerGetBlockNumber(tid);
2540  buffer = ReadBuffer(relation, block);
2541  page = BufferGetPage(buffer);
2542 
2543  /*
2544  * Before locking the buffer, pin the visibility map page if it appears to
2545  * be necessary. Since we haven't got the lock yet, someone else might be
2546  * in the middle of changing this, so we'll need to recheck after we have
2547  * the lock.
2548  */
2549  if (PageIsAllVisible(page))
2550  visibilitymap_pin(relation, block, &vmbuffer);
2551 
2553 
2554  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2555  Assert(ItemIdIsNormal(lp));
2556 
2557  tp.t_tableOid = RelationGetRelid(relation);
2558  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2559  tp.t_len = ItemIdGetLength(lp);
2560  tp.t_self = *tid;
2561 
2562 l1:
2563 
2564  /*
2565  * If we didn't pin the visibility map page and the page has become all
2566  * visible while we were busy locking the buffer, we'll have to unlock and
2567  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2568  * unfortunate, but hopefully shouldn't happen often.
2569  */
2570  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2571  {
2572  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2573  visibilitymap_pin(relation, block, &vmbuffer);
2575  }
2576 
2577  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2578 
2579  if (result == TM_Invisible)
2580  {
2581  UnlockReleaseBuffer(buffer);
2582  ereport(ERROR,
2583  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2584  errmsg("attempted to delete invisible tuple")));
2585  }
2586  else if (result == TM_BeingModified && wait)
2587  {
2588  TransactionId xwait;
2589  uint16 infomask;
2590 
2591  /* must copy state data before unlocking buffer */
2592  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2593  infomask = tp.t_data->t_infomask;
2594 
2595  /*
2596  * Sleep until concurrent transaction ends -- except when there's a
2597  * single locker and it's our own transaction. Note we don't care
2598  * which lock mode the locker has, because we need the strongest one.
2599  *
2600  * Before sleeping, we need to acquire tuple lock to establish our
2601  * priority for the tuple (see heap_lock_tuple). LockTuple will
2602  * release us when we are next-in-line for the tuple.
2603  *
2604  * If we are forced to "start over" below, we keep the tuple lock;
2605  * this arranges that we stay at the head of the line while rechecking
2606  * tuple state.
2607  */
2608  if (infomask & HEAP_XMAX_IS_MULTI)
2609  {
2610  bool current_is_member = false;
2611 
2612  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2613  LockTupleExclusive, &current_is_member))
2614  {
2615  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2616 
2617  /*
2618  * Acquire the lock, if necessary (but skip it when we're
2619  * requesting a lock and already have one; avoids deadlock).
2620  */
2621  if (!current_is_member)
2623  LockWaitBlock, &have_tuple_lock);
2624 
2625  /* wait for multixact */
2627  relation, &(tp.t_self), XLTW_Delete,
2628  NULL);
2630 
2631  /*
2632  * If xwait had just locked the tuple then some other xact
2633  * could update this tuple before we get to this point. Check
2634  * for xmax change, and start over if so.
2635  *
2636  * We also must start over if we didn't pin the VM page, and
2637  * the page has become all visible.
2638  */
2639  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2640  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2642  xwait))
2643  goto l1;
2644  }
2645 
2646  /*
2647  * You might think the multixact is necessarily done here, but not
2648  * so: it could have surviving members, namely our own xact or
2649  * other subxacts of this backend. It is legal for us to delete
2650  * the tuple in either case, however (the latter case is
2651  * essentially a situation of upgrading our former shared lock to
2652  * exclusive). We don't bother changing the on-disk hint bits
2653  * since we are about to overwrite the xmax altogether.
2654  */
2655  }
2656  else if (!TransactionIdIsCurrentTransactionId(xwait))
2657  {
2658  /*
2659  * Wait for regular transaction to end; but first, acquire tuple
2660  * lock.
2661  */
2662  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2664  LockWaitBlock, &have_tuple_lock);
2665  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2667 
2668  /*
2669  * xwait is done, but if xwait had just locked the tuple then some
2670  * other xact could update this tuple before we get to this point.
2671  * Check for xmax change, and start over if so.
2672  *
2673  * We also must start over if we didn't pin the VM page, and the
2674  * page has become all visible.
2675  */
2676  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2677  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2679  xwait))
2680  goto l1;
2681 
2682  /* Otherwise check if it committed or aborted */
2683  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2684  }
2685 
2686  /*
2687  * We may overwrite if previous xmax aborted, or if it committed but
2688  * only locked the tuple without updating it.
2689  */
2690  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2693  result = TM_Ok;
2694  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2695  result = TM_Updated;
2696  else
2697  result = TM_Deleted;
2698  }
2699 
2700  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2701  {
2702  /* Perform additional check for transaction-snapshot mode RI updates */
2703  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2704  result = TM_Updated;
2705  }
2706 
2707  if (result != TM_Ok)
2708  {
2709  Assert(result == TM_SelfModified ||
2710  result == TM_Updated ||
2711  result == TM_Deleted ||
2712  result == TM_BeingModified);
2714  Assert(result != TM_Updated ||
2715  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2716  tmfd->ctid = tp.t_data->t_ctid;
2718  if (result == TM_SelfModified)
2719  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2720  else
2721  tmfd->cmax = InvalidCommandId;
2722  UnlockReleaseBuffer(buffer);
2723  if (have_tuple_lock)
2724  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2725  if (vmbuffer != InvalidBuffer)
2726  ReleaseBuffer(vmbuffer);
2727  return result;
2728  }
2729 
2730  /*
2731  * We're about to do the actual delete -- check for conflict first, to
2732  * avoid possibly having to roll back work we've just done.
2733  *
2734  * This is safe without a recheck as long as there is no possibility of
2735  * another process scanning the page between this check and the delete
2736  * being visible to the scan (i.e., an exclusive buffer content lock is
2737  * continuously held from this point until the tuple delete is visible).
2738  */
2739  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2740 
2741  /* replace cid with a combo CID if necessary */
2742  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2743 
2744  /*
2745  * Compute replica identity tuple before entering the critical section so
2746  * we don't PANIC upon a memory allocation failure.
2747  */
2748  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2749 
2750  /*
2751  * If this is the first possibly-multixact-able operation in the current
2752  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2753  * certain that the transaction will never become a member of any older
2754  * MultiXactIds than that. (We have to do this even if we end up just
2755  * using our own TransactionId below, since some other backend could
2756  * incorporate our XID into a MultiXact immediately afterwards.)
2757  */
2759 
2762  xid, LockTupleExclusive, true,
2763  &new_xmax, &new_infomask, &new_infomask2);
2764 
2766 
2767  /*
2768  * If this transaction commits, the tuple will become DEAD sooner or
2769  * later. Set flag that this page is a candidate for pruning once our xid
2770  * falls below the OldestXmin horizon. If the transaction finally aborts,
2771  * the subsequent page pruning will be a no-op and the hint will be
2772  * cleared.
2773  */
2774  PageSetPrunable(page, xid);
2775 
2776  if (PageIsAllVisible(page))
2777  {
2778  all_visible_cleared = true;
2779  PageClearAllVisible(page);
2780  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2781  vmbuffer, VISIBILITYMAP_VALID_BITS);
2782  }
2783 
2784  /* store transaction information of xact deleting the tuple */
2787  tp.t_data->t_infomask |= new_infomask;
2788  tp.t_data->t_infomask2 |= new_infomask2;
2790  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2791  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2792  /* Make sure there is no forward chain link in t_ctid */
2793  tp.t_data->t_ctid = tp.t_self;
2794 
2795  /* Signal that this is actually a move into another partition */
2796  if (changingPart)
2798 
2799  MarkBufferDirty(buffer);
2800 
2801  /*
2802  * XLOG stuff
2803  *
2804  * NB: heap_abort_speculative() uses the same xlog record and replay
2805  * routines.
2806  */
2807  if (RelationNeedsWAL(relation))
2808  {
2809  xl_heap_delete xlrec;
2810  xl_heap_header xlhdr;
2811  XLogRecPtr recptr;
2812 
2813  /*
2814  * For logical decode we need combo CIDs to properly decode the
2815  * catalog
2816  */
2818  log_heap_new_cid(relation, &tp);
2819 
2820  xlrec.flags = 0;
2821  if (all_visible_cleared)
2823  if (changingPart)
2826  tp.t_data->t_infomask2);
2828  xlrec.xmax = new_xmax;
2829 
2830  if (old_key_tuple != NULL)
2831  {
2832  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
2834  else
2836  }
2837 
2838  XLogBeginInsert();
2839  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
2840 
2841  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2842 
2843  /*
2844  * Log replica identity of the deleted tuple if there is one
2845  */
2846  if (old_key_tuple != NULL)
2847  {
2848  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
2849  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
2850  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
2851 
2852  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
2853  XLogRegisterData((char *) old_key_tuple->t_data
2855  old_key_tuple->t_len
2857  }
2858 
2859  /* filtering by origin on a row level is much more efficient */
2861 
2862  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
2863 
2864  PageSetLSN(page, recptr);
2865  }
2866 
2867  END_CRIT_SECTION();
2868 
2869  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2870 
2871  if (vmbuffer != InvalidBuffer)
2872  ReleaseBuffer(vmbuffer);
2873 
2874  /*
2875  * If the tuple has toasted out-of-line attributes, we need to delete
2876  * those items too. We have to do this before releasing the buffer
2877  * because we need to look at the contents of the tuple, but it's OK to
2878  * release the content lock on the buffer first.
2879  */
2880  if (relation->rd_rel->relkind != RELKIND_RELATION &&
2881  relation->rd_rel->relkind != RELKIND_MATVIEW)
2882  {
2883  /* toast table entries should never be recursively toasted */
2885  }
2886  else if (HeapTupleHasExternal(&tp))
2887  heap_toast_delete(relation, &tp, false);
2888 
2889  /*
2890  * Mark tuple for invalidation from system caches at next command
2891  * boundary. We have to do this before releasing the buffer because we
2892  * need to look at the contents of the tuple.
2893  */
2894  CacheInvalidateHeapTuple(relation, &tp, NULL);
2895 
2896  /* Now we can release the buffer */
2897  ReleaseBuffer(buffer);
2898 
2899  /*
2900  * Release the lmgr tuple lock, if we had it.
2901  */
2902  if (have_tuple_lock)
2903  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2904 
2905  pgstat_count_heap_delete(relation);
2906 
2907  if (old_key_tuple != NULL && old_key_copied)
2908  heap_freetuple(old_key_tuple);
2909 
2910  return TM_Ok;
2911 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3290
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4497
static void PageClearAllVisible(Page page)
Definition: bufpage.h:436
#define InvalidCommandId
Definition: c.h:658
unsigned short uint16
Definition: c.h:494
TransactionId MultiXactId
Definition: c.h:651
void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup)
Definition: combocid.c:118
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ereport(elevel,...)
Definition: elog.h:149
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7108
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:8572
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:4873
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:8653
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:4824
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7285
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2483
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:171
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:1739
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:98
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:96
#define SizeOfHeapHeader
Definition: heapam_xlog.h:151
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:100
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:97
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1426
#define HEAP_XMAX_IS_LOCKED_ONLY(infomask)
Definition: htup_details.h:227
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:376
#define HeapTupleHeaderClearHotUpdated(tup)
Definition: htup_details.h:494
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define HeapTupleHeaderSetMovedPartitions(tup)
Definition: htup_details.h:447
#define HeapTupleHeaderGetRawXmax(tup)
Definition: htup_details.h:371
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderSetCmax(tup, cid, iscombo)
Definition: htup_details.h:401
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1207
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:668
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:626
@ MultiXactStatusUpdate
Definition: multixact.h:50
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4270
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:685
#define InvalidSnapshot
Definition: snapshot.h:123
TransactionId xmax
Definition: tableam.h:143
CommandId cmax
Definition: tableam.h:144
ItemPointerData ctid
Definition: tableam.h:142
uint16 t_infomask
Definition: heapam_xlog.h:147
uint16 t_infomask2
Definition: heapam_xlog.h:146
TM_Result
Definition: tableam.h:72
@ TM_Ok
Definition: tableam.h:77
@ TM_BeingModified
Definition: tableam.h:99
@ TM_Deleted
Definition: tableam.h:92
@ TM_Updated
Definition: tableam.h:89
@ TM_SelfModified
Definition: tableam.h:83
@ TM_Invisible
Definition: tableam.h:80
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:926
bool IsInParallelMode(void)
Definition: xact.c:1069
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:149
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:443

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
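
A hedged sketch of a direct heap_delete() call (the helper delete_tuple is illustrative; callers that cannot tolerate concurrent changes can use simple_heap_delete() instead): wait=true blocks behind any concurrent locker, and any result other than TM_Ok is reported back with details in the TM_FailureData.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"
#include "storage/itemptr.h"

static bool
delete_tuple(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid, GetCurrentCommandId(true),
                         InvalidSnapshot,   /* no crosscheck snapshot */
                         true,              /* wait for concurrent xacts */
                         &tmfd,
                         false);            /* not a partition move */

    return result == TM_Ok;
}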

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1053 of file heapam.c.

1054 {
1055  HeapScanDesc scan = (HeapScanDesc) sscan;
1056 
1057  /* Note: no locking manipulations needed */
1058 
1059  /*
1060  * unpin scan buffers
1061  */
1062  if (BufferIsValid(scan->rs_cbuf))
1063  ReleaseBuffer(scan->rs_cbuf);
1064 
1065  /*
1066  * decrement relation reference count and free scan descriptor storage
1067  */
1068  RelationDecrementReferenceCount(scan->rs_base.rs_rd);
1069 
1070  if (scan->rs_base.rs_key)
1071  pfree(scan->rs_base.rs_key);
1072 
1073  if (scan->rs_strategy != NULL)
1074  FreeAccessStrategy(scan->rs_strategy);
1075 
1076  if (scan->rs_parallelworkerdata != NULL)
1077  pfree(scan->rs_parallelworkerdata);
1078 
1079  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1080  UnregisterSnapshot(scan->rs_base.rs_snapshot);
1081 
1082  pfree(scan);
1083 }
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2140
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:817
Buffer rs_cbuf
Definition: heapam.h:62
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:64

References BufferIsValid(), FreeAccessStrategy(), pfree(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1345 of file heapam.c.

1350 {
1351  ItemPointer tid = &(tuple->t_self);
1352  ItemId lp;
1353  Buffer buffer;
1354  Page page;
1355  OffsetNumber offnum;
1356  bool valid;
1357 
1358  /*
1359  * Fetch and pin the appropriate page of the relation.
1360  */
1361  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1362 
1363  /*
1364  * Need share lock on buffer to examine tuple commit status.
1365  */
1366  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1367  page = BufferGetPage(buffer);
1368 
1369  /*
1370  * We'd better check for out-of-range offnum in case of VACUUM since the
1371  * TID was obtained.
1372  */
1373  offnum = ItemPointerGetOffsetNumber(tid);
1374  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1375  {
1376  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1377  ReleaseBuffer(buffer);
1378  *userbuf = InvalidBuffer;
1379  tuple->t_data = NULL;
1380  return false;
1381  }
1382 
1383  /*
1384  * get the item line pointer corresponding to the requested tid
1385  */
1386  lp = PageGetItemId(page, offnum);
1387 
1388  /*
1389  * Must check for deleted tuple.
1390  */
1391  if (!ItemIdIsNormal(lp))
1392  {
1393  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1394  ReleaseBuffer(buffer);
1395  *userbuf = InvalidBuffer;
1396  tuple->t_data = NULL;
1397  return false;
1398  }
1399 
1400  /*
1401  * fill in *tuple fields
1402  */
1403  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1404  tuple->t_len = ItemIdGetLength(lp);
1405  tuple->t_tableOid = RelationGetRelid(relation);
1406 
1407  /*
1408  * check tuple visibility, then release lock
1409  */
1410  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1411 
1412  if (valid)
1413  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1414  HeapTupleHeaderGetXmin(tuple->t_data));
1415 
1416  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1417 
1418  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1419 
1420  if (valid)
1421  {
1422  /*
1423  * All checks passed, so return the tuple as valid. Caller is now
1424  * responsible for releasing the buffer.
1425  */
1426  *userbuf = buffer;
1427 
1428  return true;
1429  }
1430 
1431  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1432  if (keep_buf)
1433  *userbuf = buffer;
1434  else
1435  {
1436  ReleaseBuffer(buffer);
1437  *userbuf = InvalidBuffer;
1438  tuple->t_data = NULL;
1439  }
1440 
1441  return false;
1442 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:10139
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2555

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
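
A hedged sketch of fetching a single tuple by TID (the helper fetch_by_tid is illustrative): heap_fetch() reads the target from tuple->t_self, checks visibility against the snapshot, and on success leaves the buffer pinned for the caller to release.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

static bool
fetch_by_tid(Relation rel, Snapshot snapshot, ItemPointerData tid)
{
    HeapTupleData tuple;
    Buffer      buf;

    tuple.t_self = tid;         /* heap_fetch() reads the TID from t_self */
    if (!heap_fetch(rel, snapshot, &tuple, &buf, false))
        return false;           /* nonexistent, dead, or not visible */

    /* ... inspect tuple.t_data / tuple.t_len while the pin is held ... */

    ReleaseBuffer(buf);
    return true;
}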

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5635 of file heapam.c.

5636 {
5637  Buffer buffer;
5638  Page page;
5639  OffsetNumber offnum;
5640  ItemId lp = NULL;
5641  HeapTupleHeader htup;
5642 
5643  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5644  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5645  page = (Page) BufferGetPage(buffer);
5646 
5647  offnum = ItemPointerGetOffsetNumber(tid);
5648  if (PageGetMaxOffsetNumber(page) >= offnum)
5649  lp = PageGetItemId(page, offnum);
5650 
5651  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5652  elog(ERROR, "invalid lp");
5653 
5654  htup = (HeapTupleHeader) PageGetItem(page, lp);
5655 
5656  /* NO EREPORT(ERROR) from here till changes are logged */
5657  START_CRIT_SECTION();
5658 
5659  Assert(HeapTupleHeaderIsSpeculative(htup));
5660 
5661  MarkBufferDirty(buffer);
5662 
5663  /*
5664  * Replace the speculative insertion token with a real t_ctid, pointing to
5665  * itself like it does on regular tuples.
5666  */
5667  htup->t_ctid = *tid;
5668 
5669  /* XLOG stuff */
5670  if (RelationNeedsWAL(relation))
5671  {
5672  xl_heap_confirm xlrec;
5673  XLogRecPtr recptr;
5674 
5675  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5676 
5677  XLogBeginInsert();
5678 
5679  /* We want the same filtering on this as on a plain insert */
5681 
5682  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5683  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5684 
5685  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5686 
5687  PageSetLSN(page, recptr);
5688  }
5689 
5690  END_CRIT_SECTION();
5691 
5692  UnlockReleaseBuffer(buffer);
5693 }
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:307
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:37
OffsetNumber offnum
Definition: heapam_xlog.h:304

References Assert(), BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog(), END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
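
A deliberately simplified, hedged sketch of the speculative-insertion dance used by INSERT ... ON CONFLICT (the helper speculative_insert and its conflicts flag are illustrative; the real flow lives in ExecInsert() and heapam_tuple_insert_speculative(), where the conflict check runs between the insert and its completion):

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "storage/lmgr.h"

static void
speculative_insert(Relation rel, HeapTuple tup, bool conflicts)
{
    TransactionId xid = GetCurrentTransactionId();
    uint32      token = SpeculativeInsertionLockAcquire(xid);

    /* stamp the tuple with the token and insert it speculatively */
    HeapTupleHeaderSetSpeculativeToken(tup->t_data, token);
    heap_insert(rel, tup, GetCurrentCommandId(true),
                HEAP_INSERT_SPECULATIVE, NULL);

    if (!conflicts)
        heap_finish_speculative(rel, &tup->t_self); /* make it permanent */
    else
        heap_abort_speculative(rel, &tup->t_self);  /* "super-delete" it */

    SpeculativeInsertionLockRelease(xid);
}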

◆ heap_freeze_execute_prepared()

void heap_freeze_execute_prepared ( Relation  rel,
Buffer  buffer,
TransactionId  snapshotConflictHorizon,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6664 of file heapam.c.

6667 {
6668  Page page = BufferGetPage(buffer);
6669 
6670  Assert(ntuples > 0);
6671 
6672  /*
6673  * Perform xmin/xmax XID status sanity checks before critical section.
6674  *
6675  * heap_prepare_freeze_tuple doesn't perform these checks directly because
6676  * pg_xact lookups are relatively expensive. They shouldn't be repeated
6677  * by successive VACUUMs that each decide against freezing the same page.
6678  */
6679  for (int i = 0; i < ntuples; i++)
6680  {
6681  HeapTupleFreeze *frz = tuples + i;
6682  ItemId itemid = PageGetItemId(page, frz->offset);
6683  HeapTupleHeader htup;
6684 
6685  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6686 
6687  /* Deliberately avoid relying on tuple hint bits here */
6689  {
6691 
6693  if (unlikely(!TransactionIdDidCommit(xmin)))
6694  ereport(ERROR,
6696  errmsg_internal("uncommitted xmin %u needs to be frozen",
6697  xmin)));
6698  }
6699 
6700  /*
6701  * TransactionIdDidAbort won't work reliably in the presence of XIDs
6702  * left behind by transactions that were in progress during a crash,
6703  * so we can only check that xmax didn't commit
6704  */
6706  {
6708 
6710  if (unlikely(TransactionIdDidCommit(xmax)))
6711  ereport(ERROR,
6713  errmsg_internal("cannot freeze committed xmax %u",
6714  xmax)));
6715  }
6716  }
6717 
6719 
6720  for (int i = 0; i < ntuples; i++)
6721  {
6722  HeapTupleFreeze *frz = tuples + i;
6723  ItemId itemid = PageGetItemId(page, frz->offset);
6724  HeapTupleHeader htup;
6725 
6726  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6727  heap_execute_freeze_tuple(htup, frz);
6728  }
6729 
6730  MarkBufferDirty(buffer);
6731 
6732  /* Now WAL-log freezing if necessary */
6733  if (RelationNeedsWAL(rel))
6734  {
6737  int nplans;
6738  xl_heap_freeze_page xlrec;
6739  XLogRecPtr recptr;
6740 
6741  /* Prepare deduplicated representation for use in WAL record */
6742  nplans = heap_log_freeze_plan(tuples, ntuples, plans, offsets);
6743 
6744  xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
6746  xlrec.nplans = nplans;
6747 
6748  XLogBeginInsert();
6749  XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
6750 
6751  /*
6752  * The freeze plan array and offset array are not actually in the
6753  * buffer, but pretend that they are. When XLogInsert stores the
6754  * whole buffer, the arrays need not be stored too.
6755  */
6756  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6757  XLogRegisterBufData(0, (char *) plans,
6758  nplans * sizeof(xl_heap_freeze_plan));
6759  XLogRegisterBufData(0, (char *) offsets,
6760  ntuples * sizeof(OffsetNumber));
6761 
6762  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE);
6763 
6764  PageSetLSN(page, recptr);
6765  }
6766 
6767  END_CRIT_SECTION();
6768 }
#define unlikely(x)
Definition: c.h:300
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1156
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xl_heap_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: heapam.c:6858
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.c:6635
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:108
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:107
#define SizeOfHeapFreezePage
Definition: heapam_xlog.h:357
#define XLOG_HEAP2_FREEZE_PAGE
Definition: heapam_xlog.h:56
#define HeapTupleHeaderGetRawXmin(tup)
Definition: htup_details.h:304
#define HeapTupleHeaderXminFrozen(tup)
Definition: htup_details.h:331
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
int i
Definition: isn.c:73
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
OffsetNumber offset
Definition: heapam.h:122
uint8 checkflags
Definition: heapam.h:120
TransactionId snapshotConflictHorizon
Definition: heapam_xlog.h:347
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:392

References Assert(), BufferGetPage(), HeapTupleFreeze::checkflags, END_CRIT_SECTION, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, heap_execute_freeze_tuple(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, heap_log_freeze_plan(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderXminFrozen, i, xl_heap_freeze_page::isCatalogRel, MarkBufferDirty(), MaxHeapTuplesPerPage, xl_heap_freeze_page::nplans, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), PageSetLSN(), REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, SizeOfHeapFreezePage, xl_heap_freeze_page::snapshotConflictHorizon, START_CRIT_SECTION, TransactionIdDidCommit(), TransactionIdIsNormal, unlikely, XLOG_HEAP2_FREEZE_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by lazy_scan_prune().
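
A minimal VACUUM-style sketch of how this function is meant to be driven (it is not lazy_scan_prune() itself): heap_prepare_freeze_tuple() builds one freeze plan per tuple, the caller records the offset of each tuple it decided to freeze, and a single call then applies and WAL-logs all plans for the page. The helper name freeze_page_sketch, the seed values for the page-level trackers, and the way cutoffs and the conflict horizon are obtained are assumptions; the per-tuple pruning and visibility decisions that VACUUM makes are omitted, and the caller must hold an exclusive lock on buf.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "commands/vacuum.h"
#include "storage/bufmgr.h"

static void
freeze_page_sketch(Relation rel, Buffer buf,
                   struct VacuumCutoffs *cutoffs,
                   TransactionId snapshotConflictHorizon)
{
    Page        page = BufferGetPage(buf);
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);
    HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    HeapPageFreeze pagefrz;
    int         nfrozen = 0;

    /* initialize the page-level freeze trackers (simplified seed values) */
    pagefrz.freeze_required = false;
    pagefrz.FreezePageRelfrozenXid = cutoffs->FreezeLimit;
    pagefrz.FreezePageRelminMxid = cutoffs->MultiXactCutoff;
    pagefrz.NoFreezePageRelfrozenXid = cutoffs->FreezeLimit;
    pagefrz.NoFreezePageRelminMxid = cutoffs->MultiXactCutoff;

    for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleHeader htup;
        bool        totally_frozen;

        if (!ItemIdIsNormal(itemid))
            continue;

        htup = (HeapTupleHeader) PageGetItem(page, itemid);

        /* returns true when this tuple has something worth freezing */
        if (heap_prepare_freeze_tuple(htup, cutoffs, &pagefrz,
                                      &frozen[nfrozen], &totally_frozen))
            frozen[nfrozen++].offset = offnum;
    }

    if (nfrozen > 0)
        heap_freeze_execute_prepared(rel, buf, snapshotConflictHorizon,
                                     frozen, nfrozen);
}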

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 6915 of file heapam.c.

6918 {
6919  HeapTupleFreeze frz;
6920  bool do_freeze;
6921  bool totally_frozen;
6922  struct VacuumCutoffs cutoffs;
6923  HeapPageFreeze pagefrz;
6924 
6925  cutoffs.relfrozenxid = relfrozenxid;
6926  cutoffs.relminmxid = relminmxid;
6927  cutoffs.OldestXmin = FreezeLimit;
6928  cutoffs.OldestMxact = MultiXactCutoff;
6929  cutoffs.FreezeLimit = FreezeLimit;
6930  cutoffs.MultiXactCutoff = MultiXactCutoff;
6931 
6932  pagefrz.freeze_required = true;
6933  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
6934  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
6935  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
6936  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
6937 
6938  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
6939  &pagefrz, &frz, &totally_frozen);
6940 
6941  /*
6942  * Note that because this is not a WAL-logged operation, we don't need to
6943  * fill in the offset in the freeze record.
6944  */
6945 
6946  if (do_freeze)
6947  heap_execute_freeze_tuple(tuple, &frz);
6948  return do_freeze;
6949 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6361
TransactionId FreezeLimit
Definition: vacuum.h:275
TransactionId relfrozenxid
Definition: vacuum.h:249
MultiXactId relminmxid
Definition: vacuum.h:250
MultiXactId MultiXactCutoff
Definition: vacuum.h:276

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
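
For comparison, a hypothetical caller in the spirit of rewrite_heap_tuple(): freeze a single, already-copied tuple in place using the relation's own horizons. The parameters freeze_xid and freeze_multi stand in for cutoffs the caller computed elsewhere; nothing here is WAL-logged, so the caller is responsible for persisting the rewritten copy.

#include "postgres.h"

#include "access/heapam.h"
#include "utils/rel.h"

static void
maybe_freeze_copied_tuple(Relation rel, HeapTuple newtup,
                          TransactionId freeze_xid,
                          MultiXactId freeze_multi)
{
    /* rewrites newtup->t_data in place when anything needed freezing */
    if (heap_freeze_tuple(newtup->t_data,
                          rel->rd_rel->relfrozenxid,
                          rel->rd_rel->relminmxid,
                          freeze_xid,
                          freeze_multi))
    {
        /* the header was rewritten as (partially) frozen */
    }
}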

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1617 of file heapam.c.

1619 {
1620  Relation relation = sscan->rs_rd;
1621  Snapshot snapshot = sscan->rs_snapshot;
1622  ItemPointerData ctid;
1623  TransactionId priorXmax;
1624 
1625  /*
1626  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1627  * Assume that t_ctid links are valid however - there shouldn't be invalid
1628  * ones in the table.
1629  */
1630  Assert(ItemPointerIsValid(tid));
1631 
1632  /*
1633  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1634  * need to examine, and *tid is the TID we will return if ctid turns out
1635  * to be bogus.
1636  *
1637  * Note that we will loop until we reach the end of the t_ctid chain.
1638  * Depending on the snapshot passed, there might be at most one visible
1639  * version of the row, but we don't try to optimize for that.
1640  */
1641  ctid = *tid;
1642  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1643  for (;;)
1644  {
1645  Buffer buffer;
1646  Page page;
1647  OffsetNumber offnum;
1648  ItemId lp;
1649  HeapTupleData tp;
1650  bool valid;
1651 
1652  /*
1653  * Read, pin, and lock the page.
1654  */
1655  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1656  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1657  page = BufferGetPage(buffer);
1658 
1659  /*
1660  * Check for bogus item number. This is not treated as an error
1661  * condition because it can happen while following a t_ctid link. We
1662  * just assume that the prior tid is OK and return it unchanged.
1663  */
1664  offnum = ItemPointerGetOffsetNumber(&ctid);
1665  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1666  {
1667  UnlockReleaseBuffer(buffer);
1668  break;
1669  }
1670  lp = PageGetItemId(page, offnum);
1671  if (!ItemIdIsNormal(lp))
1672  {
1673  UnlockReleaseBuffer(buffer);
1674  break;
1675  }
1676 
1677  /* OK to access the tuple */
1678  tp.t_self = ctid;
1679  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1680  tp.t_len = ItemIdGetLength(lp);
1681  tp.t_tableOid = RelationGetRelid(relation);
1682 
1683  /*
1684  * After following a t_ctid link, we might arrive at an unrelated
1685  * tuple. Check for XMIN match.
1686  */
1687  if (TransactionIdIsValid(priorXmax) &&
1688  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1689  {
1690  UnlockReleaseBuffer(buffer);
1691  break;
1692  }
1693 
1694  /*
1695  * Check tuple visibility; if visible, set it as the new result
1696  * candidate.
1697  */
1698  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1699  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1700  if (valid)
1701  *tid = ctid;
1702 
1703  /*
1704  * If there's a valid t_ctid link, follow it, else we're done.
1705  */
1706  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1707  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1708  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1709  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1710  {
1711  UnlockReleaseBuffer(buffer);
1712  break;
1713  }
1714 
1715  ctid = tp.t_data->t_ctid;
1716  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1717  UnlockReleaseBuffer(buffer);
1718  } /* end of loop */
1719 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert(), BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
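
A minimal sketch of the usual route to this function: open a TID scan through the table AM and let table_tuple_get_latest_tid() dispatch to heap_get_latest_tid(). The caller must supply a valid TID and a registered snapshot; error handling is omitted and the helper name is hypothetical.

#include "postgres.h"

#include "access/tableam.h"
#include "storage/itemptr.h"
#include "utils/snapshot.h"

static void
resolve_latest_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    TableScanDesc scan = table_beginscan_tid(rel, snapshot);

    /* *tid is overwritten with the latest visible version reachable from it */
    table_tuple_get_latest_tid(scan, tid);

    table_endscan(scan);
}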

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 996 of file pruneheap.c.

997 {
998  OffsetNumber offnum,
999  maxoff;
1000 
1001  MemSet(root_offsets, InvalidOffsetNumber,
1002  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1003 
1004  maxoff = PageGetMaxOffsetNumber(page);
1005  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1006  {
1007  ItemId lp = PageGetItemId(page, offnum);
1008  HeapTupleHeader htup;
1009  OffsetNumber nextoffnum;
1010  TransactionId priorXmax;
1011 
1012  /* skip unused and dead items */
1013  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1014  continue;
1015 
1016  if (ItemIdIsNormal(lp))
1017  {
1018  htup = (HeapTupleHeader) PageGetItem(page, lp);
1019 
1020  /*
1021  * Check if this tuple is part of a HOT-chain rooted at some other
1022  * tuple. If so, skip it for now; we'll process it when we find
1023  * its root.
1024  */
1025  if (HeapTupleHeaderIsHeapOnly(htup))
1026  continue;
1027 
1028  /*
1029  * This is either a plain tuple or the root of a HOT-chain.
1030  * Remember it in the mapping.
1031  */
1032  root_offsets[offnum - 1] = offnum;
1033 
1034  /* If it's not the start of a HOT-chain, we're done with it */
1035  if (!HeapTupleHeaderIsHotUpdated(htup))
1036  continue;
1037 
1038  /* Set up to scan the HOT-chain */
1039  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1040  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1041  }
1042  else
1043  {
1044  /* Must be a redirect item. We do not set its root_offsets entry */
1045  Assert(ItemIdIsRedirected(lp));
1046  /* Set up to scan the HOT-chain */
1047  nextoffnum = ItemIdGetRedirect(lp);
1048  priorXmax = InvalidTransactionId;
1049  }
1050 
1051  /*
1052  * Now follow the HOT-chain and collect other tuples in the chain.
1053  *
1054  * Note: Even though this is a nested loop, the complexity of the
1055  * function is O(N) because a tuple in the page should be visited not
1056  * more than twice, once in the outer loop and once in HOT-chain
1057  * chases.
1058  */
1059  for (;;)
1060  {
1061  /* Sanity check (pure paranoia) */
1062  if (offnum < FirstOffsetNumber)
1063  break;
1064 
1065  /*
1066  * An offset past the end of page's line pointer array is possible
1067  * when the array was truncated
1068  */
1069  if (offnum > maxoff)
1070  break;
1071 
1072  lp = PageGetItemId(page, nextoffnum);
1073 
1074  /* Check for broken chains */
1075  if (!ItemIdIsNormal(lp))
1076  break;
1077 
1078  htup = (HeapTupleHeader) PageGetItem(page, lp);
1079 
1080  if (TransactionIdIsValid(priorXmax) &&
1081  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1082  break;
1083 
1084  /* Remember the root line pointer for this item */
1085  root_offsets[nextoffnum - 1] = offnum;
1086 
1087  /* Advance to next chain member, if any */
1088  if (!HeapTupleHeaderIsHotUpdated(htup))
1089  break;
1090 
1091  /* HOT implies it can't have moved to different partition */
1092  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1093 
1094  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1095  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1096  }
1097  }
1098 }
#define MemSet(start, val, len)
Definition: c.h:1009
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
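
A small sketch, loosely modeled on how heapam_index_build_range_scan() uses the mapping: build the per-page root-offset array once and translate a heap-only tuple's offset into the root line pointer that an index entry must reference. The helper name is hypothetical, and at least a share lock on the buffer is assumed.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

static OffsetNumber
root_offset_for(Buffer buf, OffsetNumber offnum)
{
    OffsetNumber root_offsets[MaxHeapTuplesPerPage];

    heap_get_root_tuples(BufferGetPage(buf), root_offsets);

    /* InvalidOffsetNumber here means no root was found for this item */
    return root_offsets[offnum - 1];
}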

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1086 of file heapam.c.

1087 {
1088  HeapScanDesc scan = (HeapScanDesc) sscan;
1089 
1090  /*
1091  * This is still widely used directly, without going through table AM, so
1092  * add a safety check. It's possible we should, at a later point,
1093  * downgrade this to an assert. The reason for checking the AM routine,
1094  * rather than the AM oid, is that this allows to write regression tests
1095  * that create another AM reusing the heap handler.
1096  */
1097  if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1098  ereport(ERROR,
1099  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1100  errmsg_internal("only heap AM is supported")));
1101 
1102  /*
1103  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1104  * for catalog or regular tables. See detailed comments in xact.c where
1105  * these variables are declared. Normally we have such a check at tableam
1106  * level API but this is called from many places so we need to ensure it
1107  * here.
1108  */
1109  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1110  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1111 
1112  /* Note: no locking manipulations needed */
1113 
1114  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1115  heapgettup_pagemode(scan, direction,
1116  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1117  else
1118  heapgettup(scan, direction,
1119  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1120 
1121  if (scan->rs_ctup.t_data == NULL)
1122  return NULL;
1123 
1124  /*
1125  * if we get here it means we have a new current scan tuple, so point to
1126  * the proper return buffer and return the tuple.
1127  */
1128 
1129  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1130 
1131  return &scan->rs_ctup;
1132 }
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:724
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:839
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:613
const struct TableAmRoutine * rd_tableam
Definition: rel.h:188
bool bsysscan
Definition: xact.c:100
TransactionId CheckXidAlive
Definition: xact.c:99

References bsysscan, CheckXidAlive, elog(), ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), index_update_stats(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
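
The typical pattern behind most of the callers above is a plain catalog-style sequential scan. A minimal sketch follows; process_tuple() is a placeholder for the caller's per-row work, and the relation is assumed to be a catalog (table_beginscan_catalog() uses the catalog snapshot).

#include "postgres.h"

#include "access/heapam.h"
#include "access/table.h"
#include "access/tableam.h"
#include "storage/lockdefs.h"

static void
scan_whole_relation(Oid relid)
{
    Relation    rel = table_open(relid, AccessShareLock);
    TableScanDesc scan = table_beginscan_catalog(rel, 0, NULL);
    HeapTuple   tuple;

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* process_tuple(tuple);   -- placeholder; tuple is only valid here */
    }

    table_endscan(scan);
    table_close(rel, AccessShareLock);
}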

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1135 of file heapam.c.

1136 {
1137  HeapScanDesc scan = (HeapScanDesc) sscan;
1138 
1139  /* Note: no locking manipulations needed */
1140 
1141  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1142  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1143  else
1144  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1145 
1146  if (scan->rs_ctup.t_data == NULL)
1147  {
1148  ExecClearTuple(slot);
1149  return false;
1150  }
1151 
1152  /*
1153  * if we get here it means we have a new current scan tuple, so point to
1154  * the proper return buffer and return the tuple.
1155  */
1156 
1157  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1158 
1159  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1160  scan->rs_cbuf);
1161  return true;
1162 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1393
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:432

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
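
A minimal sketch of driving the scan into a slot directly; executor code would normally reach this via table_scan_getnextslot(), so calling heap_getnextslot() by name only makes sense when the relation is known to use the heap AM. The helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"

static void
scan_into_slot(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* slot holds a buffer heap tuple pinned by the scan */
    }

    ExecDropSingleTupleTableSlot(slot);
    table_endscan(scan);
}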

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1238 of file heapam.c.

1240 {
1241  HeapScanDesc scan = (HeapScanDesc) sscan;
1242  ItemPointer mintid = &sscan->rs_mintid;
1243  ItemPointer maxtid = &sscan->rs_maxtid;
1244 
1245  /* Note: no locking manipulations needed */
1246  for (;;)
1247  {
1248  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1249  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1250  else
1251  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1252 
1253  if (scan->rs_ctup.t_data == NULL)
1254  {
1255  ExecClearTuple(slot);
1256  return false;
1257  }
1258 
1259  /*
1260  * heap_set_tidrange will have used heap_setscanlimits to limit the
1261  * range of pages we scan to only ones that can contain the TID range
1262  * we're scanning for. Here we must filter out any tuples from these
1263  * pages that are outside of that range.
1264  */
1265  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1266  {
1267  ExecClearTuple(slot);
1268 
1269  /*
1270  * When scanning backwards, the TIDs will be in descending order.
1271  * Future tuples in this direction will be lower still, so we can
1272  * just return false to indicate there will be no more tuples.
1273  */
1274  if (ScanDirectionIsBackward(direction))
1275  return false;
1276 
1277  continue;
1278  }
1279 
1280  /*
1281  * Likewise for the final page, we must filter out TIDs greater than
1282  * maxtid.
1283  */
1284  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1285  {
1286  ExecClearTuple(slot);
1287 
1288  /*
1289  * When scanning forward, the TIDs will be in ascending order.
1290  * Future tuples in this direction will be higher still, so we can
1291  * just return false to indicate there will be no more tuples.
1292  */
1293  if (ScanDirectionIsForward(direction))
1294  return false;
1295  continue;
1296  }
1297 
1298  break;
1299  }
1300 
1301  /*
1302  * if we get here it means we have a new current scan tuple, so point to
1303  * the proper return buffer and return the tuple.
1304  */
1305  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1306 
1307  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1308  return true;
1309 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:40
ItemPointerData rs_maxtid
Definition: relscan.h:41

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
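
A minimal sketch of a TID range scan, mirroring what the TID Range Scan executor node does through the table AM: table_beginscan_tidrange() ends up calling heap_set_tidrange(), so the loop below only ever sees tuples within [mintid, maxtid]. The helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"

static void
scan_tid_range(Relation rel, Snapshot snapshot,
               ItemPointer mintid, ItemPointer maxtid)
{
    TableScanDesc scan = table_beginscan_tidrange(rel, snapshot,
                                                  mintid, maxtid);
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* consume the tuple currently stored in slot */
    }

    ExecDropSingleTupleTableSlot(slot);
    table_endscan(scan);
}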

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)

Definition at line 1465 of file heapam.c.

1468 {
1469  Page page = BufferGetPage(buffer);
1470  TransactionId prev_xmax = InvalidTransactionId;
1471  BlockNumber blkno;
1472  OffsetNumber offnum;
1473  bool at_chain_start;
1474  bool valid;
1475  bool skip;
1476  GlobalVisState *vistest = NULL;
1477 
1478  /* If this is not the first call, previous call returned a (live!) tuple */
1479  if (all_dead)
1480  *all_dead = first_call;
1481 
1482  blkno = ItemPointerGetBlockNumber(tid);
1483  offnum = ItemPointerGetOffsetNumber(tid);
1484  at_chain_start = first_call;
1485  skip = !first_call;
1486 
1487  /* XXX: we should assert that a snapshot is pushed or registered */
1488  Assert(TransactionIdIsValid(RecentXmin));
1489  Assert(BufferGetBlockNumber(buffer) == blkno);
1490 
1491  /* Scan through possible multiple members of HOT-chain */
1492  for (;;)
1493  {
1494  ItemId lp;
1495 
1496  /* check for bogus TID */
1497  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1498  break;
1499 
1500  lp = PageGetItemId(page, offnum);
1501 
1502  /* check for unused, dead, or redirected items */
1503  if (!ItemIdIsNormal(lp))
1504  {
1505  /* We should only see a redirect at start of chain */
1506  if (ItemIdIsRedirected(lp) && at_chain_start)
1507  {
1508  /* Follow the redirect */
1509  offnum = ItemIdGetRedirect(lp);
1510  at_chain_start = false;
1511  continue;
1512  }
1513  /* else must be end of chain */
1514  break;
1515  }
1516 
1517  /*
1518  * Update heapTuple to point to the element of the HOT chain we're
1519  * currently investigating. Having t_self set correctly is important
1520  * because the SSI checks and the *Satisfies routine for historical
1521  * MVCC snapshots need the correct tid to decide about the visibility.
1522  */
1523  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1524  heapTuple->t_len = ItemIdGetLength(lp);
1525  heapTuple->t_tableOid = RelationGetRelid(relation);
1526  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1527 
1528  /*
1529  * Shouldn't see a HEAP_ONLY tuple at chain start.
1530  */
1531  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1532  break;
1533 
1534  /*
1535  * The xmin should match the previous xmax value, else chain is
1536  * broken.
1537  */
1538  if (TransactionIdIsValid(prev_xmax) &&
1539  !TransactionIdEquals(prev_xmax,
1540  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1541  break;
1542 
1543  /*
1544  * When first_call is true (and thus, skip is initially false) we'll
1545  * return the first tuple we find. But on later passes, heapTuple
1546  * will initially be pointing to the tuple we returned last time.
1547  * Returning it again would be incorrect (and would loop forever), so
1548  * we skip it and return the next match we find.
1549  */
1550  if (!skip)
1551  {
1552  /* If it's visible per the snapshot, we must return it */
1553  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1554  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1555  buffer, snapshot);
1556 
1557  if (valid)
1558  {
1559  ItemPointerSetOffsetNumber(tid, offnum);
1560  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1561  HeapTupleHeaderGetXmin(heapTuple->t_data));
1562  if (all_dead)
1563  *all_dead = false;
1564  return true;
1565  }
1566  }
1567  skip = false;
1568 
1569  /*
1570  * If we can't see it, maybe no one else can either. At caller
1571  * request, check whether all chain members are dead to all
1572  * transactions.
1573  *
1574  * Note: if you change the criterion here for what is "dead", fix the
1575  * planner's get_actual_variable_range() function to match.
1576  */
1577  if (all_dead && *all_dead)
1578  {
1579  if (!vistest)
1580  vistest = GlobalVisTestFor(relation);
1581 
1582  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1583  *all_dead = false;
1584  }
1585 
1586  /*
1587  * Check to see if HOT chain continues past this tuple; if so fetch
1588  * the next offnum and loop around.
1589  */
1590  if (HeapTupleIsHotUpdated(heapTuple))
1591  {
1592  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1593  blkno);
1594  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1595  at_chain_start = false;
1596  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1597  }
1598  else
1599  break; /* end of chain */
1600  }
1601 
1602  return false;
1603 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4011
TransactionId RecentXmin
Definition: snapmgr.c:105

References Assert(), BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
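
A condensed sketch in the spirit of heapam_index_fetch_tuple(): pin and share-lock the buffer for the TID, then let heap_hot_search_buffer() walk the HOT chain for the first visible member. Buffer reuse across calls (which the real index-fetch path performs) is omitted, and the helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

static bool
fetch_via_hot_chain(Relation rel, Snapshot snapshot, ItemPointer tid,
                    HeapTupleData *tuple, bool *all_dead)
{
    Buffer      buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
    bool        found;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* on success, *tid is updated to the visible chain member's offset */
    found = heap_hot_search_buffer(tid, rel, buf, snapshot, tuple,
                                   all_dead, true /* first_call */ );
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);

    return found;
}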

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7629 of file heapam.c.

7630 {
7631  /* Initial assumption is that earlier pruning took care of conflict */
7632  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7633  BlockNumber blkno = InvalidBlockNumber;
7634  Buffer buf = InvalidBuffer;
7635  Page page = NULL;
7636  OffsetNumber maxoff = InvalidOffsetNumber;
7637  TransactionId priorXmax;
7638 #ifdef USE_PREFETCH
7639  IndexDeletePrefetchState prefetch_state;
7640  int prefetch_distance;
7641 #endif
7642  SnapshotData SnapshotNonVacuumable;
7643  int finalndeltids = 0,
7644  nblocksaccessed = 0;
7645 
7646  /* State that's only used in bottom-up index deletion case */
7647  int nblocksfavorable = 0;
7648  int curtargetfreespace = delstate->bottomupfreespace,
7649  lastfreespace = 0,
7650  actualfreespace = 0;
7651  bool bottomup_final_block = false;
7652 
7653  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7654 
7655  /* Sort caller's deltids array by TID for further processing */
7656  index_delete_sort(delstate);
7657 
7658  /*
7659  * Bottom-up case: resort deltids array in an order attuned to where the
7660  * greatest number of promising TIDs are to be found, and determine how
7661  * many blocks from the start of sorted array should be considered
7662  * favorable. This will also shrink the deltids array in order to
7663  * eliminate completely unfavorable blocks up front.
7664  */
7665  if (delstate->bottomup)
7666  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7667 
7668 #ifdef USE_PREFETCH
7669  /* Initialize prefetch state. */
7670  prefetch_state.cur_hblkno = InvalidBlockNumber;
7671  prefetch_state.next_item = 0;
7672  prefetch_state.ndeltids = delstate->ndeltids;
7673  prefetch_state.deltids = delstate->deltids;
7674 
7675  /*
7676  * Determine the prefetch distance that we will attempt to maintain.
7677  *
7678  * Since the caller holds a buffer lock somewhere in rel, we'd better make
7679  * sure that isn't a catalog relation before we call code that does
7680  * syscache lookups, to avoid risk of deadlock.
7681  */
7682  if (IsCatalogRelation(rel))
7683  prefetch_distance = maintenance_io_concurrency;
7684  else
7685  prefetch_distance =
7686  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
7687 
7688  /* Cap initial prefetch distance for bottom-up deletion caller */
7689  if (delstate->bottomup)
7690  {
7691  Assert(nblocksfavorable >= 1);
7692  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
7693  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
7694  }
7695 
7696  /* Start prefetching. */
7697  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
7698 #endif
7699 
7700  /* Iterate over deltids, determine which to delete, check their horizon */
7701  Assert(delstate->ndeltids > 0);
7702  for (int i = 0; i < delstate->ndeltids; i++)
7703  {
7704  TM_IndexDelete *ideltid = &delstate->deltids[i];
7705  TM_IndexStatus *istatus = delstate->status + ideltid->id;
7706  ItemPointer htid = &ideltid->tid;
7707  OffsetNumber offnum;
7708 
7709  /*
7710  * Read buffer, and perform required extra steps each time a new block
7711  * is encountered. Avoid refetching if it's the same block as the one
7712  * from the last htid.
7713  */
7714  if (blkno == InvalidBlockNumber ||
7715  ItemPointerGetBlockNumber(htid) != blkno)
7716  {
7717  /*
7718  * Consider giving up early for bottom-up index deletion caller
7719  * first. (Only prefetch next-next block afterwards, when it
7720  * becomes clear that we're at least going to access the next
7721  * block in line.)
7722  *
7723  * Sometimes the first block frees so much space for bottom-up
7724  * caller that the deletion process can end without accessing any
7725  * more blocks. It is usually necessary to access 2 or 3 blocks
7726  * per bottom-up deletion operation, though.
7727  */
7728  if (delstate->bottomup)
7729  {
7730  /*
7731  * We often allow caller to delete a few additional items
7732  * whose entries we reached after the point that space target
7733  * from caller was satisfied. The cost of accessing the page
7734  * was already paid at that point, so it made sense to finish
7735  * it off. When that happened, we finalize everything here
7736  * (by finishing off the whole bottom-up deletion operation
7737  * without needlessly paying the cost of accessing any more
7738  * blocks).
7739  */
7740  if (bottomup_final_block)
7741  break;
7742 
7743  /*
7744  * Give up when we didn't enable our caller to free any
7745  * additional space as a result of processing the page that we
7746  * just finished up with. This rule is the main way in which
7747  * we keep the cost of bottom-up deletion under control.
7748  */
7749  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
7750  break;
7751  lastfreespace = actualfreespace; /* for next time */
7752 
7753  /*
7754  * Deletion operation (which is bottom-up) will definitely
7755  * access the next block in line. Prepare for that now.
7756  *
7757  * Decay target free space so that we don't hang on for too
7758  * long with a marginal case. (Space target is only truly
7759  * helpful when it allows us to recognize that we don't need
7760  * to access more than 1 or 2 blocks to satisfy caller due to
7761  * agreeable workload characteristics.)
7762  *
7763  * We are a bit more patient when we encounter contiguous
7764  * blocks, though: these are treated as favorable blocks. The
7765  * decay process is only applied when the next block in line
7766  * is not a favorable/contiguous block. This is not an
7767  * exception to the general rule; we still insist on finding
7768  * at least one deletable item per block accessed. See
7769  * bottomup_nblocksfavorable() for full details of the theory
7770  * behind favorable blocks and heap block locality in general.
7771  *
7772  * Note: The first block in line is always treated as a
7773  * favorable block, so the earliest possible point that the
7774  * decay can be applied is just before we access the second
7775  * block in line. The Assert() verifies this for us.
7776  */
7777  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
7778  if (nblocksfavorable > 0)
7779  nblocksfavorable--;
7780  else
7781  curtargetfreespace /= 2;
7782  }
7783 
7784  /* release old buffer */
7785  if (BufferIsValid(buf))
7786  UnlockReleaseBuffer(buf);
7787 
7788  blkno = ItemPointerGetBlockNumber(htid);
7789  buf = ReadBuffer(rel, blkno);
7790  nblocksaccessed++;
7791  Assert(!delstate->bottomup ||
7792  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
7793 
7794 #ifdef USE_PREFETCH
7795 
7796  /*
7797  * To maintain the prefetch distance, prefetch one more page for
7798  * each page we read.
7799  */
7800  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
7801 #endif
7802 
7803  LockBuffer(buf, BUFFER_LOCK_SHARE);
7804 
7805  page = BufferGetPage(buf);
7806  maxoff = PageGetMaxOffsetNumber(page);
7807  }
7808 
7809  /*
7810  * In passing, detect index corruption involving an index page with a
7811  * TID that points to a location in the heap that couldn't possibly be
7812  * correct. We only do this with actual TIDs from caller's index page
7813  * (not items reached by traversing through a HOT chain).
7814  */
7815  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
7816 
7817  if (istatus->knowndeletable)
7818  Assert(!delstate->bottomup && !istatus->promising);
7819  else
7820  {
7821  ItemPointerData tmp = *htid;
7822  HeapTupleData heapTuple;
7823 
7824  /* Are any tuples from this HOT chain non-vacuumable? */
7825  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
7826  &heapTuple, NULL, true))
7827  continue; /* can't delete entry */
7828 
7829  /* Caller will delete, since whole HOT chain is vacuumable */
7830  istatus->knowndeletable = true;
7831 
7832  /* Maintain index free space info for bottom-up deletion case */
7833  if (delstate->bottomup)
7834  {
7835  Assert(istatus->freespace > 0);
7836  actualfreespace += istatus->freespace;
7837  if (actualfreespace >= curtargetfreespace)
7838  bottomup_final_block = true;
7839  }
7840  }
7841 
7842  /*
7843  * Maintain snapshotConflictHorizon value for deletion operation as a
7844  * whole by advancing current value using heap tuple headers. This is
7845  * loosely based on the logic for pruning a HOT chain.
7846  */
7847  offnum = ItemPointerGetOffsetNumber(htid);
7848  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7849  for (;;)
7850  {
7851  ItemId lp;
7852  HeapTupleHeader htup;
7853 
7854  /* Sanity check (pure paranoia) */
7855  if (offnum < FirstOffsetNumber)
7856  break;
7857 
7858  /*
7859  * An offset past the end of page's line pointer array is possible
7860  * when the array was truncated
7861  */
7862  if (offnum > maxoff)
7863  break;
7864 
7865  lp = PageGetItemId(page, offnum);
7866  if (ItemIdIsRedirected(lp))
7867  {
7868  offnum = ItemIdGetRedirect(lp);
7869  continue;
7870  }
7871 
7872  /*
7873  * We'll often encounter LP_DEAD line pointers (especially with an
7874  * entry marked knowndeletable by our caller up front). No heap
7875  * tuple headers get examined for an htid that leads us to an
7876  * LP_DEAD item. This is okay because the earlier pruning
7877  * operation that made the line pointer LP_DEAD in the first place
7878  * must have considered the original tuple header as part of
7879  * generating its own snapshotConflictHorizon value.
7880  *
7881  * Relying on XLOG_HEAP2_PRUNE records like this is the same
7882  * strategy that index vacuuming uses in all cases. Index VACUUM
7883  * WAL records don't even have a snapshotConflictHorizon field of
7884  * their own for this reason.
7885  */
7886  if (!ItemIdIsNormal(lp))
7887  break;
7888 
7889  htup = (HeapTupleHeader) PageGetItem(page, lp);
7890 
7891  /*
7892  * Check the tuple XMIN against prior XMAX, if any
7893  */
7894  if (TransactionIdIsValid(priorXmax) &&
7895  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7896  break;
7897 
7898  HeapTupleHeaderAdvanceConflictHorizon(htup,
7899  &snapshotConflictHorizon);
7900 
7901  /*
7902  * If the tuple is not HOT-updated, then we are at the end of this
7903  * HOT-chain. No need to visit later tuples from the same update
7904  * chain (they get their own index entries) -- just move on to
7905  * next htid from index AM caller.
7906  */
7907  if (!HeapTupleHeaderIsHotUpdated(htup))
7908  break;
7909 
7910  /* Advance to next HOT chain member */
7911  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7912  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7913  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7914  }
7915 
7916  /* Enable further/final shrinking of deltids for caller */
7917  finalndeltids = i + 1;
7918  }
7919 
7920  UnlockReleaseBuffer(buf);
7921 
7922  /*
7923  * Shrink deltids array to exclude non-deletable entries at the end. This
7924  * is not just a minor optimization. Final deltids array size might be
7925  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
7926  * ndeltids being zero in all cases with zero total deletable entries.
7927  */
7928  Assert(finalndeltids > 0 || delstate->bottomup);
7929  delstate->ndeltids = finalndeltids;
7930 
7931  return snapshotConflictHorizon;
7932 }
int maintenance_io_concurrency
Definition: bufmgr.c:153
#define Min(x, y)
Definition: c.h:993
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:105
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8187
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7484
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7569
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:191
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1465
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:7974
static char * buf
Definition: pg_test_fsync.c:67
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:247
int bottomupfreespace
Definition: tableam.h:242
TM_IndexDelete * deltids
Definition: tableam.h:246
ItemPointerData tid
Definition: tableam.h:205
bool knowndeletable
Definition: tableam.h:212
bool promising
Definition: tableam.h:215
int16 freespace
Definition: tableam.h:216

References Assert(), TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
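
A rough sketch of the "simple" (non-bottom-up) deletion path from an index AM's point of view, similar in shape to what nbtree prepares in _bt_delitems_delete_check(): every TID is already known deletable from LP_DEAD index hints, so heap_index_delete_tuples() is reached via table_index_delete_tuples() mainly to compute the snapshot conflict horizon. Field usage here (notably irel/iblknum, used only for corruption reporting) is an assumption based on the current TM_IndexDeleteOp layout; deadtids, indexOffsets, and ndead are caller-supplied placeholders.

#include "postgres.h"

#include "access/tableam.h"
#include "storage/block.h"
#include "storage/off.h"

static TransactionId
delete_known_dead(Relation heapRel, Relation indexRel, BlockNumber indexBlock,
                  ItemPointerData *deadtids, OffsetNumber *indexOffsets,
                  int ndead)
{
    TM_IndexDeleteOp delstate;

    delstate.irel = indexRel;           /* used only in corruption reports */
    delstate.iblknum = indexBlock;
    delstate.bottomup = false;          /* simple deletion, no space target */
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = ndead;
    delstate.deltids = palloc(ndead * sizeof(TM_IndexDelete));
    delstate.status = palloc(ndead * sizeof(TM_IndexStatus));

    for (int i = 0; i < ndead; i++)
    {
        delstate.deltids[i].tid = deadtids[i];
        delstate.deltids[i].id = i;
        delstate.status[i].idxoffnum = indexOffsets[i];
        delstate.status[i].knowndeletable = true;   /* from LP_DEAD hints */
        delstate.status[i].promising = false;
        delstate.status[i].freespace = 0;
    }

    /* dispatches to heap_index_delete_tuples() for heap tables */
    return table_index_delete_tuples(heapRel, &delstate);
}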

◆ heap_inplace_update()

void heap_inplace_update ( Relation  relation,
HeapTuple  tuple 
)

Definition at line 5875 of file heapam.c.

5876 {
5877  Buffer buffer;
5878  Page page;
5879  OffsetNumber offnum;
5880  ItemId lp = NULL;
5881  HeapTupleHeader htup;
5882  uint32 oldlen;
5883  uint32 newlen;
5884 
5885  /*
5886  * For now, we don't allow parallel updates. Unlike a regular update,
5887  * this should never create a combo CID, so it might be possible to relax
5888  * this restriction, but not without more thought and testing. It's not
5889  * clear that it would be useful, anyway.
5890  */
5891  if (IsInParallelMode())
5892  ereport(ERROR,
5893  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
5894  errmsg("cannot update tuples during a parallel operation")));
5895 
5896  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
5897  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5898  page = (Page) BufferGetPage(buffer);
5899 
5900  offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
5901  if (PageGetMaxOffsetNumber(page) >= offnum)
5902  lp = PageGetItemId(page, offnum);
5903 
5904  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5905  elog(ERROR, "invalid lp");
5906 
5907  htup = (HeapTupleHeader) PageGetItem(page, lp);
5908 
5909  oldlen = ItemIdGetLength(lp) - htup->t_hoff;
5910  newlen = tuple->t_len - tuple->t_data->t_hoff;
5911  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
5912  elog(ERROR, "wrong tuple length");
5913 
5914  /* NO EREPORT(ERROR) from here till changes are logged */
5915  START_CRIT_SECTION();
5916 
5917  memcpy((char *) htup + htup->t_hoff,
5918  (char *) tuple->t_data + tuple->t_data->t_hoff,
5919  newlen);
5920 
5921  MarkBufferDirty(buffer);
5922 
5923  /* XLOG stuff */
5924  if (RelationNeedsWAL(relation))
5925  {
5926  xl_heap_inplace xlrec;
5927  XLogRecPtr recptr;
5928 
5929  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5930 
5931  XLogBeginInsert();
5932  XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
5933 
5934  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5935  XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
5936 
5937  /* inplace updates aren't decoded atm, don't log the origin */
5938 
5939  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
5940 
5941  PageSetLSN(page, recptr);
5942  }
5943 
5944  END_CRIT_SECTION();
5945 
5946  UnlockReleaseBuffer(buffer);
5947 
5948  /*
5949  * Send out shared cache inval if necessary. Note that because we only
5950  * pass the new version of the tuple, this mustn't be used for any
5951  * operations that could change catcache lookup keys. But we aren't
5952  * bothering with index updates either, so that's true a fortiori.
5953  */
5954  if (!IsBootstrapProcessingMode())
5955  CacheInvalidateHeapTuple(relation, tuple, NULL);
5956 }
unsigned int uint32
Definition: c.h:495
#define SizeOfHeapInplace
Definition: heapam_xlog.h:316
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:39
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:414
OffsetNumber offnum
Definition: heapam_xlog.h:312

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), CacheInvalidateHeapTuple(), elog(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, IsBootstrapProcessingMode, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_inplace::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapInplace, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by create_toast_table(), dropdb(), index_update_stats(), vac_update_datfrozenxid(), and vac_update_relstats().
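
A minimal sketch of the vac_update_relstats()-style usage: take a writable copy of the current catalog row, overwrite fixed-width fields that do not affect catcache lookup keys or the tuple length, and write it back non-transactionally. The tuple copy (e.g. from SearchSysCacheCopy1()) and the helper name are assumptions.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "catalog/pg_class.h"
#include "storage/block.h"

static void
update_relpages_inplace(Relation pg_class_rel, HeapTuple ctup,
                        BlockNumber relpages)
{
    Form_pg_class pgcform = (Form_pg_class) GETSTRUCT(ctup);

    pgcform->relpages = (int32) relpages;

    /* overwrites the row in place: no new version, no index updates */
    heap_inplace_update(pg_class_rel, ctup);
}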

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1817 of file heapam.c.

1819 {
1820  TransactionId xid = GetCurrentTransactionId();
1821  HeapTuple heaptup;
1822  Buffer buffer;
1823  Buffer vmbuffer = InvalidBuffer;
1824  bool all_visible_cleared = false;
1825 
1826  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
1827  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
1828  RelationGetNumberOfAttributes(relation));
1829 
1830  /*
1831  * Fill in tuple header fields and toast the tuple if necessary.
1832  *
1833  * Note: below this point, heaptup is the data we actually intend to store
1834  * into the relation; tup is the caller's original untoasted data.
1835  */
1836  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
1837 
1838  /*
1839  * Find buffer to insert this tuple into. If the page is all visible,
1840  * this will also pin the requisite visibility map page.
1841  */
1842  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
1843  InvalidBuffer, options, bistate,
1844  &vmbuffer, NULL,
1845  0);
1846 
1847  /*
1848  * We're about to do the actual insert -- but check for conflict first, to
1849  * avoid possibly having to roll back work we've just done.
1850  *
1851  * This is safe without a recheck as long as there is no possibility of
1852  * another process scanning the page between this check and the insert
1853  * being visible to the scan (i.e., an exclusive buffer content lock is
1854  * continuously held from this point until the tuple insert is visible).
1855  *
1856  * For a heap insert, we only need to check for table-level SSI locks. Our
1857  * new tuple can't possibly conflict with existing tuple locks, and heap
1858  * page locks are only consolidated versions of tuple locks; they do not
1859  * lock "gaps" as index page locks do. So we don't need to specify a
1860  * buffer when making the call, which makes for a faster check.
1861  */
1862  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
1863 
1864  /* NO EREPORT(ERROR) from here till changes are logged */
1865  START_CRIT_SECTION();
1866 
1867  RelationPutHeapTuple(relation, buffer, heaptup,
1868  (options & HEAP_INSERT_SPECULATIVE) != 0);
1869 
1870  if (PageIsAllVisible(BufferGetPage(buffer)))
1871  {
1872  all_visible_cleared = true;
1874  visibilitymap_clear(relation,
1875  ItemPointerGetBlockNumber(&(heaptup->t_self)),
1876  vmbuffer, VISIBILITYMAP_VALID_BITS);
1877  }
1878 
1879  /*
1880  * XXX Should we set PageSetPrunable on this page ?
1881  *
1882  * The inserting transaction may eventually abort thus making this tuple
1883  * DEAD and hence available for pruning. Though we don't want to optimize
1884  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
1885  * aborted tuple will never be pruned until next vacuum is triggered.
1886  *
1887  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
1888  */
1889 
1890  MarkBufferDirty(buffer);
1891 
1892  /* XLOG stuff */
1893  if (RelationNeedsWAL(relation))
1894  {
1895  xl_heap_insert xlrec;
1896  xl_heap_header xlhdr;
1897  XLogRecPtr recptr;
1898  Page page = BufferGetPage(buffer);
1899  uint8 info = XLOG_HEAP_INSERT;
1900  int bufflags = 0;
1901 
1902  /*
1903  * If this is a catalog, we need to transmit combo CIDs to properly
1904  * decode, so log that as well.
1905  */
1907  log_heap_new_cid(relation, heaptup);
1908 
1909  /*
1910  * If this is the single and first tuple on page, we can reinit the
1911  * page instead of restoring the whole thing. Set flag, and hide
1912  * buffer references from XLogInsert.
1913  */
1914  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
1915  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
1916  {
1917  info |= XLOG_HEAP_INIT_PAGE;
1918  bufflags |= REGBUF_WILL_INIT;
1919  }
1920 
1921  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
1922  xlrec.flags = 0;
1923  if (all_visible_cleared)
1924  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
1925  if (options & HEAP_INSERT_SPECULATIVE)
1926  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
1927  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
1928 
1929  /*
1930  * For logical decoding, we need the tuple even if we're doing a full
1931  * page write, so make sure it's included even if we take a full-page
1932  * image. (XXX We could alternatively store a pointer into the FPW).
1933  */
1934  if (RelationIsLogicallyLogged(relation) &&
1936  {
1937  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
1938  bufflags |= REGBUF_KEEP_DATA;
1939 
1940  if (IsToastRelation(relation))
1941  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
1942  }
1943 
1944  XLogBeginInsert();
1945  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
1946 
1947  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
1948  xlhdr.t_infomask = heaptup->t_data->t_infomask;
1949  xlhdr.t_hoff = heaptup->t_data->t_hoff;
1950 
1951  /*
1952  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
1953  * write the whole page to the xlog, we don't need to store
1954  * xl_heap_header in the xlog.
1955  */
1956  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
1957  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
1958  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
1959  XLogRegisterBufData(0,
1960  (char *) heaptup->t_data + SizeofHeapTupleHeader,
1961  heaptup->t_len - SizeofHeapTupleHeader);
1962 
1963  /* filtering by origin on a row level is much more efficient */
1964  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
1965 
1966  recptr = XLogInsert(RM_HEAP_ID, info);
1967 
1968  PageSetLSN(page, recptr);
1969  }
1970 
1971  END_CRIT_SECTION();
1972 
1973  UnlockReleaseBuffer(buffer);
1974  if (vmbuffer != InvalidBuffer)
1975  ReleaseBuffer(vmbuffer);
1976 
1977  /*
1978  * If tuple is cachable, mark it for invalidation from the caches in case
1979  * we abort. Note it is OK to do this after releasing the buffer, because
1980  * the heaptup data structure is all in local memory, not in the shared
1981  * buffer.
1982  */
1983  CacheInvalidateHeapTuple(relation, heaptup, NULL);
1984 
1985  /* Note: speculative insertions are counted too, even if aborted later */
1986  pgstat_count_heap_insert(relation, 1);
1987 
1988  /*
1989  * If heaptup is a private copy, release it. Don't forget to copy t_self
1990  * back to the caller's image, too.
1991  */
1992  if (heaptup != tup)
1993  {
1994  tup->t_self = heaptup->t_self;
1995  heap_freetuple(heaptup);
1996  }
1997 }
unsigned char uint8
Definition: c.h:493
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2006
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:37
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:36
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:70
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:68
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:66
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:32
#define SizeOfHeapInsert
Definition: heapam_xlog.h:162
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:69
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:46
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:36
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:503
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:702
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:510
OffsetNumber offnum
Definition: heapam_xlog.h:156
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
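
A minimal sketch of a direct insertion, as simple_heap_insert() does for catalog rows: form the tuple, insert it under the current command ID, and leave index maintenance and CommandCounterIncrement() to the caller (omitted here). The options value 0 requests none of the skip-FSM, frozen, no-logical, or speculative behaviors; the helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "utils/rel.h"

static void
insert_one_tuple(Relation rel, Datum *values, bool *isnull)
{
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

    heap_insert(rel, tup, GetCurrentCommandId(true),
                0 /* options */ , NULL /* bistate */ );

    /* tup->t_self now carries the TID assigned to the new row */
    heap_freetuple(tup);
}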

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
struct TM_FailureData tmfd 
)

Definition at line 4126 of file heapam.c.

4130 {
4131  TM_Result result;
4132  ItemPointer tid = &(tuple->t_self);
4133  ItemId lp;
4134  Page page;
4135  Buffer vmbuffer = InvalidBuffer;
4136  BlockNumber block;
4137  TransactionId xid,
4138  xmax;
4139  uint16 old_infomask,
4140  new_infomask,
4141  new_infomask2;
4142  bool first_time = true;
4143  bool skip_tuple_lock = false;
4144  bool have_tuple_lock = false;
4145  bool cleared_all_frozen = false;
4146 
4147  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4148  block = ItemPointerGetBlockNumber(tid);
4149 
4150  /*
4151  * Before locking the buffer, pin the visibility map page if it appears to
4152  * be necessary. Since we haven't got the lock yet, someone else might be
4153  * in the middle of changing this, so we'll need to recheck after we have
4154  * the lock.
4155  */
4156  if (PageIsAllVisible(BufferGetPage(*buffer)))
4157  visibilitymap_pin(relation, block, &vmbuffer);
4158 
4159  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4160 
4161  page = BufferGetPage(*buffer);
4162  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4163  Assert(ItemIdIsNormal(lp));
4164 
4165  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4166  tuple->t_len = ItemIdGetLength(lp);
4167  tuple->t_tableOid = RelationGetRelid(relation);
4168 
4169 l3:
4170  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4171 
4172  if (result == TM_Invisible)
4173  {
4174  /*
4175  * This is possible, but only when locking a tuple for ON CONFLICT
4176  * UPDATE. We return this value here rather than throwing an error in
4177  * order to give that case the opportunity to throw a more specific
4178  * error.
4179  */
4180  result = TM_Invisible;
4181  goto out_locked;
4182  }
4183  else if (result == TM_BeingModified ||
4184  result == TM_Updated ||
4185  result == TM_Deleted)
4186  {
4187  TransactionId xwait;
4188  uint16 infomask;
4189  uint16 infomask2;
4190  bool require_sleep;
4191  ItemPointerData t_ctid;
4192 
4193  /* must copy state data before unlocking buffer */
4194  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4195  infomask = tuple->t_data->t_infomask;
4196  infomask2 = tuple->t_data->t_infomask2;
4197  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4198 
4199  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4200 
4201  /*
4202  * If any subtransaction of the current top transaction already holds
4203  * a lock as strong as or stronger than what we're requesting, we
4204  * effectively hold the desired lock already. We *must* succeed
4205  * without trying to take the tuple lock, else we will deadlock
4206  * against anyone wanting to acquire a stronger lock.
4207  *
4208  * Note we only do this the first time we loop on the HTSU result;
4209  * there is no point in testing in subsequent passes, because
4210  * evidently our own transaction cannot have acquired a new lock after
4211  * the first time we checked.
4212  */
4213  if (first_time)
4214  {
4215  first_time = false;
4216 
4217  if (infomask & HEAP_XMAX_IS_MULTI)
4218  {
4219  int i;
4220  int nmembers;
4221  MultiXactMember *members;
4222 
4223  /*
4224  * We don't need to allow old multixacts here; if that had
4225  * been the case, HeapTupleSatisfiesUpdate would have returned
4226  * MayBeUpdated and we wouldn't be here.
4227  */
4228  nmembers =
4229  GetMultiXactIdMembers(xwait, &members, false,
4230  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4231 
4232  for (i = 0; i < nmembers; i++)
4233  {
4234  /* only consider members of our own transaction */
4235  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4236  continue;
4237 
4238  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4239  {
4240  pfree(members);
4241  result = TM_Ok;
4242  goto out_unlocked;
4243  }
4244  else
4245  {
4246  /*
4247  * Disable acquisition of the heavyweight tuple lock.
4248  * Otherwise, when promoting a weaker lock, we might
4249  * deadlock with another locker that has acquired the
4250  * heavyweight tuple lock and is waiting for our
4251  * transaction to finish.
4252  *
4253  * Note that in this case we still need to wait for
4254  * the multixact if required, to avoid acquiring
4255  * conflicting locks.
4256  */
4257  skip_tuple_lock = true;
4258  }
4259  }
4260 
4261  if (members)
4262  pfree(members);
4263  }
4264  else if (TransactionIdIsCurrentTransactionId(xwait))
4265  {
4266  switch (mode)
4267  {
4268  case LockTupleKeyShare:
4269  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4270  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4271  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4272  result = TM_Ok;
4273  goto out_unlocked;
4274  case LockTupleShare:
4275  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4276  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4277  {
4278  result = TM_Ok;
4279  goto out_unlocked;
4280  }
4281  break;
4282  case LockTupleNoKeyExclusive:
4283  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4284  {
4285  result = TM_Ok;
4286  goto out_unlocked;
4287  }
4288  break;
4289  case LockTupleExclusive:
4290  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4291  infomask2 & HEAP_KEYS_UPDATED)
4292  {
4293  result = TM_Ok;
4294  goto out_unlocked;
4295  }
4296  break;
4297  }
4298  }
4299  }
4300 
4301  /*
4302  * Initially assume that we will have to wait for the locking
4303  * transaction(s) to finish. We check various cases below in which
4304  * this can be turned off.
4305  */
4306  require_sleep = true;
4307  if (mode == LockTupleKeyShare)
4308  {
4309  /*
4310  * If we're requesting KeyShare, and there's no update present, we
4311  * don't need to wait. Even if there is an update, we can still
4312  * continue if the key hasn't been modified.
4313  *
4314  * However, if there are updates, we need to walk the update chain
4315  * to mark future versions of the row as locked, too. That way,
4316  * if somebody deletes that future version, we're protected
4317  * against the key going away. This locking of future versions
4318  * could block momentarily, if a concurrent transaction is
4319  * deleting a key; or it could return a value to the effect that
4320  * the transaction deleting the key has already committed. So we
4321  * do this before re-locking the buffer; otherwise this would be
4322  * prone to deadlocks.
4323  *
4324  * Note that the TID we're locking was grabbed before we unlocked
4325  * the buffer. For it to change while we're not looking, the
4326  * other properties we're testing for below after re-locking the
4327  * buffer would also change, in which case we would restart this
4328  * loop above.
4329  */
4330  if (!(infomask2 & HEAP_KEYS_UPDATED))
4331  {
4332  bool updated;
4333 
4334  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4335 
4336  /*
4337  * If there are updates, follow the update chain; bail out if
4338  * that cannot be done.
4339  */
4340  if (follow_updates && updated)
4341  {
4342  TM_Result res;
4343 
4344  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4345  GetCurrentTransactionId(),
4346  mode);
4347  if (res != TM_Ok)
4348  {
4349  result = res;
4350  /* recovery code expects to have buffer lock held */
4351  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4352  goto failed;
4353  }
4354  }
4355 
4356  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4357 
4358  /*
4359  * Make sure it's still an appropriate lock, else start over.
4360  * Also, if it wasn't updated before we released the lock, but
4361  * is updated now, we start over too; the reason is that we
4362  * now need to follow the update chain to lock the new
4363  * versions.
4364  */
4365  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4366  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4367  !updated))
4368  goto l3;
4369 
4370  /* Things look okay, so we can skip sleeping */
4371  require_sleep = false;
4372 
4373  /*
4374  * Note we allow Xmax to change here; other updaters/lockers
4375  * could have modified it before we grabbed the buffer lock.
4376  * However, this is not a problem, because with the recheck we
4377  * just did we ensure that they still don't conflict with the
4378  * lock we want.
4379  */
4380  }
4381  }
4382  else if (mode == LockTupleShare)
4383  {
4384  /*
4385  * If we're requesting Share, we can similarly avoid sleeping if
4386  * there's no update and no exclusive lock present.
4387  */
4388  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4389  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4390  {
4391  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4392 
4393  /*
4394  * Make sure it's still an appropriate lock, else start over.
4395  * See above about allowing xmax to change.
4396  */
4397  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4398  HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4399  goto l3;
4400  require_sleep = false;
4401  }
4402  }
4403  else if (mode == LockTupleNoKeyExclusive)
4404  {
4405  /*
4406  * If we're requesting NoKeyExclusive, we might also be able to
4407  * avoid sleeping; just ensure that there is no conflicting lock
4408  * already acquired.
4409  */
4410  if (infomask & HEAP_XMAX_IS_MULTI)
4411  {
4412  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4413  mode, NULL))
4414  {
4415  /*
4416  * No conflict, but if the xmax changed under us in the
4417  * meantime, start over.
4418  */
4419  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4420  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4421  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4422  xwait))
4423  goto l3;
4424 
4425  /* otherwise, we're good */
4426  require_sleep = false;
4427  }
4428  }
4429  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4430  {
4431  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4432 
4433  /* if the xmax changed in the meantime, start over */
4434  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4435  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4436  xwait))
4437  goto l3;
4438  /* otherwise, we're good */
4439  require_sleep = false;
4440  }
4441  }
4442 
4443  /*
4444  * As a check independent from those above, we can also avoid sleeping
4445  * if the current transaction is the sole locker of the tuple. Note
4446  * that the strength of the lock already held is irrelevant; this is
4447  * not about recording the lock in Xmax (which will be done regardless
4448  * of this optimization, below). Also, note that the cases where we
4449  * hold a lock stronger than we are requesting are already handled
4450  * above by not doing anything.
4451  *
4452  * Note we only deal with the non-multixact case here; MultiXactIdWait
4453  * is well equipped to deal with this situation on its own.
4454  */
4455  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4456  TransactionIdIsCurrentTransactionId(xwait))
4457  {
4458  /* ... but if the xmax changed in the meantime, start over */
4459  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4460  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4461  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4462  xwait))
4463  goto l3;
4464 
4465  require_sleep = false;
4466  }
4467 
4468  /*
4469  * Time to sleep on the other transaction/multixact, if necessary.
4470  *
4471  * If the other transaction is an update/delete that's already
4472  * committed, then sleeping cannot possibly do any good: if we're
4473  * required to sleep, get out to raise an error instead.
4474  *
4475  * By here, we either have already acquired the buffer exclusive lock,
4476  * or we must wait for the locking transaction or multixact; so below
4477  * we ensure that we grab buffer lock after the sleep.
4478  */
4479  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4480  {
4481  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4482  goto failed;
4483  }
4484  else if (require_sleep)
4485  {
4486  /*
4487  * Acquire tuple lock to establish our priority for the tuple, or
4488  * die trying. LockTuple will release us when we are next-in-line
4489  * for the tuple. We must do this even if we are share-locking,
4490  * but not if we already have a weaker lock on the tuple.
4491  *
4492  * If we are forced to "start over" below, we keep the tuple lock;
4493  * this arranges that we stay at the head of the line while
4494  * rechecking tuple state.
4495  */
4496  if (!skip_tuple_lock &&
4497  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4498  &have_tuple_lock))
4499  {
4500  /*
4501  * This can only happen if wait_policy is Skip and the lock
4502  * couldn't be obtained.
4503  */
4504  result = TM_WouldBlock;
4505  /* recovery code expects to have buffer lock held */
4506  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4507  goto failed;
4508  }
4509 
4510  if (infomask & HEAP_XMAX_IS_MULTI)
4511  {
4512  MultiXactStatus status = get_mxact_status_for_lock(mode, false);
4513 
4514  /* We only ever lock tuples, never update them */
4515  if (status >= MultiXactStatusNoKeyUpdate)
4516  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4517 
4518  /* wait for multixact to end, or die trying */
4519  switch (wait_policy)
4520  {
4521  case LockWaitBlock:
4522  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4523  relation, &tuple->t_self, XLTW_Lock, NULL);
4524  break;
4525  case LockWaitSkip:
4526  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4527  status, infomask, relation,
4528  NULL))
4529  {
4530  result = TM_WouldBlock;
4531  /* recovery code expects to have buffer lock held */
4532  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4533  goto failed;
4534  }
4535  break;
4536  case LockWaitError:
4537  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4538  status, infomask, relation,
4539  NULL))
4540  ereport(ERROR,
4541  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4542  errmsg("could not obtain lock on row in relation \"%s\"",
4543  RelationGetRelationName(relation))));
4544 
4545  break;
4546  }
4547 
4548  /*
4549  * Of course, the multixact might not be done here: if we're
4550  * requesting a light lock mode, other transactions with light
4551  * locks could still be alive, as well as locks owned by our
4552  * own xact or other subxacts of this backend. We need to
4553  * preserve the surviving MultiXact members. Note that it
4554  * isn't absolutely necessary in the latter case, but doing so
4555  * is simpler.
4556  */
4557  }
4558  else
4559  {
4560  /* wait for regular transaction to end, or die trying */
4561  switch (wait_policy)
4562  {
4563  case LockWaitBlock:
4564  XactLockTableWait(xwait, relation, &tuple->t_self,
4565  XLTW_Lock);
4566  break;
4567  case LockWaitSkip:
4568  if (!ConditionalXactLockTableWait(xwait))
4569  {
4570  result = TM_WouldBlock;
4571  /* recovery code expects to have buffer lock held */
4572  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4573  goto failed;
4574  }
4575  break;
4576  case LockWaitError:
4577  if (!ConditionalXactLockTableWait(xwait))
4578  ereport(ERROR,
4579  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4580  errmsg("could not obtain lock on row in relation \"%s\"",
4581  RelationGetRelationName(relation))));
4582  break;
4583  }
4584  }
4585 
4586  /* if there are updates, follow the update chain */
4587  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4588  {
4589  TM_Result res;
4590 
4591  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4592  GetCurrentTransactionId(),
4593  mode);
4594  if (res != TM_Ok)
4595  {
4596  result = res;
4597  /* recovery code expects to have buffer lock held */
4598  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4599  goto failed;
4600  }
4601  }
4602 
4603  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4604 
4605  /*
4606  * xwait is done, but if xwait had just locked the tuple then some
4607  * other xact could update this tuple before we get to this point.
4608  * Check for xmax change, and start over if so.
4609  */
4610  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4611  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4612  xwait))
4613  goto l3;
4614 
4615  if (!(infomask & HEAP_XMAX_IS_MULTI))
4616  {
4617  /*
4618  * Otherwise check if it committed or aborted. Note we cannot
4619  * be here if the tuple was only locked by somebody who didn't
4620  * conflict with us; that would have been handled above. So
4621  * that transaction must necessarily be gone by now. But
4622  * don't check for this in the multixact case, because some
4623  * locker transactions might still be running.
4624  */
4625  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4626  }
4627  }
4628 
4629  /* By here, we're certain that we hold buffer exclusive lock again */
4630 
4631  /*
4632  * We may lock if previous xmax aborted, or if it committed but only
4633  * locked the tuple without updating it; or if we didn't have to wait
4634  * at all for whatever reason.
4635  */
4636  if (!require_sleep ||
4637  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4638  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4639  HeapTupleHeaderIsOnlyLocked(tuple->t_data))
4640  result = TM_Ok;
4641  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4642  result = TM_Updated;
4643  else
4644  result = TM_Deleted;
4645  }
4646 
4647 failed:
4648  if (result != TM_Ok)
4649  {
4650  Assert(result == TM_SelfModified || result == TM_Updated ||
4651  result == TM_Deleted || result == TM_WouldBlock);
4652 
4653  /*
4654  * When locking a tuple under LockWaitSkip semantics and we fail with
4655  * TM_WouldBlock above, it's possible for concurrent transactions to
4656  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4657  * this assert is slightly different from the equivalent one in
4658  * heap_delete and heap_update.
4659  */
4660  Assert((result == TM_WouldBlock) ||
4661  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4662  Assert(result != TM_Updated ||
4663  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4664  tmfd->ctid = tuple->t_data->t_ctid;
4665  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4666  if (result == TM_SelfModified)
4667  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4668  else
4669  tmfd->cmax = InvalidCommandId;
4670  goto out_locked;
4671  }
4672 
4673  /*
4674  * If we didn't pin the visibility map page and the page has become all
4675  * visible while we were busy locking the buffer, or during some
4676  * subsequent window during which we had it unlocked, we'll have to unlock
4677  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4678  * unfortunate, especially since we'll now have to recheck whether the
4679  * tuple has been locked or updated under us, but hopefully it won't
4680  * happen very often.
4681  */
4682  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4683  {
4684  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4685  visibilitymap_pin(relation, block, &vmbuffer);
4686  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4687  goto l3;
4688  }
4689 
4690  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4691  old_infomask = tuple->t_data->t_infomask;
4692 
4693  /*
4694  * If this is the first possibly-multixact-able operation in the current
4695  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4696  * certain that the transaction will never become a member of any older
4697  * MultiXactIds than that. (We have to do this even if we end up just
4698  * using our own TransactionId below, since some other backend could
4699  * incorporate our XID into a MultiXact immediately afterwards.)
4700  */
4701  MultiXactIdSetOldestMember();
4702 
4703  /*
4704  * Compute the new xmax and infomask to store into the tuple. Note we do
4705  * not modify the tuple just yet, because that would leave it in the wrong
4706  * state if multixact.c elogs.
4707  */
4708  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
4709  GetCurrentTransactionId(), mode, false,
4710  &xid, &new_infomask, &new_infomask2);
4711 
4712  START_CRIT_SECTION();
4713 
4714  /*
4715  * Store transaction information of xact locking the tuple.
4716  *
4717  * Note: Cmax is meaningless in this context, so don't set it; this avoids
4718  * possibly generating a useless combo CID. Moreover, if we're locking a
4719  * previously updated tuple, it's important to preserve the Cmax.
4720  *
4721  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
4722  * we would break the HOT chain.
4723  */
4724  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
4725  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4726  tuple->t_data->t_infomask |= new_infomask;
4727  tuple->t_data->t_infomask2 |= new_infomask2;
4728  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4729  HeapTupleHeaderClearHotUpdated(tuple->t_data);
4730  HeapTupleHeaderSetXmax(tuple->t_data, xid);
4731 
4732  /*
4733  * Make sure there is no forward chain link in t_ctid. Note that in the
4734  * cases where the tuple has been updated, we must not overwrite t_ctid,
4735  * because it was set by the updater. Moreover, if the tuple has been
4736  * updated, we need to follow the update chain to lock the new versions of
4737  * the tuple as well.
4738  */
4739  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4740  tuple->t_data->t_ctid = *tid;
4741 
4742  /* Clear only the all-frozen bit on visibility map if needed */
4743  if (PageIsAllVisible(page) &&
4744  visibilitymap_clear(relation, block, vmbuffer,
4745  VISIBILITYMAP_ALL_FROZEN))
4746  cleared_all_frozen = true;
4747 
4748 
4749  MarkBufferDirty(*buffer);
4750 
4751  /*
4752  * XLOG stuff. You might think that we don't need an XLOG record because
4753  * there is no state change worth restoring after a crash. You would be
4754  * wrong however: we have just written either a TransactionId or a
4755  * MultiXactId that may never have been seen on disk before, and we need
4756  * to make sure that there are XLOG entries covering those ID numbers.
4757  * Else the same IDs might be re-used after a crash, which would be
4758  * disastrous if this page made it to disk before the crash. Essentially
4759  * we have to enforce the WAL log-before-data rule even in this case.
4760  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
4761  * entries for everything anyway.)
4762  */
4763  if (RelationNeedsWAL(relation))
4764  {
4765  xl_heap_lock xlrec;
4766  XLogRecPtr recptr;
4767 
4768  XLogBeginInsert();
4769  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
4770 
4771  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
4772  xlrec.xmax = xid;
4773  xlrec.infobits_set = compute_infobits(new_infomask,
4774  tuple->t_data->t_infomask2);
4775  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
4776  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
4777 
4778  /* we don't decode row locks atm, so no need to log the origin */
4779 
4780  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
4781 
4782  PageSetLSN(page, recptr);
4783  }
4784 
4785  END_CRIT_SECTION();
4786 
4787  result = TM_Ok;
4788 
4789 out_locked:
4790  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4791 
4792 out_unlocked:
4793  if (BufferIsValid(vmbuffer))
4794  ReleaseBuffer(vmbuffer);
4795 
4796  /*
4797  * Don't update the visibility map here. Locking a tuple doesn't change
4798  * visibility info.
4799  */
4800 
4801  /*
4802  * Now that we have successfully marked the tuple as locked, we can
4803  * release the lmgr tuple lock, if we had it.
4804  */
4805  if (have_tuple_lock)
4806  UnlockTupleTuplock(relation, tid, mode);
4807 
4808  return result;
4809 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:220
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5590
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7307
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4078
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:277
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:38
#define SizeOfHeapLock
Definition: heapam_xlog.h:288
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:741
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1225
MultiXactStatus
Definition: multixact.h:42
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:48
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define RelationGetRelationName(relation)
Definition: rel.h:538
uint8 infobits_set
Definition: heapam_xlog.h:284
OffsetNumber offnum
Definition: heapam_xlog.h:283
TransactionId xmax
Definition: heapam_xlog.h:282
@ TM_WouldBlock
Definition: tableam.h:102
#define VISIBILITYMAP_ALL_FROZEN

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2086 of file heapam.c.

2088 {
2089  TransactionId xid = GetCurrentTransactionId();
2090  HeapTuple *heaptuples;
2091  int i;
2092  int ndone;
2093  PGAlignedBlock scratch;
2094  Page page;
2095  Buffer vmbuffer = InvalidBuffer;
2096  bool needwal;
2097  Size saveFreeSpace;
2098  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2099  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2100  bool starting_with_empty_page = false;
2101  int npages = 0;
2102  int npages_used = 0;
2103 
2104  /* currently not needed (thus unsupported) for heap_multi_insert() */
2105  Assert(!(options & HEAP_INSERT_NO_LOGICAL));
2106 
2107  needwal = RelationNeedsWAL(relation);
2108  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2109  HEAP_DEFAULT_FILLFACTOR);
2110 
2111  /* Toast and set header data in all the slots */
2112  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2113  for (i = 0; i < ntuples; i++)
2114  {
2115  HeapTuple tuple;
2116 
2117  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2118  slots[i]->tts_tableOid = RelationGetRelid(relation);
2119  tuple->t_tableOid = slots[i]->tts_tableOid;
2120  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2121  options);
2122  }
2123 
2124  /*
2125  * We're about to do the actual inserts -- but check for conflict first,
2126  * to minimize the possibility of having to roll back work we've just
2127  * done.
2128  *
2129  * A check here does not definitively prevent a serialization anomaly;
2130  * that check MUST be done at least past the point of acquiring an
2131  * exclusive buffer content lock on every buffer that will be affected,
2132  * and MAY be done after all inserts are reflected in the buffers and
2133  * those locks are released; otherwise there is a race condition. Since
2134  * multiple buffers can be locked and unlocked in the loop below, and it
2135  * would not be feasible to identify and lock all of those buffers before
2136  * the loop, we must do a final check at the end.
2137  *
2138  * The check here could be omitted with no loss of correctness; it is
2139  * present strictly as an optimization.
2140  *
2141  * For heap inserts, we only need to check for table-level SSI locks. Our
2142  * new tuples can't possibly conflict with existing tuple locks, and heap
2143  * page locks are only consolidated versions of tuple locks; they do not
2144  * lock "gaps" as index page locks do. So we don't need to specify a
2145  * buffer when making the call, which makes for a faster check.
2146  */
2147  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2148 
2149  ndone = 0;
2150  while (ndone < ntuples)
2151  {
2152  Buffer buffer;
2153  bool all_visible_cleared = false;
2154  bool all_frozen_set = false;
2155  int nthispage;
2156 
2157  CHECK_FOR_INTERRUPTS();
2158 
2159  /*
2160  * Compute number of pages needed to fit the to-be-inserted tuples in
2161  * the worst case. This will be used to determine how much to extend
2162  * the relation by in RelationGetBufferForTuple(), if needed. If we
2163  * filled a prior page from scratch, we can just update our last
2164  * computation, but if we started with a partially filled page,
2165  * recompute from scratch, the number of potentially required pages
2166  * can vary due to tuples needing to fit onto the page, page headers
2167  * etc.
2168  */
2169  if (ndone == 0 || !starting_with_empty_page)
2170  {
2171  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2172  saveFreeSpace);
2173  npages_used = 0;
2174  }
2175  else
2176  npages_used++;
2177 
2178  /*
2179  * Find buffer where at least the next tuple will fit. If the page is
2180  * all-visible, this will also pin the requisite visibility map page.
2181  *
2182  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2183  * empty page. See all_frozen_set below.
2184  */
2185  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2186  InvalidBuffer, options, bistate,
2187  &vmbuffer, NULL,
2188  npages - npages_used);
2189  page = BufferGetPage(buffer);
2190 
2191  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2192 
2193  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2194  all_frozen_set = true;
2195 
2196  /* NO EREPORT(ERROR) from here till changes are logged */
2197  START_CRIT_SECTION();
2198 
2199  /*
2200  * RelationGetBufferForTuple has ensured that the first tuple fits.
2201  * Put that on the page, and then as many other tuples as fit.
2202  */
2203  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2204 
2205  /*
2206  * For logical decoding we need combo CIDs to properly decode the
2207  * catalog.
2208  */
2209  if (needwal && need_cids)
2210  log_heap_new_cid(relation, heaptuples[ndone]);
2211 
2212  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2213  {
2214  HeapTuple heaptup = heaptuples[ndone + nthispage];
2215 
2216  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2217  break;
2218 
2219  RelationPutHeapTuple(relation, buffer, heaptup, false);
2220 
2221  /*
2222  * For logical decoding we need combo CIDs to properly decode the
2223  * catalog.
2224  */
2225  if (needwal && need_cids)
2226  log_heap_new_cid(relation, heaptup);
2227  }
2228 
2229  /*
2230  * If the page is all visible, need to clear that, unless we're only
2231  * going to add further frozen rows to it.
2232  *
2233  * If we're only adding already frozen rows to a previously empty
2234  * page, mark it as all-visible.
2235  */
2236  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2237  {
2238  all_visible_cleared = true;
2239  PageClearAllVisible(page);
2240  visibilitymap_clear(relation,
2241  BufferGetBlockNumber(buffer),
2242  vmbuffer, VISIBILITYMAP_VALID_BITS);
2243  }
2244  else if (all_frozen_set)
2245  PageSetAllVisible(page);
2246 
2247  /*
2248  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2249  */
2250 
2251  MarkBufferDirty(buffer);
2252 
2253  /* XLOG stuff */
2254  if (needwal)
2255  {
2256  XLogRecPtr recptr;
2257  xl_heap_multi_insert *xlrec;
2258  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2259  char *tupledata;
2260  int totaldatalen;
2261  char *scratchptr = scratch.data;
2262  bool init;
2263  int bufflags = 0;
2264 
2265  /*
2266  * If the page was previously empty, we can reinit the page
2267  * instead of restoring the whole thing.
2268  */
2269  init = starting_with_empty_page;
2270 
2271  /* allocate xl_heap_multi_insert struct from the scratch area */
2272  xlrec = (xl_heap_multi_insert *) scratchptr;
2273  scratchptr += SizeOfHeapMultiInsert;
2274 
2275  /*
2276  * Allocate offsets array. Unless we're reinitializing the page,
2277  * in that case the tuples are stored in order starting at
2278  * FirstOffsetNumber and we don't need to store the offsets
2279  * explicitly.
2280  */
2281  if (!init)
2282  scratchptr += nthispage * sizeof(OffsetNumber);
2283 
2284  /* the rest of the scratch space is used for tuple data */
2285  tupledata = scratchptr;
2286 
2287  /* check that the mutually exclusive flags are not both set */
2288  Assert(!(all_visible_cleared && all_frozen_set));
2289 
2290  xlrec->flags = 0;
2291  if (all_visible_cleared)
2292  xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
2293  if (all_frozen_set)
2294  xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
2295 
2296  xlrec->ntuples = nthispage;
2297 
2298  /*
2299  * Write out an xl_multi_insert_tuple and the tuple data itself
2300  * for each tuple.
2301  */
2302  for (i = 0; i < nthispage; i++)
2303  {
2304  HeapTuple heaptup = heaptuples[ndone + i];
2305  xl_multi_insert_tuple *tuphdr;
2306  int datalen;
2307 
2308  if (!init)
2309  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2310  /* xl_multi_insert_tuple needs two-byte alignment. */
2311  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2312  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2313 
2314  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2315  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2316  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2317 
2318  /* write bitmap [+ padding] [+ oid] + data */
2319  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2320  memcpy(scratchptr,
2321  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2322  datalen);
2323  tuphdr->datalen = datalen;
2324  scratchptr += datalen;
2325  }
2326  totaldatalen = scratchptr - tupledata;
2327  Assert((scratchptr - scratch.data) < BLCKSZ);
2328 
2329  if (need_tuple_data)
2330  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2331 
2332  /*
2333  * Signal that this is the last xl_heap_multi_insert record
2334  * emitted by this call to heap_multi_insert(). Needed for logical
2335  * decoding so it knows when to cleanup temporary data.
2336  */
2337  if (ndone + nthispage == ntuples)
2338  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2339 
2340  if (init)
2341  {
2342  info |= XLOG_HEAP_INIT_PAGE;
2343  bufflags |= REGBUF_WILL_INIT;
2344  }
2345 
2346  /*
2347  * If we're doing logical decoding, include the new tuple data
2348  * even if we take a full-page image of the page.
2349  */
2350  if (need_tuple_data)
2351  bufflags |= REGBUF_KEEP_DATA;
2352 
2353  XLogBeginInsert();
2354  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2355  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2356 
2357  XLogRegisterBufData(0, tupledata, totaldatalen);
2358 
2359  /* filtering by origin on a row level is much more efficient */
2360  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2361 
2362  recptr = XLogInsert(RM_HEAP2_ID, info);
2363 
2364  PageSetLSN(page, recptr);
2365  }
2366 
2367  END_CRIT_SECTION();
2368 
2369  /*
2370  * If we've frozen everything on the page, update the visibilitymap.
2371  * We're already holding pin on the vmbuffer.
2372  */
2373  if (all_frozen_set)
2374  {
2375  Assert(PageIsAllVisible(page));
2376  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2377 
2378  /*
2379  * It's fine to use InvalidTransactionId here - this is only used
2380  * when HEAP_INSERT_FROZEN is specified, which intentionally
2381  * violates visibility rules.
2382  */
2383  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2384  InvalidXLogRecPtr, vmbuffer,
2385  InvalidTransactionId,
2386  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2387 
2388 
2389  UnlockReleaseBuffer(buffer);
2390  ndone += nthispage;
2391 
2392  /*
2393  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2394  * likely that we'll insert into subsequent heap pages that are likely
2395  * to use the same vm page.
2396  */
2397  }
2398 
2399  /* We're done with inserting all tuples, so release the last vmbuffer. */
2400  if (vmbuffer != InvalidBuffer)
2401  ReleaseBuffer(vmbuffer);
2402 
2403  /*
2404  * We're done with the actual inserts. Check for conflicts again, to
2405  * ensure that all rw-conflicts in to these inserts are detected. Without
2406  * this final check, a sequential scan of the heap may have locked the
2407  * table after the "before" check, missing one opportunity to detect the
2408  * conflict, and then scanned the table before the new tuples were there,
2409  * missing the other chance to detect the conflict.
2410  *
2411  * For heap inserts, we only need to check for table-level SSI locks. Our
2412  * new tuples can't possibly conflict with existing tuple locks, and heap
2413  * page locks are only consolidated versions of tuple locks; they do not
2414  * lock "gaps" as index page locks do. So we don't need to specify a
2415  * buffer when making the call.
2416  */
2417  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2418 
2419  /*
2420  * If tuples are cachable, mark them for invalidation from the caches in
2421  * case we abort. Note it is OK to do this after releasing the buffer,
2422  * because the heaptuples data structure is all in local memory, not in
2423  * the shared buffer.
2424  */
2425  if (IsCatalogRelation(relation))
2426  {
2427  for (i = 0; i < ntuples; i++)
2428  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2429  }
2430 
2431  /* copy t_self fields back to the caller's slots */
2432  for (i = 0; i < ntuples; i++)
2433  slots[i]->tts_tid = heaptuples[i]->t_self;
2434 
2435  pgstat_count_heap_insert(relation, ntuples);
2436 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
#define MAXALIGN(LEN)
Definition: c.h:800
#define SHORTALIGN(LEN)
Definition: c.h:796
size_t Size
Definition: c.h:594
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1645
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2054
#define HEAP_INSERT_FROZEN
Definition: heapam.h:35
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:182
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:58
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:67
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:73
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:193
int init
Definition: isn.c:75
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:377
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:348
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:179
char data[BLCKSZ]
Definition: c.h:1132
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
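
As a usage illustration (not taken from the source), a bulk-loading caller might drive heap_multi_insert() as in the sketch below. The helper name bulk_insert_slots is hypothetical, and the slots are assumed to be already populated so that ExecFetchSlotHeapTuple() can materialize them.

/*
 * Hypothetical bulk-load helper: insert an array of populated slots in a
 * single heap_multi_insert() call. A BulkInsertState keeps the target
 * block and buffer access strategy warm across repeated calls.
 */
static void
bulk_insert_slots(Relation rel, TupleTableSlot **slots, int nslots)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId cid = GetCurrentCommandId(true);

    heap_multi_insert(rel, slots, nslots, cid,
                      0,            /* no HEAP_INSERT_* options */
                      bistate);

    FreeBulkInsertState(bistate);
}
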

◆ heap_page_prune()

void heap_page_prune ( Relation  relation,
Buffer  buffer,
struct GlobalVisState vistest,
PruneResult presult,
OffsetNumber off_loc 
)

Definition at line 213 of file pruneheap.c.

217 {
218  Page page = BufferGetPage(buffer);
219  BlockNumber blockno = BufferGetBlockNumber(buffer);
220  OffsetNumber offnum,
221  maxoff;
222  PruneState prstate;
223  HeapTupleData tup;
224 
225  /*
226  * Our strategy is to scan the page and make lists of items to change,
227  * then apply the changes within a critical section. This keeps as much
228  * logic as possible out of the critical section, and also ensures that
229  * WAL replay will work the same as the normal case.
230  *
231  * First, initialize the new pd_prune_xid value to zero (indicating no
232  * prunable tuples). If we find any tuples which may soon become
233  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
234  * initialize the rest of our working state.
235  */
236  prstate.new_prune_xid = InvalidTransactionId;
237  prstate.rel = relation;
238  prstate.vistest = vistest;
239  prstate.snapshotConflictHorizon = InvalidTransactionId;
240  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
241  memset(prstate.marked, 0, sizeof(prstate.marked));
242 
243  presult->ndeleted = 0;
244  presult->nnewlpdead = 0;
245 
246  maxoff = PageGetMaxOffsetNumber(page);
247  tup.t_tableOid = RelationGetRelid(prstate.rel);
248 
249  /*
250  * Determine HTSV for all tuples.
251  *
252  * This is required for correctness to deal with cases where running HTSV
253  * twice could result in different results (e.g. RECENTLY_DEAD can turn to
254  * DEAD if another checked item causes GlobalVisTestIsRemovableFullXid()
255  * to update the horizon, INSERT_IN_PROGRESS can change to DEAD if the
256  * inserting transaction aborts, ...). That in turn could cause
257  * heap_prune_chain() to behave incorrectly if a tuple is reached twice,
258  * once directly via a heap_prune_chain() and once following a HOT chain.
259  *
260  * It's also good for performance. Most commonly tuples within a page are
261  * stored at decreasing offsets (while the items are stored at increasing
262  * offsets). When processing all tuples on a page this leads to reading
263  * memory at decreasing offsets within a page, with a variable stride.
264  * That's hard for CPU prefetchers to deal with. Processing the items in
265  * reverse order (and thus the tuples in increasing order) increases
266  * prefetching efficiency significantly / decreases the number of cache
267  * misses.
268  */
269  for (offnum = maxoff;
270  offnum >= FirstOffsetNumber;
271  offnum = OffsetNumberPrev(offnum))
272  {
273  ItemId itemid = PageGetItemId(page, offnum);
274  HeapTupleHeader htup;
275 
276  /* Nothing to do if slot doesn't contain a tuple */
277  if (!ItemIdIsNormal(itemid))
278  {
279  prstate.htsv[offnum] = -1;
280  continue;
281  }
282 
283  htup = (HeapTupleHeader) PageGetItem(page, itemid);
284  tup.t_data = htup;
285  tup.t_len = ItemIdGetLength(itemid);
286  ItemPointerSet(&(tup.t_self), blockno, offnum);
287 
288  /*
289  * Set the offset number so that we can display it along with any
290  * error that occurred while processing this tuple.
291  */
292  if (off_loc)
293  *off_loc = offnum;
294 
295  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
296  buffer);
297  }
298 
299  /* Scan the page */
300  for (offnum = FirstOffsetNumber;
301  offnum <= maxoff;
302  offnum = OffsetNumberNext(offnum))
303  {
304  ItemId itemid;
305 
306  /* Ignore items already processed as part of an earlier chain */
307  if (prstate.marked[offnum])
308  continue;
309 
310  /* see preceding loop */
311  if (off_loc)
312  *off_loc = offnum;
313 
314  /* Nothing to do if slot is empty or already dead */
315  itemid = PageGetItemId(page, offnum);
316  if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
317  continue;
318 
319  /* Process this item or chain of items */
320  presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
321  }
322 
323  /* Clear the offset information once we have processed the given page. */
324  if (off_loc)
325  *off_loc = InvalidOffsetNumber;
326 
327  /* Any error while applying the changes is critical */
328  START_CRIT_SECTION();
329 
330  /* Have we found any prunable items? */
331  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
332  {
333  /*
334  * Apply the planned item changes, then repair page fragmentation, and
335  * update the page's hint bit about whether it has free line pointers.
336  */
337  heap_page_prune_execute(buffer,
338  prstate.redirected, prstate.nredirected,
339  prstate.nowdead, prstate.ndead,
340  prstate.nowunused, prstate.nunused);
341 
342  /*
343  * Update the page's pd_prune_xid field to either zero, or the lowest
344  * XID of any soon-prunable tuple.
345  */
346  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
347 
348  /*
349  * Also clear the "page is full" flag, since there's no point in
350  * repeating the prune/defrag process until something else happens to
351  * the page.
352  */
353  PageClearFull(page);
354 
355  MarkBufferDirty(buffer);
356 
357  /*
358  * Emit a WAL XLOG_HEAP2_PRUNE record showing what we did
359  */
360  if (RelationNeedsWAL(relation))
361  {
362  xl_heap_prune xlrec;
363  XLogRecPtr recptr;
364 
365  xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(relation);
366  xlrec.snapshotConflictHorizon = prstate.snapshotConflictHorizon;
367  xlrec.nredirected = prstate.nredirected;
368  xlrec.ndead = prstate.ndead;
369 
370  XLogBeginInsert();
371  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
372 
373  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
374 
375  /*
376  * The OffsetNumber arrays are not actually in the buffer, but we
377  * pretend that they are. When XLogInsert stores the whole
378  * buffer, the offset arrays need not be stored too.
379  */
380  if (prstate.nredirected > 0)
381  XLogRegisterBufData(0, (char *) prstate.redirected,
382  prstate.nredirected *
383  sizeof(OffsetNumber) * 2);
384 
385  if (prstate.ndead > 0)
386  XLogRegisterBufData(0, (char *) prstate.nowdead,
387  prstate.ndead * sizeof(OffsetNumber));
388 
389  if (prstate.nunused > 0)
390  XLogRegisterBufData(0, (char *) prstate.nowunused,
391  prstate.nunused * sizeof(OffsetNumber));
392 
393  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_PRUNE);
394 
395  PageSetLSN(BufferGetPage(buffer), recptr);
396  }
397  }
398  else
399  {
400  /*
401  * If we didn't prune anything, but have found a new value for the
402  * pd_prune_xid field, update it and mark the buffer dirty. This is
403  * treated as a non-WAL-logged hint.
404  *
405  * Also clear the "page is full" flag if it is set, since there's no
406  * point in repeating the prune/defrag process until something else
407  * happens to the page.
408  */
409  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
410  PageIsFull(page))
411  {
412  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
413  PageClearFull(page);
414  MarkBufferDirtyHint(buffer, true);
415  }
416  }
417 
418  END_CRIT_SECTION();
419 
420  /* Record number of newly-set-LP_DEAD items for caller */
421  presult->nnewlpdead = prstate.ndead;
422 }
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4544
PageHeaderData * PageHeader
Definition: bufpage.h:170
static void PageClearFull(Page page)
Definition: bufpage.h:420
static bool PageIsFull(Page page)
Definition: bufpage.h:410
#define XLOG_HEAP2_PRUNE
Definition: heapam_xlog.h:54
#define SizeOfHeapPrune
Definition: heapam_xlog.h:253
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:476
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:429
void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:797
int nnewlpdead
Definition: heapam.h:200
int ndeleted
Definition: heapam.h:199
int ndead
Definition: pruneheap.c:42
TransactionId new_prune_xid
Definition: pruneheap.c:39
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:46
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:55
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:47
GlobalVisState * vistest
Definition: pruneheap.c:37
Relation rel
Definition: pruneheap.c:34
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:45
int nredirected
Definition: pruneheap.c:41
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:65
int nunused
Definition: pruneheap.c:43
TransactionId snapshotConflictHorizon
Definition: pruneheap.c:40
TransactionId snapshotConflictHorizon
Definition: heapam_xlog.h:245
uint16 nredirected
Definition: heapam_xlog.h:246

References BufferGetBlockNumber(), BufferGetPage(), END_CRIT_SECTION, FirstOffsetNumber, heap_page_prune_execute(), heap_prune_chain(), heap_prune_satisfies_vacuum(), PruneState::htsv, InvalidOffsetNumber, InvalidTransactionId, xl_heap_prune::isCatalogRel, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsUsed, ItemPointerSet(), MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::marked, PruneState::ndead, xl_heap_prune::ndead, PruneResult::ndeleted, PruneState::new_prune_xid, PruneResult::nnewlpdead, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, xl_heap_prune::nredirected, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, PageClearFull(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), PageSetLSN(), PruneState::redirected, REGBUF_STANDARD, PruneState::rel, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, SizeOfHeapPrune, PruneState::snapshotConflictHorizon, xl_heap_prune::snapshotConflictHorizon, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, PruneState::vistest, XLOG_HEAP2_PRUNE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
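
For orientation, the hypothetical fragment below shows how a VACUUM-like caller might invoke heap_page_prune() on a page it has already pinned and cleanup-locked, then read the counters back out of PruneResult. The helper name prune_locked_page is illustrative only.

/*
 * Illustrative sketch: prune one heap page that the caller has pinned and
 * holds a cleanup lock on (as lazy_scan_prune() does). Returns the number
 * of tuples that pruning removed from the page.
 */
static int
prune_locked_page(Relation rel, Buffer buf)
{
    GlobalVisState *vistest = GlobalVisTestFor(rel);
    PruneResult presult;

    heap_page_prune(rel, buf, vistest, &presult, NULL);    /* NULL: no error-context offset */

    /* presult.nnewlpdead items were only set LP_DEAD; VACUUM reclaims them later */
    return presult.ndeleted;
}
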

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 797 of file pruneheap.c.

801 {
802  Page page = (Page) BufferGetPage(buffer);
803  OffsetNumber *offnum;
804  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
805 
806  /* Shouldn't be called unless there's something to do */
807  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
808 
809  /* Update all redirected line pointers */
810  offnum = redirected;
811  for (int i = 0; i < nredirected; i++)
812  {
813  OffsetNumber fromoff = *offnum++;
814  OffsetNumber tooff = *offnum++;
815  ItemId fromlp = PageGetItemId(page, fromoff);
816  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
817 
818 #ifdef USE_ASSERT_CHECKING
819 
820  /*
821  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
822  * must be the first item from a HOT chain. If the item has tuple
823  * storage then it can't be a heap-only tuple. Otherwise we are just
824  * maintaining an existing LP_REDIRECT from an existing HOT chain that
825  * has been pruned at least once before now.
826  */
827  if (!ItemIdIsRedirected(fromlp))
828  {
829  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
830 
831  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
832  Assert(!HeapTupleHeaderIsHeapOnly(htup));
833  }
834  else
835  {
836  /* We shouldn't need to redundantly set the redirect */
837  Assert(ItemIdGetRedirect(fromlp) != tooff);
838  }
839 
840  /*
841  * The item that we're about to set as an LP_REDIRECT (the 'from'
842  * item) will point to an existing item (the 'to' item) that is
843  * already a heap-only tuple. There can be at most one LP_REDIRECT
844  * item per HOT chain.
845  *
846  * We need to keep around an LP_REDIRECT item (after original
847  * non-heap-only root tuple gets pruned away) so that it's always
848  * possible for VACUUM to easily figure out what TID to delete from
849  * indexes when an entire HOT chain becomes dead. A heap-only tuple
850  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
851  * tuple can.
852  *
853  * This check may miss problems, e.g. the target of a redirect could
854  * be marked as unused subsequently. The page_verify_redirects() check
855  * below will catch such problems.
856  */
857  tolp = PageGetItemId(page, tooff);
858  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
859  htup = (HeapTupleHeader) PageGetItem(page, tolp);
860  Assert(HeapTupleHeaderIsHeapOnly(htup));
861 #endif
862 
863  ItemIdSetRedirect(fromlp, tooff);
864  }
865 
866  /* Update all now-dead line pointers */
867  offnum = nowdead;
868  for (int i = 0; i < ndead; i++)
869  {
870  OffsetNumber off = *offnum++;
871  ItemId lp = PageGetItemId(page, off);
872 
873 #ifdef USE_ASSERT_CHECKING
874 
875  /*
876  * An LP_DEAD line pointer must be left behind when the original item
877  * (which is dead to everybody) could still be referenced by a TID in
878  * an index. This should never be necessary with any individual
879  * heap-only tuple item, though. (It's not clear how much of a problem
880  * that would be, but there is no reason to allow it.)
881  */
882  if (ItemIdHasStorage(lp))
883  {
884  Assert(ItemIdIsNormal(lp));
885  htup = (HeapTupleHeader) PageGetItem(page, lp);
886  Assert(!HeapTupleHeaderIsHeapOnly(htup));
887  }
888  else
889  {
890  /* Whole HOT chain becomes dead */
891  Assert(ItemIdIsRedirected(lp));
892  }
893 #endif
894 
895  ItemIdSetDead(lp);
896  }
897 
898  /* Update all now-unused line pointers */
899  offnum = nowunused;
900  for (int i = 0; i < nunused; i++)
901  {
902  OffsetNumber off = *offnum++;
903  ItemId lp = PageGetItemId(page, off);
904 
905 #ifdef USE_ASSERT_CHECKING
906 
907  /*
908  * Only heap-only tuples can become LP_UNUSED during pruning. They
909  * don't need to be left in place as LP_DEAD items until VACUUM gets
910  * around to doing index vacuuming.
911  */
912  Assert(ItemIdIsNormal(lp));
913  htup = (HeapTupleHeader) PageGetItem(page, lp);
914  Assert(HeapTupleHeaderIsHeapOnly(htup));
915 #endif
916 
917  ItemIdSetUnused(lp);
918  }
919 
920  /*
921  * Finally, repair any fragmentation, and update the page's hint bit about
922  * whether it has free pointers.
923  */
924  PageRepairFragmentation(page);
925 
926  /*
927  * Now that the page has been modified, assert that redirect items still
928  * point to valid targets.
929  */
930  page_verify_redirects(page);
931 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:171
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:948

References Assert(), BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsNormal, ItemIdIsRedirected, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune(), and heap_xlog_prune().
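
The offset arrays passed here use a layout worth spelling out: redirected[] holds interleaved (from, to) pairs, so nredirected counts pairs while the array contains 2 * nredirected entries, matching the WAL payload registered in heap_page_prune(). The fragment below is a hypothetical call with made-up offsets, assumed to run inside a critical section on an exclusively locked buffer.

/*
 * Hypothetical call (illustrative offsets only): redirect line pointer 1
 * to heap-only tuple 3, mark line pointer 2 LP_DEAD, and mark heap-only
 * item 4 LP_UNUSED.
 */
static void
apply_example_prune(Buffer buffer)
{
    OffsetNumber redirected[] = {1, 3};   /* one (from, to) pair */
    OffsetNumber nowdead[] = {2};
    OffsetNumber nowunused[] = {4};

    heap_page_prune_execute(buffer,
                            redirected, 1,    /* nredirected counts pairs */
                            nowdead, 1,
                            nowunused, 1);
}
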

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 96 of file pruneheap.c.

97 {
98  Page page = BufferGetPage(buffer);
99  TransactionId prune_xid;
100  GlobalVisState *vistest;
101  Size minfree;
102 
103  /*
104  * We can't write WAL in recovery mode, so there's no point trying to
105  * clean the page. The primary will likely issue a cleaning WAL record
106  * soon anyway, so this is no particular loss.
107  */
108  if (RecoveryInProgress())
109  return;
110 
111  /*
112  * First check whether there's any chance there's something to prune,
113  * determining the appropriate horizon is a waste if there's no prune_xid
114  * (i.e. no updates/deletes left potentially dead tuples around).
115  */
116  prune_xid = ((PageHeader) page)->pd_prune_xid;
117  if (!TransactionIdIsValid(prune_xid))
118  return;
119 
120  /*
121  * Check whether prune_xid indicates that there may be dead rows that can
122  * be cleaned up.
123  */
124  vistest = GlobalVisTestFor(relation);
125 
126  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
127  return;
128 
129  /*
130  * We prune when a previous UPDATE failed to find enough space on the page
131  * for a new tuple version, or when free space falls below the relation's
132  * fill-factor target (but not less than 10%).
133  *
134  * Checking free space here is questionable since we aren't holding any
135  * lock on the buffer; in the worst case we could get a bogus answer. It's
136  * unlikely to be *seriously* wrong, though, since reading either pd_lower
137  * or pd_upper is probably atomic. Avoiding taking a lock seems more
138  * important than sometimes getting a wrong answer in what is after all
139  * just a heuristic estimate.
140  */
141  minfree = RelationGetTargetPageFreeSpace(relation,
142  HEAP_DEFAULT_FILLFACTOR);
143  minfree = Max(minfree, BLCKSZ / 10);
144 
145  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
146  {
147  /* OK, try to get exclusive buffer lock */
148  if (!ConditionalLockBufferForCleanup(buffer))
149  return;
150 
151  /*
152  * Now that we have buffer lock, get accurate information about the
153  * page's free space, and recheck the heuristic about whether to
154  * prune.
155  */
156  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
157  {
158  PruneResult presult;
159 
160  heap_page_prune(relation, buffer, vistest, &presult, NULL);
161 
162  /*
163  * Report the number of tuples reclaimed to pgstats. This is
164  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
165  *
166  * We derive the number of dead tuples like this to avoid totally
167  * forgetting about items that were set to LP_DEAD, since they
168  * still need to be cleaned up by VACUUM. We only want to count
169  * heap-only tuples that just became LP_UNUSED in our report,
170  * which don't.
171  *
172  * VACUUM doesn't have to compensate in the same way when it
173  * tracks ndeleted, since it will set the same LP_DEAD items to
174  * LP_UNUSED separately.
175  */
176  if (presult.ndeleted > presult.nnewlpdead)
177  pgstat_update_heap_dead_tuples(relation,
178  presult.ndeleted - presult.nnewlpdead);
179  }
180 
181  /* And release buffer lock */
182  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
183 
184  /*
185  * We avoid reuse of any free space created on the page by unrelated
186  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
187  * free space should be reused by UPDATEs to *this* page.
188  */
189  }
190 }
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:4956
#define Max(x, y)
Definition: c.h:987
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4168
void heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, PruneResult *presult, OffsetNumber *off_loc)
Definition: pruneheap.c:213
bool RecoveryInProgress(void)
Definition: xlog.c:5948

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune(), LockBuffer(), Max, PruneResult::ndeleted, PruneResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by heapam_index_fetch_tuple(), heapam_scan_bitmap_next_block(), and heapgetpage().
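
As a hedged usage sketch, a scan-side caller could opportunistically prune a page it has just read, much as heapgetpage() does; the helper name maybe_prune below is hypothetical. heap_page_prune_opt() takes its own conditional cleanup lock internally, so the caller only needs a pin.

/*
 * Illustrative sketch: read a block and give heap_page_prune_opt() a chance
 * to prune it. The call is a no-op unless the page's pd_prune_xid and the
 * free-space heuristic say pruning is worthwhile.
 */
static void
maybe_prune(Relation rel, BlockNumber blkno)
{
    Buffer buf = ReadBuffer(rel, blkno);

    heap_page_prune_opt(rel, buf);

    ReleaseBuffer(buf);
}
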

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)

Definition at line 6361 of file heapam.c.

6365 {
6366  bool xmin_already_frozen = false,
6367  xmax_already_frozen = false;
6368  bool freeze_xmin = false,
6369  replace_xvac = false,
6370  replace_xmax = false,
6371  freeze_xmax = false;
6372  TransactionId xid;
6373 
6374  frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
6375  frz->t_infomask2 = tuple->t_infomask2;
6376  frz->t_infomask = tuple->t_infomask;
6377  frz->frzflags = 0;
6378  frz->checkflags = 0;
6379 
6380  /*
6381  * Process xmin, while keeping track of whether it's already frozen, or
6382  * will become frozen iff our freeze plan is executed by caller (could be
6383  * neither).
6384  */
6385  xid = HeapTupleHeaderGetXmin(tuple);
6386  if (!TransactionIdIsNormal(xid))
6387  xmin_already_frozen = true;
6388  else
6389  {
6390  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6391  ereport(ERROR,
6392  (errcode(ERRCODE_DATA_CORRUPTED),
6393  errmsg_internal("found xmin %u from before relfrozenxid %u",
6394  xid, cutoffs->relfrozenxid)));
6395 
6396  /* Will set freeze_xmin flags in freeze plan below */
6397  freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6398 
6399  /* Verify that xmin committed if and when freeze plan is executed */
6400  if (freeze_xmin)
6401  frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
6402  }
6403 
6404  /*
6405  * Old-style VACUUM FULL is gone, but we have to process xvac for as long
6406  * as we support having MOVED_OFF/MOVED_IN tuples in the database
6407  */
6408  xid = HeapTupleHeaderGetXvac(tuple);
6409  if (TransactionIdIsNormal(xid))
6410  {
6411  Assert(TransactionIdIsNormal(HeapTupleHeaderGetXmin(tuple)));
6412  Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
6413 
6414  /*
6415  * For Xvac, we always freeze proactively. This allows totally_frozen
6416  * tracking to ignore xvac.
6417  */
6418  replace_xvac = pagefrz->freeze_required = true;
6419 
6420  /* Will set replace_xvac flags in freeze plan below */
6421  }
6422 
6423  /* Now process xmax */
6424  xid = frz->xmax;
6425  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
6426  {
6427  /* Raw xmax is a MultiXactId */
6428  TransactionId newxmax;
6429  uint16 flags;
6430 
6431  /*
6432  * We will either remove xmax completely (in the "freeze_xmax" path),
6433  * process xmax by replacing it (in the "replace_xmax" path), or
6434  * perform no-op xmax processing. The only constraint is that the
6435  * FreezeLimit/MultiXactCutoff postcondition must never be violated.
6436  */
6437  newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
6438  &flags, pagefrz);
6439 
6440  if (flags & FRM_NOOP)
6441  {
6442  /*
6443  * xmax is a MultiXactId, and nothing about it changes for now.
6444  * This is the only case where 'freeze_required' won't have been
6445  * set for us by FreezeMultiXactId, as well as the only case where
6446  * neither freeze_xmax nor replace_xmax are set (given a multi).
6447  *
6448  * This is a no-op, but the call to FreezeMultiXactId might have
6449  * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
6450  * for us (the "freeze page" variants, specifically). That'll
6451  * make it safe for our caller to freeze the page later on, while
6452  * leaving this particular xmax undisturbed.
6453  *
6454  * FreezeMultiXactId is _not_ responsible for the "no freeze"
6455  * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
6456  * job. A call to heap_tuple_should_freeze for this same tuple
6457  * will take place below if 'freeze_required' isn't set already.
6458  * (This repeats work from FreezeMultiXactId, but allows "no
6459  * freeze" tracker maintenance to happen in only one place.)
6460  */
6461  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
6462  Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
6463  }
6464  else if (flags & FRM_RETURN_IS_XID)
6465  {
6466  /*
6467  * xmax will become an updater Xid (original MultiXact's updater
6468  * member Xid will be carried forward as a simple Xid in Xmax).
6469  */
6470  Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
6471 
6472  /*
6473  * NB -- some of these transformations are only valid because we
6474  * know the return Xid is a tuple updater (i.e. not merely a
6475  * locker.) Also note that the only reason we don't explicitly
6476  * worry about HEAP_KEYS_UPDATED is because it lives in
6477  * t_infomask2 rather than t_infomask.
6478  */
6479  frz->t_infomask &= ~HEAP_XMAX_BITS;
6480  frz->xmax = newxmax;
6481  if (flags & FRM_MARK_COMMITTED)
6482  frz->t_infomask |= HEAP_XMAX_COMMITTED;
6483  replace_xmax = true;
6484  }
6485  else if (flags & FRM_RETURN_IS_MULTI)
6486  {
6487  uint16 newbits;
6488  uint16 newbits2;
6489 
6490  /*
6491  * xmax is an old MultiXactId that we have to replace with a new
6492  * MultiXactId, to carry forward two or more original member XIDs.
6493  */
6494  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
6495 
6496  /*
6497  * We can't use GetMultiXactIdHintBits directly on the new multi
6498  * here; that routine initializes the masks to all zeroes, which
6499  * would lose other bits we need. Doing it this way ensures all
6500  * unrelated bits remain untouched.
6501  */
6502  frz->t_infomask &= ~HEAP_XMAX_BITS;
6503  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6504  GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
6505  frz->t_infomask |= newbits;
6506  frz->t_infomask2 |= newbits2;
6507  frz->xmax = newxmax;
6508  replace_xmax = true;
6509  }
6510  else
6511  {
6512  /*
6513  * Freeze plan for tuple "freezes xmax" in the strictest sense:
6514  * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
6515  */
6516  Assert(flags & FRM_INVALIDATE_XMAX);
6517  Assert(!TransactionIdIsValid(newxmax));
6518 
6519  /* Will set freeze_xmax flags in freeze plan below */
6520  freeze_xmax = true;
6521  }
6522 
6523  /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
6524  Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
6525  }
6526  else if (TransactionIdIsNormal(xid))
6527  {
6528  /* Raw xmax is normal XID */
6529  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6530  ereport(ERROR,
6531  (errcode(ERRCODE_DATA_CORRUPTED),
6532  errmsg_internal("found xmax %u from before relfrozenxid %u",
6533  xid, cutoffs->relfrozenxid)));
6534 
6535  /* Will set freeze_xmax flags in freeze plan below */
6536  freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6537 
6538  /*
6539  * Verify that xmax aborted if and when freeze plan is executed,
6540  * provided it's from an update. (A lock-only xmax can be removed
6541  * independent of this, since the lock is released at xact end.)
6542  */
6543  if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
6544  frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
6545  }
6546  else if (!TransactionIdIsValid(xid))
6547  {
6548  /* Raw xmax is InvalidTransactionId XID */
6549  Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
6550  xmax_already_frozen = true;
6551  }
6552  else
6553  ereport(ERROR,
6554  (errcode(ERRCODE_DATA_CORRUPTED),
6555  errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
6556  xid, tuple->t_infomask)));
6557 
6558  if (freeze_xmin)
6559  {
6560  Assert(!xmin_already_frozen);
6561 
6562  frz->t_infomask |= HEAP_XMIN_FROZEN;
6563  }
6564  if (replace_xvac)
6565  {
6566  /*
6567  * If a MOVED_OFF tuple is not dead, the xvac transaction must have
6568  * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
6569  * transaction succeeded.
6570  */
6571  Assert(pagefrz->freeze_required);
6572  if (tuple->t_infomask & HEAP_MOVED_OFF)
6573  frz->frzflags |= XLH_INVALID_XVAC;
6574  else
6575  frz->frzflags |= XLH_FREEZE_XVAC;
6576  }
6577  if (replace_xmax)
6578  {
6579  Assert(!xmax_already_frozen && !freeze_xmax);
6580  Assert(pagefrz->freeze_required);
6581 
6582  /* Already set replace_xmax flags in freeze plan earlier */
6583  }
6584  if (freeze_xmax)
6585  {
6586  Assert(!xmax_already_frozen && !replace_xmax);
6587 
6588  frz->xmax = InvalidTransactionId;
6589 
6590  /*
6591  * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
6592  * LOCKED. Normalize to INVALID just to be sure no one gets confused.
6593  * Also get rid of the HEAP_KEYS_UPDATED bit.
6594  */
6595  frz->t_infomask &= ~HEAP_XMAX_BITS;
6596  frz->t_infomask |= HEAP_XMAX_INVALID;
6597  frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
6598  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6599  }
6600 
6601  /*
6602  * Determine if this tuple is already totally frozen, or will become
6603  * totally frozen (provided caller executes freeze plans for the page)
6604  */
6605  *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
6606  (freeze_xmax || xmax_already_frozen));
6607 
6608  if (!pagefrz->freeze_required && !(xmin_already_frozen &&
6609  xmax_already_frozen))
6610  {
6611  /*
6612  * So far no previous tuple from the page made freezing mandatory.
6613  * Does this tuple force caller to freeze the entire page?
6614  */
6615  pagefrz->freeze_required =
6616  heap_tuple_should_freeze(tuple, cutoffs,
6617  &pagefrz->NoFreezePageRelfrozenXid,
6618  &pagefrz->NoFreezePageRelminMxid);
6619  }
6620 
6621  /* Tell caller if this tuple has a usable freeze plan set in *frz */
6622  return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
6623 }
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:6959
#define FRM_RETURN_IS_XID
Definition: heapam.c:5960
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6011
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7376
#define FRM_MARK_COMMITTED
Definition: heapam.c:5962
#define FRM_NOOP
Definition: heapam.c:5958
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:5961
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:5959
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:324
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:323
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_HOT_UPDATED
Definition: htup_details.h:276
#define HeapTupleHeaderGetXvac(tup)
Definition: htup_details.h:411
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3156
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:190
bool freeze_required
Definition: heapam.h:152
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:189
uint8 frzflags
Definition: heapam.h:117
uint16 t_infomask2
Definition: heapam.h:115
TransactionId xmax
Definition: heapam.h:114
uint16 t_infomask
Definition: heapam.h:116
TransactionId OldestXmin
Definition: vacuum.h:265
MultiXactId OldestMxact
Definition: vacuum.h:266
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

References Assert(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_HOT_UPDATED, HEAP_KEYS_UPDATED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_BITS, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and lazy_scan_prune().
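
heap_prepare_freeze_tuple() only builds a plan; nothing on the page changes until the caller applies the accumulated HeapTupleFreeze entries. The sketch below shows the calling pattern in condensed form, loosely modelled on lazy_scan_prune(); the helper name collect_freeze_plans() and its exact arguments are assumptions for illustration, not part of the heapam API:

#include "postgres.h"

#include "access/heapam.h"
#include "commands/vacuum.h"
#include "storage/bufpage.h"

/*
 * Hypothetical helper sketching how heap_prepare_freeze_tuple() is used:
 * build freeze plans for every normal item on 'page' without modifying
 * anything.  Returns the number of plans stored in 'plans' (an array the
 * caller sized to MaxHeapTuplesPerPage).  '*all_frozen' reports whether
 * every tuple already is, or would become, totally frozen.
 */
static int
collect_freeze_plans(Page page, const struct VacuumCutoffs *cutoffs,
                     HeapPageFreeze *pagefrz, HeapTupleFreeze *plans,
                     bool *all_frozen)
{
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);
    int         nplans = 0;

    *all_frozen = true;

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleHeader htup;
        bool        totally_frozen;

        if (!ItemIdIsNormal(itemid))
            continue;           /* skip unused, redirect and dead line pointers */

        htup = (HeapTupleHeader) PageGetItem(page, itemid);

        if (heap_prepare_freeze_tuple(htup, cutoffs, pagefrz,
                                      &plans[nplans], &totally_frozen))
            nplans++;           /* a usable freeze plan was written to plans[] */

        if (!totally_frozen)
            *all_frozen = false;    /* page can't be treated as all-frozen */
    }

    return nplans;
}

Whether the collected plans are then executed is the caller's decision: pagefrz->freeze_required forces it, and VACUUM may also choose to freeze eagerly based on its own page-level policy.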

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1016 of file heapam.c.

1018 {
1019  HeapScanDesc scan = (HeapScanDesc) sscan;
1020 
1021  if (set_params)
1022  {
1023  if (allow_strat)
1024  scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1025  else
1026  scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1027 
1028  if (allow_sync)
1029  scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1030  else
1031  scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1032 
1033  if (allow_pagemode && scan->rs_base.rs_snapshot &&
1034  IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1035  scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1036  else
1037  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1038  }
1039 
1040  /*
1041  * unpin scan buffers
1042  */
1043  if (BufferIsValid(scan->rs_cbuf))
1044  ReleaseBuffer(scan->rs_cbuf);
1045 
1046  /*
1047  * reinitialize scan descriptor
1048  */
1049  initscan(scan, key, true);
1050 }
@ SO_ALLOW_STRAT
Definition: tableam.h:57
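
In backend code heap_rescan() is normally reached through the table AM layer rather than called directly, but its effect is easiest to see spelled out. A hedged sketch, assuming 'rel' and 'snapshot' are already set up elsewhere and no scan keys are needed:

/*
 * Hypothetical two-pass sequential scan over 'rel'.  The flag values and
 * the absence of scan keys are illustrative choices, not requirements.
 */
TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL,
                                    NULL,   /* no parallel scan */
                                    SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
HeapTuple   tuple;

while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
    /* ... first pass over the relation ... */
}

/*
 * Restart from the beginning.  set_params = true lets the second pass turn
 * on the buffer-strategy and syncscan options; allow_pagemode is re-derived
 * from the snapshot exactly as shown in the listing above.
 */
heap_rescan(scan, NULL, true /* set_params */ ,
            true /* allow_strat */ , true /* allow_sync */ ,
            true /* allow_pagemode */ );

while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
    /* ... second pass ... */
}

heap_endscan(scan);

Note that heap_rescan() only touches the SO_ALLOW_* flags when set_params is true; with set_params = false it simply releases the pinned buffer and reinitializes the scan with its existing options.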