PostgreSQL Source Code (git master)
heapam.h File Reference
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
[Include dependency graph for heapam.h not reproduced in this text rendering.]
[Graph of files that directly or indirectly include heapam.h not reproduced.]


Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneResult PruneResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

static HTSV_Result htsv_get_valid_status (int status)
 
TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heapgetpage (TableScanDesc sscan, BlockNumber block)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, int options, struct TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, int options, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
 
TM_Result heap_lock_tuple (Relation relation, ItemPointer tid, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, struct TM_FailureData *tmfd)
 
void heap_inplace_update (Relation relation, HeapTuple tuple)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_freeze_execute_prepared (Relation rel, Buffer buffer, TransactionId snapshotConflictHorizon, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune (Relation relation, Buffer buffer, struct GlobalVisState *vistest, bool mark_unused_now, PruneResult *presult, PruneReason reason, OffsetNumber *off_loc)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool lp_truncate_only, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 108 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 107 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 35 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 34 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 37 of file heapam.h.
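
The HEAP_INSERT_* flags are OR'ed together into the options argument of heap_insert() / heap_multi_insert(). A minimal, hedged sketch (not taken from the source tree; the open relation and the tuple built with heap_form_tuple() are assumed to be supplied by the caller):

    /* e.g. a table-rewrite style insertion: bypass the FSM and skip logical decoding */
    int     options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_NO_LOGICAL;

    heap_insert(relation,                  /* already-open heap relation */
                tup,                       /* HeapTuple to insert */
                GetCurrentCommandId(true), /* command id of the inserting command */
                options,
                NULL);                     /* no BulkInsertState */

HEAP_INSERT_SPECULATIVE is the one flag with no TABLE_INSERT_* counterpart; it is used by the speculative-insertion (INSERT ... ON CONFLICT) path sketched under heap_abort_speculative() below.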

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 249 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 43 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

typedef struct BulkInsertStateData* BulkInsertState

Definition at line 39 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 80 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneResult

typedef struct PruneResult PruneResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 94 of file heapam.h.

95 {
96  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
97  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
98  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
99  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
100  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
101 } HTSV_Result;
HTSV_Result
Definition: heapam.h:95
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:98
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:99
@ HEAPTUPLE_LIVE
Definition: heapam.h:97
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:100
@ HEAPTUPLE_DEAD
Definition: heapam.h:96
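
HTSV_Result is returned by HeapTupleSatisfiesVacuum() and HeapTupleSatisfiesVacuumHorizon(). A hedged sketch of how a caller might branch on it (the tuple, a share-locked buffer, and an OldestXmin horizon are assumed to have been obtained elsewhere):

    switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buffer))
    {
        case HEAPTUPLE_DEAD:
            /* storage is reclaimable */
            break;
        case HEAPTUPLE_RECENTLY_DEAD:
            /* dead, but possibly still visible to old snapshots: keep it for now */
            break;
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            /* tuple must be kept; the in-progress cases may warrant a later recheck */
            break;
    }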

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 215 of file heapam.h.

216 {
217  PRUNE_ON_ACCESS, /* on-access pruning */
218  PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
219  PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
220 } PruneReason;
PruneReason
Definition: heapam.h:216
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:219
@ PRUNE_ON_ACCESS
Definition: heapam.h:217
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:218
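
Per the comments above, PRUNE_ON_ACCESS corresponds to opportunistic pruning via heap_page_prune_opt(), while the two VACUUM values identify the first and second heap passes that go through heap_page_prune() and log_heap_prune_and_freeze(). A hedged sketch of the on-access path only (rel and blkno are assumed; heap_page_prune_opt() is normally called on a pinned buffer before the caller takes a content lock, and it prunes only if a cleanup lock is cheaply available):

    Buffer  buf = ReadBuffer(rel, blkno);

    heap_page_prune_opt(rel, buf);   /* may or may not prune; does not block for the lock */
    ReleaseBuffer(buf);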

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1771 of file heapam.c.

1772 {
1773  if (bistate->current_buf != InvalidBuffer)
1774  ReleaseBuffer(bistate->current_buf);
1775  FreeAccessStrategy(bistate->strategy);
1776  pfree(bistate);
1777 }
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4560
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:639
void pfree(void *pointer)
Definition: mcxt.c:1508
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1754 of file heapam.c.

1755 {
1756  BulkInsertState bistate;
1757 
1758  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1759  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1760  bistate->current_buf = InvalidBuffer;
1761  bistate->next_free = InvalidBlockNumber;
1762  bistate->last_free = InvalidBlockNumber;
1763  bistate->already_extended_by = 0;
1764  return bistate;
1765 }
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:37
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:39
void * palloc(Size size)
Definition: mcxt.c:1304
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
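
GetBulkInsertState() and FreeBulkInsertState() bracket a bulk load: the same BulkInsertState is passed to every heap_insert() or heap_multi_insert() call so the BAS_BULKWRITE strategy and the current target buffer are reused, as the COPY and CREATE TABLE AS callers listed above do. A hedged sketch of the pattern (next_tuple_to_load() is a hypothetical helper standing in for tuple construction):

    BulkInsertState bistate = GetBulkInsertState();
    HeapTuple       tup;

    while ((tup = next_tuple_to_load()) != NULL)    /* hypothetical source of tuples */
        heap_insert(relation, tup, GetCurrentCommandId(true),
                    HEAP_INSERT_SKIP_FSM, bistate); /* flags depend on the caller */

    FreeBulkInsertState(bistate);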

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5838 of file heapam.c.

5839 {
5840  TransactionId xid = GetCurrentTransactionId();
5841  ItemId lp;
5842  HeapTupleData tp;
5843  Page page;
5844  BlockNumber block;
5845  Buffer buffer;
5846  TransactionId prune_xid;
5847 
5848  Assert(ItemPointerIsValid(tid));
5849 
5850  block = ItemPointerGetBlockNumber(tid);
5851  buffer = ReadBuffer(relation, block);
5852  page = BufferGetPage(buffer);
5853 
5855 
5856  /*
5857  * Page can't be all visible, we just inserted into it, and are still
5858  * running.
5859  */
5860  Assert(!PageIsAllVisible(page));
5861 
5862  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
5863  Assert(ItemIdIsNormal(lp));
5864 
5865  tp.t_tableOid = RelationGetRelid(relation);
5866  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
5867  tp.t_len = ItemIdGetLength(lp);
5868  tp.t_self = *tid;
5869 
5870  /*
5871  * Sanity check that the tuple really is a speculatively inserted tuple,
5872  * inserted by us.
5873  */
5874  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
5875  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
5876  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
5877  elog(ERROR, "attempted to kill a non-speculative tuple");
5879 
5880  /*
5881  * No need to check for serializable conflicts here. There is never a
5882  * need for a combo CID, either. No need to extract replica identity, or
5883  * do anything special with infomask bits.
5884  */
5885 
5887 
5888  /*
5889  * The tuple will become DEAD immediately. Flag that this page is a
5890  * candidate for pruning by setting xmin to TransactionXmin. While not
5891  * immediately prunable, it is the oldest xid we can cheaply determine
5892  * that's safe against wraparound / being older than the table's
5893  * relfrozenxid. To defend against the unlikely case of a new relation
5894  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
5895  * if so (vacuum can't subsequently move relfrozenxid to beyond
5896  * TransactionXmin, so there's no race here).
5897  */
5899  if (TransactionIdPrecedes(TransactionXmin, relation->rd_rel->relfrozenxid))
5900  prune_xid = relation->rd_rel->relfrozenxid;
5901  else
5902  prune_xid = TransactionXmin;
5903  PageSetPrunable(page, prune_xid);
5904 
5905  /* store transaction information of xact deleting the tuple */
5908 
5909  /*
5910  * Set the tuple header xmin to InvalidTransactionId. This makes the
5911  * tuple immediately invisible everyone. (In particular, to any
5912  * transactions waiting on the speculative token, woken up later.)
5913  */
5915 
5916  /* Clear the speculative insertion token too */
5917  tp.t_data->t_ctid = tp.t_self;
5918 
5919  MarkBufferDirty(buffer);
5920 
5921  /*
5922  * XLOG stuff
5923  *
5924  * The WAL records generated here match heap_delete(). The same recovery
5925  * routines are used.
5926  */
5927  if (RelationNeedsWAL(relation))
5928  {
5929  xl_heap_delete xlrec;
5930  XLogRecPtr recptr;
5931 
5932  xlrec.flags = XLH_DELETE_IS_SUPER;
5934  tp.t_data->t_infomask2);
5936  xlrec.xmax = xid;
5937 
5938  XLogBeginInsert();
5939  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
5940  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5941 
5942  /* No replica identity & replication origin logged */
5943 
5944  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
5945 
5946  PageSetLSN(page, recptr);
5947  }
5948 
5949  END_CRIT_SECTION();
5950 
5951  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5952 
5953  if (HeapTupleHasExternal(&tp))
5954  {
5955  Assert(!IsToastRelation(relation));
5956  heap_toast_delete(relation, &tp, true);
5957  }
5958 
5959  /*
5960  * Never need to mark tuple for invalidation, since catalogs don't support
5961  * speculative insertion
5962  */
5963 
5964  /* Now we can release the buffer */
5965  ReleaseBuffer(buffer);
5966 
5967  /* count deletion, as we counted the insertion too */
5968  pgstat_count_heap_delete(relation);
5969 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2189
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4795
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:734
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:159
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define PageSetPrunable(page, xid)
Definition: bufpage.h:444
uint32 TransactionId
Definition: c.h:639
bool IsToastRelation(Relation relation)
Definition: catalog.c:145
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2465
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:33
#define SizeOfHeapDelete
Definition: heapam_xlog.h:120
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:104
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:275
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:499
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:315
#define HEAP_XMAX_BITS
Definition: htup_details.h:267
#define HeapTupleHasExternal(tuple)
Definition: htup_details.h:671
#define HEAP_MOVED
Definition: htup_details.h:213
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
Assert(fmt[strlen(fmt) - 1] !='\n')
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:507
#define RelationNeedsWAL(relation)
Definition: rel.h:630
TransactionId TransactionXmin
Definition: snapmgr.c:98
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
union HeapTupleHeaderData::@47 t_choice
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:114
OffsetNumber offnum
Definition: heapam_xlog.h:115
uint8 infobits_set
Definition: heapam_xlog.h:116
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:451
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().
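
heap_abort_speculative() is the failure half of the speculative-insertion protocol used for INSERT ... ON CONFLICT; heap_finish_speculative() is the success half. A hedged outline of the sequence (token locking and the actual conflict check, which live in the executor and heapam_handler.c, are reduced to a placeholder condition):

    /* insert the tuple carrying a speculative token instead of a normal ctid */
    heap_insert(relation, tup, cid, HEAP_INSERT_SPECULATIVE, NULL);

    if (no_conflict_found)                                  /* hypothetical condition */
        heap_finish_speculative(relation, &tup->t_self);    /* confirm: set the real t_ctid */
    else
        heap_abort_speculative(relation, &tup->t_self);     /* "super-delete" the tuple */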

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 924 of file heapam.c.

928 {
929  HeapScanDesc scan;
930 
931  /*
932  * increment relation ref count while scanning relation
933  *
934  * This is just to make really sure the relcache entry won't go away while
935  * the scan has a pointer to it. Caller should be holding the rel open
936  * anyway, so this is redundant in all normal scenarios...
937  */
938  RelationIncrementReferenceCount(relation);
939 
940  /*
941  * allocate and initialize scan descriptor
942  */
943  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
944 
945  scan->rs_base.rs_rd = relation;
946  scan->rs_base.rs_snapshot = snapshot;
947  scan->rs_base.rs_nkeys = nkeys;
948  scan->rs_base.rs_flags = flags;
949  scan->rs_base.rs_parallel = parallel_scan;
950  scan->rs_strategy = NULL; /* set in initscan */
951 
952  /*
953  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
954  */
955  if (!(snapshot && IsMVCCSnapshot(snapshot)))
956  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
957 
958  /*
959  * For seqscan and sample scans in a serializable transaction, acquire a
960  * predicate lock on the entire relation. This is required not only to
961  * lock all the matching tuples, but also to conflict with new insertions
962  * into the table. In an indexscan, we take page locks on the index pages
963  * covering the range specified in the scan qual, but in a heap scan there
964  * is nothing more fine-grained to lock. A bitmap scan is a different
965  * story, there we have already scanned the index and locked the index
966  * pages covering the predicate. But in that case we still have to lock
967  * any matching heap tuples. For sample scan we could optimize the locking
968  * to be at least page-level granularity, but we'd need to add per-tuple
969  * locking for that.
970  */
971  if (scan->rs_base.rs_flags & (SO_TYPE_SEQSCAN | SO_TYPE_SAMPLESCAN))
972  {
973  /*
974  * Ensure a missing snapshot is noticed reliably, even if the
975  * isolation mode means predicate locking isn't performed (and
976  * therefore the snapshot isn't used here).
977  */
978  Assert(snapshot);
979  PredicateLockRelation(relation, snapshot);
980  }
981 
982  /* we only need to set this up once */
983  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
984 
985  /*
986  * Allocate memory to keep track of page allocation for parallel workers
987  * when doing a parallel scan.
988  */
989  if (parallel_scan != NULL)
990  scan->rs_parallelworkerdata = palloc(sizeof(ParallelBlockTableScanWorkerData));
991  else
992  scan->rs_parallelworkerdata = NULL;
993 
994  /*
995  * we do this here instead of in initscan() because heap_rescan also calls
996  * initscan() and we don't want to allocate memory again
997  */
998  if (nkeys > 0)
999  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1000  else
1001  scan->rs_base.rs_key = NULL;
1002 
1003  initscan(scan, key, false);
1004 
1005  return (TableScanDesc) scan;
1006 }
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:226
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:80
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2556
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2160
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
BufferAccessStrategy rs_strategy
Definition: heapam.h:65
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:73
HeapTupleData rs_ctup
Definition: heapam.h:67
TableScanDescData rs_base
Definition: heapam.h:50
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
struct ScanKeyData * rs_key
Definition: relscan.h:37
struct SnapshotData * rs_snapshot
Definition: relscan.h:35
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:49
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:50
@ SO_TYPE_SEQSCAN
Definition: tableam.h:48

References Assert(), if(), initscan(), IsMVCCSnapshot, sort-test::key, palloc(), PredicateLockRelation(), RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, and HeapTupleData::t_tableOid.
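
heap_beginscan() is normally reached through the table AM wrappers in tableam.h, but the call sequence is heap_beginscan() / heap_getnext() (or heap_getnextslot()) / heap_endscan(). A hedged sketch of a plain forward sequential scan with no scan keys (rel is assumed open and an active snapshot pushed):

    TableScanDesc scan;
    HeapTuple     tuple;

    scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL,
                          NULL,      /* not a parallel scan */
                          SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                          SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* tuple points into a pinned buffer; process it before advancing the scan */
    }

    heap_endscan(scan);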

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
int  options,
struct TM_FailureData tmfd,
bool  changingPart,
TupleTableSlot oldSlot 
)

Definition at line 2511 of file heapam.c.

2515 {
2516  TM_Result result;
2518  ItemId lp;
2519  HeapTupleData tp;
2520  Page page;
2521  BlockNumber block;
2522  Buffer buffer;
2523  Buffer vmbuffer = InvalidBuffer;
2524  TransactionId new_xmax;
2525  uint16 new_infomask,
2526  new_infomask2;
2527  bool have_tuple_lock = false;
2528  bool iscombo;
2529  bool all_visible_cleared = false;
2530  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2531  bool old_key_copied = false;
2532 
2533  Assert(ItemPointerIsValid(tid));
2534 
2535  /*
2536  * Forbid this during a parallel operation, lest it allocate a combo CID.
2537  * Other workers might need that combo CID for visibility checks, and we
2538  * have no provision for broadcasting it to them.
2539  */
2540  if (IsInParallelMode())
2541  ereport(ERROR,
2542  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2543  errmsg("cannot delete tuples during a parallel operation")));
2544 
2545  block = ItemPointerGetBlockNumber(tid);
2546  buffer = ReadBuffer(relation, block);
2547  page = BufferGetPage(buffer);
2548 
2549  /*
2550  * Before locking the buffer, pin the visibility map page if it appears to
2551  * be necessary. Since we haven't got the lock yet, someone else might be
2552  * in the middle of changing this, so we'll need to recheck after we have
2553  * the lock.
2554  */
2555  if (PageIsAllVisible(page))
2556  visibilitymap_pin(relation, block, &vmbuffer);
2557 
2559 
2560  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2561  Assert(ItemIdIsNormal(lp));
2562 
2563  tp.t_tableOid = RelationGetRelid(relation);
2564  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2565  tp.t_len = ItemIdGetLength(lp);
2566  tp.t_self = *tid;
2567 
2568 l1:
2569 
2570  /*
2571  * If we didn't pin the visibility map page and the page has become all
2572  * visible while we were busy locking the buffer, we'll have to unlock and
2573  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2574  * unfortunate, but hopefully shouldn't happen often.
2575  */
2576  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2577  {
2578  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2579  visibilitymap_pin(relation, block, &vmbuffer);
2581  }
2582 
2583  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2584 
2585  if (result == TM_Invisible)
2586  {
2587  UnlockReleaseBuffer(buffer);
2588  ereport(ERROR,
2589  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2590  errmsg("attempted to delete invisible tuple")));
2591  }
2592  else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
2593  {
2594  TransactionId xwait;
2595  uint16 infomask;
2596 
2597  /* must copy state data before unlocking buffer */
2598  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2599  infomask = tp.t_data->t_infomask;
2600 
2601  /*
2602  * Sleep until concurrent transaction ends -- except when there's a
2603  * single locker and it's our own transaction. Note we don't care
2604  * which lock mode the locker has, because we need the strongest one.
2605  *
2606  * Before sleeping, we need to acquire tuple lock to establish our
2607  * priority for the tuple (see heap_lock_tuple). LockTuple will
2608  * release us when we are next-in-line for the tuple.
2609  *
2610  * If we are forced to "start over" below, we keep the tuple lock;
2611  * this arranges that we stay at the head of the line while rechecking
2612  * tuple state.
2613  */
2614  if (infomask & HEAP_XMAX_IS_MULTI)
2615  {
2616  bool current_is_member = false;
2617 
2618  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2619  LockTupleExclusive, &current_is_member))
2620  {
2621  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2622 
2623  /*
2624  * Acquire the lock, if necessary (but skip it when we're
2625  * requesting a lock and already have one; avoids deadlock).
2626  */
2627  if (!current_is_member)
2629  LockWaitBlock, &have_tuple_lock);
2630 
2631  /* wait for multixact */
2633  relation, &(tp.t_self), XLTW_Delete,
2634  NULL);
2636 
2637  /*
2638  * If xwait had just locked the tuple then some other xact
2639  * could update this tuple before we get to this point. Check
2640  * for xmax change, and start over if so.
2641  *
2642  * We also must start over if we didn't pin the VM page, and
2643  * the page has become all visible.
2644  */
2645  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2646  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2648  xwait))
2649  goto l1;
2650  }
2651 
2652  /*
2653  * You might think the multixact is necessarily done here, but not
2654  * so: it could have surviving members, namely our own xact or
2655  * other subxacts of this backend. It is legal for us to delete
2656  * the tuple in either case, however (the latter case is
2657  * essentially a situation of upgrading our former shared lock to
2658  * exclusive). We don't bother changing the on-disk hint bits
2659  * since we are about to overwrite the xmax altogether.
2660  */
2661  }
2662  else if (!TransactionIdIsCurrentTransactionId(xwait))
2663  {
2664  /*
2665  * Wait for regular transaction to end; but first, acquire tuple
2666  * lock.
2667  */
2668  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2670  LockWaitBlock, &have_tuple_lock);
2671  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2673 
2674  /*
2675  * xwait is done, but if xwait had just locked the tuple then some
2676  * other xact could update this tuple before we get to this point.
2677  * Check for xmax change, and start over if so.
2678  *
2679  * We also must start over if we didn't pin the VM page, and the
2680  * page has become all visible.
2681  */
2682  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2683  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2685  xwait))
2686  goto l1;
2687 
2688  /* Otherwise check if it committed or aborted */
2689  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2690  }
2691 
2692  /*
2693  * We may overwrite if previous xmax aborted, or if it committed but
2694  * only locked the tuple without updating it.
2695  */
2696  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2699  result = TM_Ok;
2700  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2701  result = TM_Updated;
2702  else
2703  result = TM_Deleted;
2704  }
2705 
2706  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2707  if (result != TM_Ok)
2708  {
2709  Assert(result == TM_SelfModified ||
2710  result == TM_Updated ||
2711  result == TM_Deleted ||
2712  result == TM_BeingModified);
2714  Assert(result != TM_Updated ||
2715  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2716  }
2717 
2718  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2719  {
2720  /* Perform additional check for transaction-snapshot mode RI updates */
2721  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2722  result = TM_Updated;
2723  }
2724 
2725  if (result != TM_Ok)
2726  {
2727  tmfd->ctid = tp.t_data->t_ctid;
2729  if (result == TM_SelfModified)
2730  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2731  else
2732  tmfd->cmax = InvalidCommandId;
2733 
2734  /*
2735  * If we're asked to lock the updated tuple, we just fetch the
2736  * existing tuple. That let's the caller save some resources on
2737  * placing the lock.
2738  */
2739  if (result == TM_Updated &&
2741  {
2742  BufferHeapTupleTableSlot *bslot;
2743 
2744  Assert(TTS_IS_BUFFERTUPLE(oldSlot));
2745  bslot = (BufferHeapTupleTableSlot *) oldSlot;
2746 
2747  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2748  bslot->base.tupdata = tp;
2749  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
2750  oldSlot,
2751  buffer);
2752  }
2753  else
2754  {
2755  UnlockReleaseBuffer(buffer);
2756  }
2757  if (have_tuple_lock)
2758  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2759  if (vmbuffer != InvalidBuffer)
2760  ReleaseBuffer(vmbuffer);
2761  return result;
2762  }
2763 
2764  /*
2765  * We're about to do the actual delete -- check for conflict first, to
2766  * avoid possibly having to roll back work we've just done.
2767  *
2768  * This is safe without a recheck as long as there is no possibility of
2769  * another process scanning the page between this check and the delete
2770  * being visible to the scan (i.e., an exclusive buffer content lock is
2771  * continuously held from this point until the tuple delete is visible).
2772  */
2773  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2774 
2775  /* replace cid with a combo CID if necessary */
2776  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2777 
2778  /*
2779  * Compute replica identity tuple before entering the critical section so
2780  * we don't PANIC upon a memory allocation failure.
2781  */
2782  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2783 
2784  /*
2785  * If this is the first possibly-multixact-able operation in the current
2786  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2787  * certain that the transaction will never become a member of any older
2788  * MultiXactIds than that. (We have to do this even if we end up just
2789  * using our own TransactionId below, since some other backend could
2790  * incorporate our XID into a MultiXact immediately afterwards.)
2791  */
2793 
2796  xid, LockTupleExclusive, true,
2797  &new_xmax, &new_infomask, &new_infomask2);
2798 
2800 
2801  /*
2802  * If this transaction commits, the tuple will become DEAD sooner or
2803  * later. Set flag that this page is a candidate for pruning once our xid
2804  * falls below the OldestXmin horizon. If the transaction finally aborts,
2805  * the subsequent page pruning will be a no-op and the hint will be
2806  * cleared.
2807  */
2808  PageSetPrunable(page, xid);
2809 
2810  if (PageIsAllVisible(page))
2811  {
2812  all_visible_cleared = true;
2813  PageClearAllVisible(page);
2814  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2815  vmbuffer, VISIBILITYMAP_VALID_BITS);
2816  }
2817 
2818  /* store transaction information of xact deleting the tuple */
2821  tp.t_data->t_infomask |= new_infomask;
2822  tp.t_data->t_infomask2 |= new_infomask2;
2824  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2825  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2826  /* Make sure there is no forward chain link in t_ctid */
2827  tp.t_data->t_ctid = tp.t_self;
2828 
2829  /* Signal that this is actually a move into another partition */
2830  if (changingPart)
2832 
2833  MarkBufferDirty(buffer);
2834 
2835  /*
2836  * XLOG stuff
2837  *
2838  * NB: heap_abort_speculative() uses the same xlog record and replay
2839  * routines.
2840  */
2841  if (RelationNeedsWAL(relation))
2842  {
2843  xl_heap_delete xlrec;
2844  xl_heap_header xlhdr;
2845  XLogRecPtr recptr;
2846 
2847  /*
2848  * For logical decode we need combo CIDs to properly decode the
2849  * catalog
2850  */
2852  log_heap_new_cid(relation, &tp);
2853 
2854  xlrec.flags = 0;
2855  if (all_visible_cleared)
2857  if (changingPart)
2860  tp.t_data->t_infomask2);
2862  xlrec.xmax = new_xmax;
2863 
2864  if (old_key_tuple != NULL)
2865  {
2866  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
2868  else
2870  }
2871 
2872  XLogBeginInsert();
2873  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
2874 
2875  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2876 
2877  /*
2878  * Log replica identity of the deleted tuple if there is one
2879  */
2880  if (old_key_tuple != NULL)
2881  {
2882  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
2883  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
2884  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
2885 
2886  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
2887  XLogRegisterData((char *) old_key_tuple->t_data
2889  old_key_tuple->t_len
2891  }
2892 
2893  /* filtering by origin on a row level is much more efficient */
2895 
2896  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
2897 
2898  PageSetLSN(page, recptr);
2899  }
2900 
2901  END_CRIT_SECTION();
2902 
2903  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2904 
2905  if (vmbuffer != InvalidBuffer)
2906  ReleaseBuffer(vmbuffer);
2907 
2908  /*
2909  * If the tuple has toasted out-of-line attributes, we need to delete
2910  * those items too. We have to do this before releasing the buffer
2911  * because we need to look at the contents of the tuple, but it's OK to
2912  * release the content lock on the buffer first.
2913  */
2914  if (relation->rd_rel->relkind != RELKIND_RELATION &&
2915  relation->rd_rel->relkind != RELKIND_MATVIEW)
2916  {
2917  /* toast table entries should never be recursively toasted */
2919  }
2920  else if (HeapTupleHasExternal(&tp))
2921  heap_toast_delete(relation, &tp, false);
2922 
2923  /*
2924  * Mark tuple for invalidation from system caches at next command
2925  * boundary. We have to do this before releasing the buffer because we
2926  * need to look at the contents of the tuple.
2927  */
2928  CacheInvalidateHeapTuple(relation, &tp, NULL);
2929 
2930  /* Fetch the old tuple version if we're asked for that. */
2932  {
2933  BufferHeapTupleTableSlot *bslot;
2934 
2935  Assert(TTS_IS_BUFFERTUPLE(oldSlot));
2936  bslot = (BufferHeapTupleTableSlot *) oldSlot;
2937 
2938  bslot->base.tupdata = tp;
2939  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
2940  oldSlot,
2941  buffer);
2942  }
2943  else
2944  {
2945  /* Now we can release the buffer */
2946  ReleaseBuffer(buffer);
2947  }
2948 
2949  /*
2950  * Release the lmgr tuple lock, if we had it.
2951  */
2952  if (have_tuple_lock)
2953  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2954 
2955  pgstat_count_heap_delete(relation);
2956 
2957  if (old_key_tuple != NULL && old_key_copied)
2958  heap_freetuple(old_key_tuple);
2959 
2960  return TM_Ok;
2961 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3377
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4577
static void PageClearAllVisible(Page page)
Definition: bufpage.h:436
#define InvalidCommandId
Definition: c.h:656
unsigned short uint16
Definition: c.h:492
TransactionId MultiXactId
Definition: c.h:649
void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup)
Definition: combocid.c:118
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ereport(elevel,...)
Definition: elog.h:149
TupleTableSlot * ExecStorePinnedBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1505
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7063
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:8527
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:4989
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:8608
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:4940
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7240
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2487
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:164
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:1732
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:103
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:101
#define SizeOfHeapHeader
Definition: heapam_xlog.h:156
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:105
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:102
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1434
#define HEAP_XMAX_IS_LOCKED_ONLY(infomask)
Definition: htup_details.h:227
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:376
#define HeapTupleHeaderClearHotUpdated(tup)
Definition: htup_details.h:494
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define HeapTupleHeaderSetMovedPartitions(tup)
Definition: htup_details.h:447
#define HeapTupleHeaderGetRawXmax(tup)
Definition: htup_details.h:371
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderSetCmax(tup, cid, iscombo)
Definition: htup_details.h:401
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1204
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:667
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:624
@ MultiXactStatusUpdate
Definition: multixact.h:46
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4316
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:686
#define InvalidSnapshot
Definition: snapshot.h:123
TransactionId xmax
Definition: tableam.h:143
CommandId cmax
Definition: tableam.h:144
ItemPointerData ctid
Definition: tableam.h:142
uint16 t_infomask
Definition: heapam_xlog.h:152
uint16 t_infomask2
Definition: heapam_xlog.h:151
TM_Result
Definition: tableam.h:72
@ TM_Ok
Definition: tableam.h:77
@ TM_BeingModified
Definition: tableam.h:99
@ TM_Deleted
Definition: tableam.h:92
@ TM_Updated
Definition: tableam.h:89
@ TM_SelfModified
Definition: tableam.h:83
@ TM_Invisible
Definition: tableam.h:80
#define TABLE_MODIFY_FETCH_OLD_TUPLE
Definition: tableam.h:267
#define TABLE_MODIFY_LOCK_UPDATED
Definition: tableam.h:269
#define TABLE_MODIFY_WAIT
Definition: tableam.h:265
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
#define TTS_IS_BUFFERTUPLE(slot)
Definition: tuptable.h:237
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:938
bool IsInParallelMode(void)
Definition: xact.c:1086
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:152
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExecStorePinnedBufferHeapTuple(), ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TABLE_MODIFY_FETCH_OLD_TUPLE, TABLE_MODIFY_LOCK_UPDATED, TABLE_MODIFY_WAIT, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TTS_IS_BUFFERTUPLE, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
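
heap_delete() reports its outcome as a TM_Result instead of throwing an error; simple_heap_delete() is the wrapper that treats anything but TM_Ok as a can't-happen case. A hedged sketch of calling it directly, using the development-branch signature shown above (relation, tid and error handling are assumed to come from the caller; no crosscheck snapshot, no old-tuple slot):

    TM_FailureData tmfd;
    TM_Result      result;

    result = heap_delete(relation, tid,
                         GetCurrentCommandId(true),
                         InvalidSnapshot,     /* no RI crosscheck snapshot */
                         TABLE_MODIFY_WAIT,   /* wait out concurrent updaters */
                         &tmfd,
                         false,               /* not a cross-partition move */
                         NULL);               /* old tuple not requested */

    switch (result)
    {
        case TM_Ok:
            break;
        case TM_SelfModified:
        case TM_Updated:
        case TM_Deleted:
            /* concurrent update or delete; inspect tmfd and report or retry */
            break;
        default:
            elog(ERROR, "unexpected heap_delete result: %d", result);
    }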

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1046 of file heapam.c.

1047 {
1048  HeapScanDesc scan = (HeapScanDesc) sscan;
1049 
1050  /* Note: no locking manipulations needed */
1051 
1052  /*
1053  * unpin scan buffers
1054  */
1055  if (BufferIsValid(scan->rs_cbuf))
1056  ReleaseBuffer(scan->rs_cbuf);
1057 
1058  /*
1059  * decrement relation reference count and free scan descriptor storage
1060  */
1061  RelationDecrementReferenceCount(scan->rs_base.rs_rd);
1062 
1063  if (scan->rs_base.rs_key)
1064  pfree(scan->rs_base.rs_key);
1065 
1066  if (scan->rs_strategy != NULL)
1067  FreeAccessStrategy(scan->rs_strategy);
1068 
1069  if (scan->rs_parallelworkerdata != NULL)
1070  pfree(scan->rs_parallelworkerdata);
1071 
1072  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1073  UnregisterSnapshot(scan->rs_base.rs_snapshot);
1074 
1075  pfree(scan);
1076 }
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2173
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:836
Buffer rs_cbuf
Definition: heapam.h:62
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:64

References BufferIsValid(), FreeAccessStrategy(), pfree(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1338 of file heapam.c.

1343 {
1344  ItemPointer tid = &(tuple->t_self);
1345  ItemId lp;
1346  Buffer buffer;
1347  Page page;
1348  OffsetNumber offnum;
1349  bool valid;
1350 
1351  /*
1352  * Fetch and pin the appropriate page of the relation.
1353  */
1354  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1355 
1356  /*
1357  * Need share lock on buffer to examine tuple commit status.
1358  */
1359  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1360  page = BufferGetPage(buffer);
1361 
1362  /*
1363  * We'd better check for out-of-range offnum in case of VACUUM since the
1364  * TID was obtained.
1365  */
1366  offnum = ItemPointerGetOffsetNumber(tid);
1367  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1368  {
1369  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1370  ReleaseBuffer(buffer);
1371  *userbuf = InvalidBuffer;
1372  tuple->t_data = NULL;
1373  return false;
1374  }
1375 
1376  /*
1377  * get the item line pointer corresponding to the requested tid
1378  */
1379  lp = PageGetItemId(page, offnum);
1380 
1381  /*
1382  * Must check for deleted tuple.
1383  */
1384  if (!ItemIdIsNormal(lp))
1385  {
1386  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1387  ReleaseBuffer(buffer);
1388  *userbuf = InvalidBuffer;
1389  tuple->t_data = NULL;
1390  return false;
1391  }
1392 
1393  /*
1394  * fill in *tuple fields
1395  */
1396  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1397  tuple->t_len = ItemIdGetLength(lp);
1398  tuple->t_tableOid = RelationGetRelid(relation);
1399 
1400  /*
1401  * check tuple visibility, then release lock
1402  */
1403  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1404 
1405  if (valid)
1406  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1407  HeapTupleHeaderGetXmin(tuple->t_data));
1408 
1409  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1410 
1411  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1412 
1413  if (valid)
1414  {
1415  /*
1416  * All checks passed, so return the tuple as valid. Caller is now
1417  * responsible for releasing the buffer.
1418  */
1419  *userbuf = buffer;
1420 
1421  return true;
1422  }
1423 
1424  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1425  if (keep_buf)
1426  *userbuf = buffer;
1427  else
1428  {
1429  ReleaseBuffer(buffer);
1430  *userbuf = InvalidBuffer;
1431  tuple->t_data = NULL;
1432  }
1433 
1434  return false;
1435 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:10009
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2601

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
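
heap_fetch() resolves one known TID against a snapshot and, when the tuple is visible, returns with the containing buffer pinned; the caller must release that pin. A hedged usage sketch (relation, snapshot and tid are assumed to be supplied; with keep_buf = false, a failed fetch leaves *userbuf set to InvalidBuffer and t_data set to NULL, as in the source above):

    HeapTupleData tuple;
    Buffer        buf;

    tuple.t_self = *tid;        /* heap_fetch takes the TID from t_self */
    if (heap_fetch(relation, snapshot, &tuple, &buf, false))
    {
        /* tuple.t_data points into the shared buffer; use it, then drop the pin */
        ReleaseBuffer(buf);
    }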

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5751 of file heapam.c.

5752 {
5753  Buffer buffer;
5754  Page page;
5755  OffsetNumber offnum;
5756  ItemId lp = NULL;
5757  HeapTupleHeader htup;
5758 
5759  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5760  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5761  page = (Page) BufferGetPage(buffer);
5762 
5763  offnum = ItemPointerGetOffsetNumber(tid);
5764  if (PageGetMaxOffsetNumber(page) >= offnum)
5765  lp = PageGetItemId(page, offnum);
5766 
5767  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5768  elog(ERROR, "invalid lp");
5769 
5770  htup = (HeapTupleHeader) PageGetItem(page, lp);
5771 
5772  /* NO EREPORT(ERROR) from here till changes are logged */
5773  START_CRIT_SECTION();
5774 
5775  Assert(HeapTupleHeaderIsSpeculative(htup));
5776 
5777  MarkBufferDirty(buffer);
5778 
5779  /*
5780  * Replace the speculative insertion token with a real t_ctid, pointing to
5781  * itself like it does on regular tuples.
5782  */
5783  htup->t_ctid = *tid;
5784 
5785  /* XLOG stuff */
5786  if (RelationNeedsWAL(relation))
5787  {
5788  xl_heap_confirm xlrec;
5789  XLogRecPtr recptr;
5790 
5791  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5792 
5793  XLogBeginInsert();
5794 
5795  /* We want the same filtering on this as on a plain insert */
5797 
5798  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5799  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5800 
5801  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5802 
5803  PageSetLSN(page, recptr);
5804  }
5805 
5806  END_CRIT_SECTION();
5807 
5808  UnlockReleaseBuffer(buffer);
5809 }
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:422
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:37
OffsetNumber offnum
Definition: heapam_xlog.h:419

References Assert(), BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().

◆ heap_freeze_execute_prepared()

void heap_freeze_execute_prepared ( Relation  rel,
Buffer  buffer,
TransactionId  snapshotConflictHorizon,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6780 of file heapam.c.

6783 {
6784  Page page = BufferGetPage(buffer);
6785 
6786  Assert(ntuples > 0);
6787 
6788  /*
6789  * Perform xmin/xmax XID status sanity checks before critical section.
6790  *
6791  * heap_prepare_freeze_tuple doesn't perform these checks directly because
6792  * pg_xact lookups are relatively expensive. They shouldn't be repeated
6793  * by successive VACUUMs that each decide against freezing the same page.
6794  */
6795  for (int i = 0; i < ntuples; i++)
6796  {
6797  HeapTupleFreeze *frz = tuples + i;
6798  ItemId itemid = PageGetItemId(page, frz->offset);
6799  HeapTupleHeader htup;
6800 
6801  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6802 
6803  /* Deliberately avoid relying on tuple hint bits here */
6805  {
6807 
6809  if (unlikely(!TransactionIdDidCommit(xmin)))
6810  ereport(ERROR,
6812  errmsg_internal("uncommitted xmin %u needs to be frozen",
6813  xmin)));
6814  }
6815 
6816  /*
6817  * TransactionIdDidAbort won't work reliably in the presence of XIDs
6818  * left behind by transactions that were in progress during a crash,
6819  * so we can only check that xmax didn't commit
6820  */
6821  if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
6822  {
6823  TransactionId xmax = HeapTupleHeaderGetRawXmax(htup);
6824 
6825  Assert(TransactionIdIsNormal(xmax));
6826  if (unlikely(TransactionIdDidCommit(xmax)))
6827  ereport(ERROR,
6828  (errcode(ERRCODE_DATA_CORRUPTED),
6829  errmsg_internal("cannot freeze committed xmax %u",
6830  xmax)));
6831  }
6832  }
6833 
6834  START_CRIT_SECTION();
6835 
6836  for (int i = 0; i < ntuples; i++)
6837  {
6838  HeapTupleFreeze *frz = tuples + i;
6839  ItemId itemid = PageGetItemId(page, frz->offset);
6840  HeapTupleHeader htup;
6841 
6842  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6843  heap_execute_freeze_tuple(htup, frz);
6844  }
6845 
6846  MarkBufferDirty(buffer);
6847 
6848  /* Now WAL-log freezing if necessary */
6849  if (RelationNeedsWAL(rel))
6850  {
6851  log_heap_prune_and_freeze(rel, buffer, snapshotConflictHorizon,
6852  false, /* no cleanup lock required */
6853  PRUNE_VACUUM_SCAN,
6854  tuples, ntuples,
6855  NULL, 0, /* redirected */
6856  NULL, 0, /* dead */
6857  NULL, 0); /* unused */
6858  }
6859 
6860  END_CRIT_SECTION();
6861 }
#define unlikely(x)
Definition: c.h:298
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1159
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.c:6751
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:108
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:107
#define HeapTupleHeaderGetRawXmin(tup)
Definition: htup_details.h:304
#define HeapTupleHeaderXminFrozen(tup)
Definition: htup_details.h:331
int i
Definition: isn.c:73
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:1304
OffsetNumber offset
Definition: heapam.h:122
uint8 checkflags
Definition: heapam.h:120
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert(), BufferGetPage(), HeapTupleFreeze::checkflags, END_CRIT_SECTION, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, heap_execute_freeze_tuple(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderXminFrozen, i, log_heap_prune_and_freeze(), MarkBufferDirty(), HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), PRUNE_VACUUM_SCAN, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by lazy_scan_prune().
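
A hedged sketch of the two-phase protocol this function completes: the caller first gathers per-tuple freeze plans with heap_prepare_freeze_tuple() (which modifies nothing), and only if it decides to freeze the page does it apply all of the plans here, inside one critical section with one WAL record. Variable names such as frozen, nfrozen, cutoffs and pagefrz are illustrative rather than copied from vacuumlazy.c; the buffer is assumed to be pinned and exclusively locked:

    HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    int         nfrozen = 0;
    bool        totally_frozen;

    /* Phase 1, per normal item on the page: build a freeze plan if needed. */
    if (heap_prepare_freeze_tuple(htup, &cutoffs, &pagefrz,
                                  &frozen[nfrozen], &totally_frozen))
        frozen[nfrozen++].offset = offnum;

    /* ... repeat for the remaining items on the page ... */

    /* Phase 2: execute every plan at once; the XID sanity checks, the
     * critical section, MarkBufferDirty() and WAL logging happen in here. */
    if (nfrozen > 0)
        heap_freeze_execute_prepared(rel, buf, snapshotConflictHorizon,
                                     frozen, nfrozen);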

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 6870 of file heapam.c.

6873 {
6874  HeapTupleFreeze frz;
6875  bool do_freeze;
6876  bool totally_frozen;
6877  struct VacuumCutoffs cutoffs;
6878  HeapPageFreeze pagefrz;
6879 
6880  cutoffs.relfrozenxid = relfrozenxid;
6881  cutoffs.relminmxid = relminmxid;
6882  cutoffs.OldestXmin = FreezeLimit;
6883  cutoffs.OldestMxact = MultiXactCutoff;
6884  cutoffs.FreezeLimit = FreezeLimit;
6885  cutoffs.MultiXactCutoff = MultiXactCutoff;
6886 
6887  pagefrz.freeze_required = true;
6888  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
6889  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
6890  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
6891  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
6892 
6893  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
6894  &pagefrz, &frz, &totally_frozen);
6895 
6896  /*
6897  * Note that because this is not a WAL-logged operation, we don't need to
6898  * fill in the offset in the freeze record.
6899  */
6900 
6901  if (do_freeze)
6902  heap_execute_freeze_tuple(tuple, &frz);
6903  return do_freeze;
6904 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6477
TransactionId FreezeLimit
Definition: vacuum.h:276
TransactionId relfrozenxid
Definition: vacuum.h:250
MultiXactId relminmxid
Definition: vacuum.h:251
MultiXactId MultiXactCutoff
Definition: vacuum.h:277

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
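
Because nothing is WAL-logged here, this variant only suits tuples that are not yet on a shared buffer, e.g. a freshly formed copy during a table rewrite. A brief sketch under that assumption (the cutoff variables are presumed to come from the rewrite state):

    /* newtup is a palloc'd copy that has not been placed on any page yet */
    if (heap_freeze_tuple(newtup->t_data,
                          relfrozenxid, relminmxid,
                          FreezeLimit, MultiXactCutoff))
    {
        /* header xmin/xmax were overwritten in place; nothing else to do */
    }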

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1610 of file heapam.c.

1612 {
1613  Relation relation = sscan->rs_rd;
1614  Snapshot snapshot = sscan->rs_snapshot;
1615  ItemPointerData ctid;
1616  TransactionId priorXmax;
1617 
1618  /*
1619  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1620  * Assume that t_ctid links are valid however - there shouldn't be invalid
1621  * ones in the table.
1622  */
1623  Assert(ItemPointerIsValid(tid));
1624 
1625  /*
1626  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1627  * need to examine, and *tid is the TID we will return if ctid turns out
1628  * to be bogus.
1629  *
1630  * Note that we will loop until we reach the end of the t_ctid chain.
1631  * Depending on the snapshot passed, there might be at most one visible
1632  * version of the row, but we don't try to optimize for that.
1633  */
1634  ctid = *tid;
1635  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1636  for (;;)
1637  {
1638  Buffer buffer;
1639  Page page;
1640  OffsetNumber offnum;
1641  ItemId lp;
1642  HeapTupleData tp;
1643  bool valid;
1644 
1645  /*
1646  * Read, pin, and lock the page.
1647  */
1648  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1649  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1650  page = BufferGetPage(buffer);
1651 
1652  /*
1653  * Check for bogus item number. This is not treated as an error
1654  * condition because it can happen while following a t_ctid link. We
1655  * just assume that the prior tid is OK and return it unchanged.
1656  */
1657  offnum = ItemPointerGetOffsetNumber(&ctid);
1658  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1659  {
1660  UnlockReleaseBuffer(buffer);
1661  break;
1662  }
1663  lp = PageGetItemId(page, offnum);
1664  if (!ItemIdIsNormal(lp))
1665  {
1666  UnlockReleaseBuffer(buffer);
1667  break;
1668  }
1669 
1670  /* OK to access the tuple */
1671  tp.t_self = ctid;
1672  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1673  tp.t_len = ItemIdGetLength(lp);
1674  tp.t_tableOid = RelationGetRelid(relation);
1675 
1676  /*
1677  * After following a t_ctid link, we might arrive at an unrelated
1678  * tuple. Check for XMIN match.
1679  */
1680  if (TransactionIdIsValid(priorXmax) &&
1681  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1682  {
1683  UnlockReleaseBuffer(buffer);
1684  break;
1685  }
1686 
1687  /*
1688  * Check tuple visibility; if visible, set it as the new result
1689  * candidate.
1690  */
1691  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1692  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1693  if (valid)
1694  *tid = ctid;
1695 
1696  /*
1697  * If there's a valid t_ctid link, follow it, else we're done.
1698  */
1699  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1700  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1701  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1702  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1703  {
1704  UnlockReleaseBuffer(buffer);
1705  break;
1706  }
1707 
1708  ctid = tp.t_data->t_ctid;
1709  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1710  UnlockReleaseBuffer(buffer);
1711  } /* end of loop */
1712 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert(), BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
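
Callers normally reach this through the table AM wrapper table_tuple_get_latest_tid(); a minimal sketch of chasing an update chain from a known TID (relid and the block/offset values are placeholders):

    Relation    rel = table_open(relid, AccessShareLock);
    TableScanDesc scan = table_beginscan_tid(rel, GetActiveSnapshot());
    ItemPointerData tid;

    ItemPointerSet(&tid, 0, FirstOffsetNumber);   /* some older tuple version */
    table_tuple_get_latest_tid(scan, &tid);       /* tid now names the latest visible version */

    table_endscan(scan);
    table_close(rel, AccessShareLock);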

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1036 of file pruneheap.c.

1037 {
1038  OffsetNumber offnum,
1039  maxoff;
1040 
1041  MemSet(root_offsets, InvalidOffsetNumber,
1042  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1043 
1044  maxoff = PageGetMaxOffsetNumber(page);
1045  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1046  {
1047  ItemId lp = PageGetItemId(page, offnum);
1048  HeapTupleHeader htup;
1049  OffsetNumber nextoffnum;
1050  TransactionId priorXmax;
1051 
1052  /* skip unused and dead items */
1053  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1054  continue;
1055 
1056  if (ItemIdIsNormal(lp))
1057  {
1058  htup = (HeapTupleHeader) PageGetItem(page, lp);
1059 
1060  /*
1061  * Check if this tuple is part of a HOT-chain rooted at some other
1062  * tuple. If so, skip it for now; we'll process it when we find
1063  * its root.
1064  */
1065  if (HeapTupleHeaderIsHeapOnly(htup))
1066  continue;
1067 
1068  /*
1069  * This is either a plain tuple or the root of a HOT-chain.
1070  * Remember it in the mapping.
1071  */
1072  root_offsets[offnum - 1] = offnum;
1073 
1074  /* If it's not the start of a HOT-chain, we're done with it */
1075  if (!HeapTupleHeaderIsHotUpdated(htup))
1076  continue;
1077 
1078  /* Set up to scan the HOT-chain */
1079  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1080  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1081  }
1082  else
1083  {
1084  /* Must be a redirect item. We do not set its root_offsets entry */
1085  Assert(ItemIdIsRedirected(lp));
1086  /* Set up to scan the HOT-chain */
1087  nextoffnum = ItemIdGetRedirect(lp);
1088  priorXmax = InvalidTransactionId;
1089  }
1090 
1091  /*
1092  * Now follow the HOT-chain and collect other tuples in the chain.
1093  *
1094  * Note: Even though this is a nested loop, the complexity of the
1095  * function is O(N) because a tuple in the page should be visited not
1096  * more than twice, once in the outer loop and once in HOT-chain
1097  * chases.
1098  */
1099  for (;;)
1100  {
1101  /* Sanity check (pure paranoia) */
1102  if (offnum < FirstOffsetNumber)
1103  break;
1104 
1105  /*
1106  * An offset past the end of page's line pointer array is possible
1107  * when the array was truncated
1108  */
1109  if (offnum > maxoff)
1110  break;
1111 
1112  lp = PageGetItemId(page, nextoffnum);
1113 
1114  /* Check for broken chains */
1115  if (!ItemIdIsNormal(lp))
1116  break;
1117 
1118  htup = (HeapTupleHeader) PageGetItem(page, lp);
1119 
1120  if (TransactionIdIsValid(priorXmax) &&
1121  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1122  break;
1123 
1124  /* Remember the root line pointer for this item */
1125  root_offsets[nextoffnum - 1] = offnum;
1126 
1127  /* Advance to next chain member, if any */
1128  if (!HeapTupleHeaderIsHotUpdated(htup))
1129  break;
1130 
1131  /* HOT implies it can't have moved to different partition */
1132  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1133 
1134  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1135  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1136  }
1137  }
1138 }
#define MemSet(start, val, len)
Definition: c.h:1007
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
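
Index builds consult the resulting map so that entries for heap-only tuples point at their HOT-chain root instead of at the member itself. A minimal sketch, assuming the page has already been read and locked by the caller and that index_tid is a caller-supplied ItemPointerData:

    OffsetNumber root_offsets[MaxHeapTuplesPerPage];

    heap_get_root_tuples(page, root_offsets);

    /* For the heap-only tuple at offnum, index the root line pointer instead */
    if (OffsetNumberIsValid(root_offsets[offnum - 1]))
        ItemPointerSet(&index_tid, BufferGetBlockNumber(buf),
                       root_offsets[offnum - 1]);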

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1079 of file heapam.c.

1080 {
1081  HeapScanDesc scan = (HeapScanDesc) sscan;
1082 
1083  /*
1084  * This is still widely used directly, without going through table AM, so
1085  * add a safety check. It's possible we should, at a later point,
1086  * downgrade this to an assert. The reason for checking the AM routine,
1087  * rather than the AM oid, is that this allows to write regression tests
1088  * that create another AM reusing the heap handler.
1089  */
1090  if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1091  ereport(ERROR,
1092  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1093  errmsg_internal("only heap AM is supported")));
1094 
1095  /*
1096  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1097  * for catalog or regular tables. See detailed comments in xact.c where
1098  * these variables are declared. Normally we have such a check at tableam
1099  * level API but this is called from many places so we need to ensure it
1100  * here.
1101  */
1102  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1103  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1104 
1105  /* Note: no locking manipulations needed */
1106 
1107  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1108  heapgettup_pagemode(scan, direction,
1109  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1110  else
1111  heapgettup(scan, direction,
1112  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1113 
1114  if (scan->rs_ctup.t_data == NULL)
1115  return NULL;
1116 
1117  /*
1118  * if we get here it means we have a new current scan tuple, so point to
1119  * the proper return buffer and return the tuple.
1120  */
1121 
1122  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1123 
1124  return &scan->rs_ctup;
1125 }
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:717
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:832
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:615
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool bsysscan
Definition: xact.c:98
TransactionId CheckXidAlive
Definition: xact.c:97

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), index_update_stats(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
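
Most of the callers listed above follow the same pattern: open the relation, start a sequential scan, and consume tuples until NULL is returned. A minimal catalog-style loop (the pg_subscription relation is only an example):

    Relation    rel = table_open(SubscriptionRelationId, AccessShareLock);
    TableScanDesc scan = table_beginscan_catalog(rel, 0, NULL);
    HeapTuple   tup;

    while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* tup points into the scan's current buffer; copy it with
         * heap_copytuple() if it must outlive the scan */
    }

    table_endscan(scan);
    table_close(rel, AccessShareLock);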

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1128 of file heapam.c.

1129 {
1130  HeapScanDesc scan = (HeapScanDesc) sscan;
1131 
1132  /* Note: no locking manipulations needed */
1133 
1134  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1135  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1136  else
1137  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1138 
1139  if (scan->rs_ctup.t_data == NULL)
1140  {
1141  ExecClearTuple(slot);
1142  return false;
1143  }
1144 
1145  /*
1146  * if we get here it means we have a new current scan tuple, so point to
1147  * the proper return buffer and return the tuple.
1148  */
1149 
1150  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1151 
1152  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1153  scan->rs_cbuf);
1154  return true;
1155 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
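
This slot-returning variant is what the heap table AM exposes through table_scan_getnextslot(); calling it directly looks like the following sketch (snapshot setup elided):

    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* slot holds a buffer heap tuple; the scan keeps the buffer pinned */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);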

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1231 of file heapam.c.

1233 {
1234  HeapScanDesc scan = (HeapScanDesc) sscan;
1235  ItemPointer mintid = &sscan->rs_mintid;
1236  ItemPointer maxtid = &sscan->rs_maxtid;
1237 
1238  /* Note: no locking manipulations needed */
1239  for (;;)
1240  {
1241  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1242  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1243  else
1244  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1245 
1246  if (scan->rs_ctup.t_data == NULL)
1247  {
1248  ExecClearTuple(slot);
1249  return false;
1250  }
1251 
1252  /*
1253  * heap_set_tidrange will have used heap_setscanlimits to limit the
1254  * range of pages we scan to only ones that can contain the TID range
1255  * we're scanning for. Here we must filter out any tuples from these
1256  * pages that are outside of that range.
1257  */
1258  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1259  {
1260  ExecClearTuple(slot);
1261 
1262  /*
1263  * When scanning backwards, the TIDs will be in descending order.
1264  * Future tuples in this direction will be lower still, so we can
1265  * just return false to indicate there will be no more tuples.
1266  */
1267  if (ScanDirectionIsBackward(direction))
1268  return false;
1269 
1270  continue;
1271  }
1272 
1273  /*
1274  * Likewise for the final page, we must filter out TIDs greater than
1275  * maxtid.
1276  */
1277  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1278  {
1279  ExecClearTuple(slot);
1280 
1281  /*
1282  * When scanning forward, the TIDs will be in ascending order.
1283  * Future tuples in this direction will be higher still, so we can
1284  * just return false to indicate there will be no more tuples.
1285  */
1286  if (ScanDirectionIsForward(direction))
1287  return false;
1288  continue;
1289  }
1290 
1291  break;
1292  }
1293 
1294  /*
1295  * if we get here it means we have a new current scan tuple, so point to
1296  * the proper return buffer and return the tuple.
1297  */
1298  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1299 
1300  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1301  return true;
1302 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:40
ItemPointerData rs_maxtid
Definition: relscan.h:41

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
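
TID range scans are normally driven through the table AM wrappers, which for heap end up in heap_set_tidrange() and the function above. A hedged sketch restricting a scan to blocks 0 through 9 (slot creation as in the previous example):

    ItemPointerData mintid,
                maxtid;

    ItemPointerSet(&mintid, 0, FirstOffsetNumber);
    ItemPointerSet(&maxtid, 9, MaxOffsetNumber);

    TableScanDesc scan = table_beginscan_tidrange(rel, GetActiveSnapshot(),
                                                  &mintid, &maxtid);
    while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* only tuples with mintid <= t_self <= maxtid are returned */
    }
    table_endscan(scan);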

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)

Definition at line 1458 of file heapam.c.

1461 {
1462  Page page = BufferGetPage(buffer);
1463  TransactionId prev_xmax = InvalidTransactionId;
1464  BlockNumber blkno;
1465  OffsetNumber offnum;
1466  bool at_chain_start;
1467  bool valid;
1468  bool skip;
1469  GlobalVisState *vistest = NULL;
1470 
1471  /* If this is not the first call, previous call returned a (live!) tuple */
1472  if (all_dead)
1473  *all_dead = first_call;
1474 
1475  blkno = ItemPointerGetBlockNumber(tid);
1476  offnum = ItemPointerGetOffsetNumber(tid);
1477  at_chain_start = first_call;
1478  skip = !first_call;
1479 
1480  /* XXX: we should assert that a snapshot is pushed or registered */
1481  Assert(TransactionIdIsValid(RecentXmin));
1482  Assert(BufferGetBlockNumber(buffer) == blkno);
1483 
1484  /* Scan through possible multiple members of HOT-chain */
1485  for (;;)
1486  {
1487  ItemId lp;
1488 
1489  /* check for bogus TID */
1490  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1491  break;
1492 
1493  lp = PageGetItemId(page, offnum);
1494 
1495  /* check for unused, dead, or redirected items */
1496  if (!ItemIdIsNormal(lp))
1497  {
1498  /* We should only see a redirect at start of chain */
1499  if (ItemIdIsRedirected(lp) && at_chain_start)
1500  {
1501  /* Follow the redirect */
1502  offnum = ItemIdGetRedirect(lp);
1503  at_chain_start = false;
1504  continue;
1505  }
1506  /* else must be end of chain */
1507  break;
1508  }
1509 
1510  /*
1511  * Update heapTuple to point to the element of the HOT chain we're
1512  * currently investigating. Having t_self set correctly is important
1513  * because the SSI checks and the *Satisfies routine for historical
1514  * MVCC snapshots need the correct tid to decide about the visibility.
1515  */
1516  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1517  heapTuple->t_len = ItemIdGetLength(lp);
1518  heapTuple->t_tableOid = RelationGetRelid(relation);
1519  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1520 
1521  /*
1522  * Shouldn't see a HEAP_ONLY tuple at chain start.
1523  */
1524  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1525  break;
1526 
1527  /*
1528  * The xmin should match the previous xmax value, else chain is
1529  * broken.
1530  */
1531  if (TransactionIdIsValid(prev_xmax) &&
1532  !TransactionIdEquals(prev_xmax,
1533  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1534  break;
1535 
1536  /*
1537  * When first_call is true (and thus, skip is initially false) we'll
1538  * return the first tuple we find. But on later passes, heapTuple
1539  * will initially be pointing to the tuple we returned last time.
1540  * Returning it again would be incorrect (and would loop forever), so
1541  * we skip it and return the next match we find.
1542  */
1543  if (!skip)
1544  {
1545  /* If it's visible per the snapshot, we must return it */
1546  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1547  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1548  buffer, snapshot);
1549 
1550  if (valid)
1551  {
1552  ItemPointerSetOffsetNumber(tid, offnum);
1553  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1554  HeapTupleHeaderGetXmin(heapTuple->t_data));
1555  if (all_dead)
1556  *all_dead = false;
1557  return true;
1558  }
1559  }
1560  skip = false;
1561 
1562  /*
1563  * If we can't see it, maybe no one else can either. At caller
1564  * request, check whether all chain members are dead to all
1565  * transactions.
1566  *
1567  * Note: if you change the criterion here for what is "dead", fix the
1568  * planner's get_actual_variable_range() function to match.
1569  */
1570  if (all_dead && *all_dead)
1571  {
1572  if (!vistest)
1573  vistest = GlobalVisTestFor(relation);
1574 
1575  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1576  *all_dead = false;
1577  }
1578 
1579  /*
1580  * Check to see if HOT chain continues past this tuple; if so fetch
1581  * the next offnum and loop around.
1582  */
1583  if (HeapTupleIsHotUpdated(heapTuple))
1584  {
1585  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1586  blkno);
1587  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1588  at_chain_start = false;
1589  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1590  }
1591  else
1592  break; /* end of chain */
1593  }
1594 
1595  return false;
1596 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4091
TransactionId RecentXmin
Definition: snapmgr.c:99

References Assert(), BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
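
A typical index-fetch style caller pins and share-locks the buffer itself and then lets this routine walk the HOT chain; tid and snapshot are assumed to come from the index AM:

    Buffer      buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
    HeapTupleData heapTuple;
    bool        all_dead;
    bool        found;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    found = heap_hot_search_buffer(tid, rel, buf, snapshot,
                                   &heapTuple, &all_dead, true);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);

    if (found)
    {
        /* *tid was advanced to the visible chain member */
    }
    else if (all_dead)
    {
        /* whole chain is dead to everyone; the index entry may be marked killed */
    }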

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7584 of file heapam.c.

7585 {
7586  /* Initial assumption is that earlier pruning took care of conflict */
7587  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7588  BlockNumber blkno = InvalidBlockNumber;
7589  Buffer buf = InvalidBuffer;
7590  Page page = NULL;
7591  OffsetNumber maxoff = InvalidOffsetNumber;
7592  TransactionId priorXmax;
7593 #ifdef USE_PREFETCH
7594  IndexDeletePrefetchState prefetch_state;
7595  int prefetch_distance;
7596 #endif
7597  SnapshotData SnapshotNonVacuumable;
7598  int finalndeltids = 0,
7599  nblocksaccessed = 0;
7600 
7601  /* State that's only used in bottom-up index deletion case */
7602  int nblocksfavorable = 0;
7603  int curtargetfreespace = delstate->bottomupfreespace,
7604  lastfreespace = 0,
7605  actualfreespace = 0;
7606  bool bottomup_final_block = false;
7607 
7608  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7609 
7610  /* Sort caller's deltids array by TID for further processing */
7611  index_delete_sort(delstate);
7612 
7613  /*
7614  * Bottom-up case: resort deltids array in an order attuned to where the
7615  * greatest number of promising TIDs are to be found, and determine how
7616  * many blocks from the start of sorted array should be considered
7617  * favorable. This will also shrink the deltids array in order to
7618  * eliminate completely unfavorable blocks up front.
7619  */
7620  if (delstate->bottomup)
7621  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7622 
7623 #ifdef USE_PREFETCH
7624  /* Initialize prefetch state. */
7625  prefetch_state.cur_hblkno = InvalidBlockNumber;
7626  prefetch_state.next_item = 0;
7627  prefetch_state.ndeltids = delstate->ndeltids;
7628  prefetch_state.deltids = delstate->deltids;
7629 
7630  /*
7631  * Determine the prefetch distance that we will attempt to maintain.
7632  *
7633  * Since the caller holds a buffer lock somewhere in rel, we'd better make
7634  * sure that isn't a catalog relation before we call code that does
7635  * syscache lookups, to avoid risk of deadlock.
7636  */
7637  if (IsCatalogRelation(rel))
7638  prefetch_distance = maintenance_io_concurrency;
7639  else
7640  prefetch_distance =
7641  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
7642 
7643  /* Cap initial prefetch distance for bottom-up deletion caller */
7644  if (delstate->bottomup)
7645  {
7646  Assert(nblocksfavorable >= 1);
7647  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
7648  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
7649  }
7650 
7651  /* Start prefetching. */
7652  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
7653 #endif
7654 
7655  /* Iterate over deltids, determine which to delete, check their horizon */
7656  Assert(delstate->ndeltids > 0);
7657  for (int i = 0; i < delstate->ndeltids; i++)
7658  {
7659  TM_IndexDelete *ideltid = &delstate->deltids[i];
7660  TM_IndexStatus *istatus = delstate->status + ideltid->id;
7661  ItemPointer htid = &ideltid->tid;
7662  OffsetNumber offnum;
7663 
7664  /*
7665  * Read buffer, and perform required extra steps each time a new block
7666  * is encountered. Avoid refetching if it's the same block as the one
7667  * from the last htid.
7668  */
7669  if (blkno == InvalidBlockNumber ||
7670  ItemPointerGetBlockNumber(htid) != blkno)
7671  {
7672  /*
7673  * Consider giving up early for bottom-up index deletion caller
7674  * first. (Only prefetch next-next block afterwards, when it
7675  * becomes clear that we're at least going to access the next
7676  * block in line.)
7677  *
7678  * Sometimes the first block frees so much space for bottom-up
7679  * caller that the deletion process can end without accessing any
7680  * more blocks. It is usually necessary to access 2 or 3 blocks
7681  * per bottom-up deletion operation, though.
7682  */
7683  if (delstate->bottomup)
7684  {
7685  /*
7686  * We often allow caller to delete a few additional items
7687  * whose entries we reached after the point that space target
7688  * from caller was satisfied. The cost of accessing the page
7689  * was already paid at that point, so it made sense to finish
7690  * it off. When that happened, we finalize everything here
7691  * (by finishing off the whole bottom-up deletion operation
7692  * without needlessly paying the cost of accessing any more
7693  * blocks).
7694  */
7695  if (bottomup_final_block)
7696  break;
7697 
7698  /*
7699  * Give up when we didn't enable our caller to free any
7700  * additional space as a result of processing the page that we
7701  * just finished up with. This rule is the main way in which
7702  * we keep the cost of bottom-up deletion under control.
7703  */
7704  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
7705  break;
7706  lastfreespace = actualfreespace; /* for next time */
7707 
7708  /*
7709  * Deletion operation (which is bottom-up) will definitely
7710  * access the next block in line. Prepare for that now.
7711  *
7712  * Decay target free space so that we don't hang on for too
7713  * long with a marginal case. (Space target is only truly
7714  * helpful when it allows us to recognize that we don't need
7715  * to access more than 1 or 2 blocks to satisfy caller due to
7716  * agreeable workload characteristics.)
7717  *
7718  * We are a bit more patient when we encounter contiguous
7719  * blocks, though: these are treated as favorable blocks. The
7720  * decay process is only applied when the next block in line
7721  * is not a favorable/contiguous block. This is not an
7722  * exception to the general rule; we still insist on finding
7723  * at least one deletable item per block accessed. See
7724  * bottomup_nblocksfavorable() for full details of the theory
7725  * behind favorable blocks and heap block locality in general.
7726  *
7727  * Note: The first block in line is always treated as a
7728  * favorable block, so the earliest possible point that the
7729  * decay can be applied is just before we access the second
7730  * block in line. The Assert() verifies this for us.
7731  */
7732  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
7733  if (nblocksfavorable > 0)
7734  nblocksfavorable--;
7735  else
7736  curtargetfreespace /= 2;
7737  }
7738 
7739  /* release old buffer */
7740  if (BufferIsValid(buf))
7741  UnlockReleaseBuffer(buf);
7742 
7743  blkno = ItemPointerGetBlockNumber(htid);
7744  buf = ReadBuffer(rel, blkno);
7745  nblocksaccessed++;
7746  Assert(!delstate->bottomup ||
7747  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
7748 
7749 #ifdef USE_PREFETCH
7750 
7751  /*
7752  * To maintain the prefetch distance, prefetch one more page for
7753  * each page we read.
7754  */
7755  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
7756 #endif
7757 
7758  LockBuffer(buf, BUFFER_LOCK_SHARE);
7759 
7760  page = BufferGetPage(buf);
7761  maxoff = PageGetMaxOffsetNumber(page);
7762  }
7763 
7764  /*
7765  * In passing, detect index corruption involving an index page with a
7766  * TID that points to a location in the heap that couldn't possibly be
7767  * correct. We only do this with actual TIDs from caller's index page
7768  * (not items reached by traversing through a HOT chain).
7769  */
7770  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
7771 
7772  if (istatus->knowndeletable)
7773  Assert(!delstate->bottomup && !istatus->promising);
7774  else
7775  {
7776  ItemPointerData tmp = *htid;
7777  HeapTupleData heapTuple;
7778 
7779  /* Are any tuples from this HOT chain non-vacuumable? */
7780  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
7781  &heapTuple, NULL, true))
7782  continue; /* can't delete entry */
7783 
7784  /* Caller will delete, since whole HOT chain is vacuumable */
7785  istatus->knowndeletable = true;
7786 
7787  /* Maintain index free space info for bottom-up deletion case */
7788  if (delstate->bottomup)
7789  {
7790  Assert(istatus->freespace > 0);
7791  actualfreespace += istatus->freespace;
7792  if (actualfreespace >= curtargetfreespace)
7793  bottomup_final_block = true;
7794  }
7795  }
7796 
7797  /*
7798  * Maintain snapshotConflictHorizon value for deletion operation as a
7799  * whole by advancing current value using heap tuple headers. This is
7800  * loosely based on the logic for pruning a HOT chain.
7801  */
7802  offnum = ItemPointerGetOffsetNumber(htid);
7803  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7804  for (;;)
7805  {
7806  ItemId lp;
7807  HeapTupleHeader htup;
7808 
7809  /* Sanity check (pure paranoia) */
7810  if (offnum < FirstOffsetNumber)
7811  break;
7812 
7813  /*
7814  * An offset past the end of page's line pointer array is possible
7815  * when the array was truncated
7816  */
7817  if (offnum > maxoff)
7818  break;
7819 
7820  lp = PageGetItemId(page, offnum);
7821  if (ItemIdIsRedirected(lp))
7822  {
7823  offnum = ItemIdGetRedirect(lp);
7824  continue;
7825  }
7826 
7827  /*
7828  * We'll often encounter LP_DEAD line pointers (especially with an
7829  * entry marked knowndeletable by our caller up front). No heap
7830  * tuple headers get examined for an htid that leads us to an
7831  * LP_DEAD item. This is okay because the earlier pruning
7832  * operation that made the line pointer LP_DEAD in the first place
7833  * must have considered the original tuple header as part of
7834  * generating its own snapshotConflictHorizon value.
7835  *
7836  * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
7837  * the same strategy that index vacuuming uses in all cases. Index
7838  * VACUUM WAL records don't even have a snapshotConflictHorizon
7839  * field of their own for this reason.
7840  */
7841  if (!ItemIdIsNormal(lp))
7842  break;
7843 
7844  htup = (HeapTupleHeader) PageGetItem(page, lp);
7845 
7846  /*
7847  * Check the tuple XMIN against prior XMAX, if any
7848  */
7849  if (TransactionIdIsValid(priorXmax) &&
7850  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7851  break;
7852 
7853  HeapTupleHeaderAdvanceConflictHorizon(htup,
7854  &snapshotConflictHorizon);
7855 
7856  /*
7857  * If the tuple is not HOT-updated, then we are at the end of this
7858  * HOT-chain. No need to visit later tuples from the same update
7859  * chain (they get their own index entries) -- just move on to
7860  * next htid from index AM caller.
7861  */
7862  if (!HeapTupleHeaderIsHotUpdated(htup))
7863  break;
7864 
7865  /* Advance to next HOT chain member */
7866  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7867  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7868  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7869  }
7870 
7871  /* Enable further/final shrinking of deltids for caller */
7872  finalndeltids = i + 1;
7873  }
7874 
7875  UnlockReleaseBuffer(buf);
7876 
7877  /*
7878  * Shrink deltids array to exclude non-deletable entries at the end. This
7879  * is not just a minor optimization. Final deltids array size might be
7880  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
7881  * ndeltids being zero in all cases with zero total deletable entries.
7882  */
7883  Assert(finalndeltids > 0 || delstate->bottomup);
7884  delstate->ndeltids = finalndeltids;
7885 
7886  return snapshotConflictHorizon;
7887 }
int maintenance_io_concurrency
Definition: bufmgr.c:153
#define Min(x, y)
Definition: c.h:991
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8142
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7439
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7524
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:184
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1458
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:7929
static char * buf
Definition: pg_test_fsync.c:73
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:247
int bottomupfreespace
Definition: tableam.h:242
TM_IndexDelete * deltids
Definition: tableam.h:246
ItemPointerData tid
Definition: tableam.h:205
bool knowndeletable
Definition: tableam.h:212
bool promising
Definition: tableam.h:215
int16 freespace
Definition: tableam.h:216

References Assert(), TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
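
Index AMs reach this through table_index_delete_tuples() after filling a TM_IndexDeleteOp. A heavily hedged sketch using only the fields referenced above; sizing of the deltids/status arrays, and the remaining TM_IndexDeleteOp fields that real callers also initialize, are left to the caller:

    TM_IndexDeleteOp delstate;
    TransactionId conflict_horizon;

    delstate.bottomup = false;          /* simple deletion of known-dead entries */
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = ndeletable;     /* caller-counted entries */
    delstate.deltids = deltids;         /* TM_IndexDelete[]: .tid and .id per entry */
    delstate.status = status;           /* TM_IndexStatus[]: .knowndeletable etc. */

    conflict_horizon = table_index_delete_tuples(heapRel, &delstate);
    /* on return, ndeltids covers only the entries confirmed deletable */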

◆ heap_inplace_update()

void heap_inplace_update ( Relation  relation,
HeapTuple  tuple 
)

Definition at line 5991 of file heapam.c.

5992 {
5993  Buffer buffer;
5994  Page page;
5995  OffsetNumber offnum;
5996  ItemId lp = NULL;
5997  HeapTupleHeader htup;
5998  uint32 oldlen;
5999  uint32 newlen;
6000 
6001  /*
6002  * For now, we don't allow parallel updates. Unlike a regular update,
6003  * this should never create a combo CID, so it might be possible to relax
6004  * this restriction, but not without more thought and testing. It's not
6005  * clear that it would be useful, anyway.
6006  */
6007  if (IsInParallelMode())
6008  ereport(ERROR,
6009  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
6010  errmsg("cannot update tuples during a parallel operation")));
6011 
6012  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
6013  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6014  page = (Page) BufferGetPage(buffer);
6015 
6016  offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
6017  if (PageGetMaxOffsetNumber(page) >= offnum)
6018  lp = PageGetItemId(page, offnum);
6019 
6020  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
6021  elog(ERROR, "invalid lp");
6022 
6023  htup = (HeapTupleHeader) PageGetItem(page, lp);
6024 
6025  oldlen = ItemIdGetLength(lp) - htup->t_hoff;
6026  newlen = tuple->t_len - tuple->t_data->t_hoff;
6027  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6028  elog(ERROR, "wrong tuple length");
6029 
6030  /* NO EREPORT(ERROR) from here till changes are logged */
6031  START_CRIT_SECTION();
6032 
6033  memcpy((char *) htup + htup->t_hoff,
6034  (char *) tuple->t_data + tuple->t_data->t_hoff,
6035  newlen);
6036 
6037  MarkBufferDirty(buffer);
6038 
6039  /* XLOG stuff */
6040  if (RelationNeedsWAL(relation))
6041  {
6042  xl_heap_inplace xlrec;
6043  XLogRecPtr recptr;
6044 
6045  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6046 
6047  XLogBeginInsert();
6048  XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
6049 
6050  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6051  XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
6052 
6053  /* inplace updates aren't decoded atm, don't log the origin */
6054 
6055  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6056 
6057  PageSetLSN(page, recptr);
6058  }
6059 
6060  END_CRIT_SECTION();
6061 
6062  UnlockReleaseBuffer(buffer);
6063 
6064  /*
6065  * Send out shared cache inval if necessary. Note that because we only
6066  * pass the new version of the tuple, this mustn't be used for any
6067  * operations that could change catcache lookup keys. But we aren't
6068  * bothering with index updates either, so that's true a fortiori.
6069  */
6070  if (!IsBootstrapProcessingMode())
6071  CacheInvalidateHeapTuple(relation, tuple, NULL);
6072 }
unsigned int uint32
Definition: c.h:493
#define SizeOfHeapInplace
Definition: heapam_xlog.h:431
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:39
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
OffsetNumber offnum
Definition: heapam_xlog.h:427
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:405

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), CacheInvalidateHeapTuple(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, IsBootstrapProcessingMode, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_inplace::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapInplace, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by create_toast_table(), dropdb(), EventTriggerOnLogin(), index_update_stats(), vac_update_datfrozenxid(), and vac_update_relstats().
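
The callers above all follow the same shape: copy a catalog tuple, overwrite fixed-width, non-indexed fields in the copy, and write it back over the original version. A hedged sketch in the style of the relation-statistics updates (relid, num_pages and num_tuples are placeholders):

    Relation    pgclass = table_open(RelationRelationId, RowExclusiveLock);
    HeapTuple   ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
    Form_pg_class pgcform;

    if (!HeapTupleIsValid(ctup))
        elog(ERROR, "pg_class entry for relation %u vanished", relid);

    pgcform = (Form_pg_class) GETSTRUCT(ctup);
    pgcform->relpages = (int32) num_pages;      /* same-length, non-key fields only */
    pgcform->reltuples = (float4) num_tuples;

    heap_inplace_update(pgclass, ctup);         /* overwrite, no new tuple version */

    heap_freetuple(ctup);
    table_close(pgclass, RowExclusiveLock);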

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1821 of file heapam.c.

1823 {
1824  TransactionId xid = GetCurrentTransactionId();
1825  HeapTuple heaptup;
1826  Buffer buffer;
1827  Buffer vmbuffer = InvalidBuffer;
1828  bool all_visible_cleared = false;
1829 
1830  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
1831  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
1832  RelationGetNumberOfAttributes(relation));
1833 
1834  /*
1835  * Fill in tuple header fields and toast the tuple if necessary.
1836  *
1837  * Note: below this point, heaptup is the data we actually intend to store
1838  * into the relation; tup is the caller's original untoasted data.
1839  */
1840  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
1841 
1842  /*
1843  * Find buffer to insert this tuple into. If the page is all visible,
1844  * this will also pin the requisite visibility map page.
1845  */
1846  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
1847  InvalidBuffer, options, bistate,
1848  &vmbuffer, NULL,
1849  0);
1850 
1851  /*
1852  * We're about to do the actual insert -- but check for conflict first, to
1853  * avoid possibly having to roll back work we've just done.
1854  *
1855  * This is safe without a recheck as long as there is no possibility of
1856  * another process scanning the page between this check and the insert
1857  * being visible to the scan (i.e., an exclusive buffer content lock is
1858  * continuously held from this point until the tuple insert is visible).
1859  *
1860  * For a heap insert, we only need to check for table-level SSI locks. Our
1861  * new tuple can't possibly conflict with existing tuple locks, and heap
1862  * page locks are only consolidated versions of tuple locks; they do not
1863  * lock "gaps" as index page locks do. So we don't need to specify a
1864  * buffer when making the call, which makes for a faster check.
1865  */
1866  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
1867 
1868  /* NO EREPORT(ERROR) from here till changes are logged */
1869  START_CRIT_SECTION();
1870 
1871  RelationPutHeapTuple(relation, buffer, heaptup,
1872  (options & HEAP_INSERT_SPECULATIVE) != 0);
1873 
1874  if (PageIsAllVisible(BufferGetPage(buffer)))
1875  {
1876  all_visible_cleared = true;
1877  PageClearAllVisible(BufferGetPage(buffer));
1878  visibilitymap_clear(relation,
1879  ItemPointerGetBlockNumber(&(heaptup->t_self)),
1880  vmbuffer, VISIBILITYMAP_VALID_BITS);
1881  }
1882 
1883  /*
1884  * XXX Should we set PageSetPrunable on this page ?
1885  *
1886  * The inserting transaction may eventually abort thus making this tuple
1887  * DEAD and hence available for pruning. Though we don't want to optimize
1888  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
1889  * aborted tuple will never be pruned until next vacuum is triggered.
1890  *
1891  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
1892  */
1893 
1894  MarkBufferDirty(buffer);
1895 
1896  /* XLOG stuff */
1897  if (RelationNeedsWAL(relation))
1898  {
1899  xl_heap_insert xlrec;
1900  xl_heap_header xlhdr;
1901  XLogRecPtr recptr;
1902  Page page = BufferGetPage(buffer);
1903  uint8 info = XLOG_HEAP_INSERT;
1904  int bufflags = 0;
1905 
1906  /*
1907  * If this is a catalog, we need to transmit combo CIDs to properly
1908  * decode, so log that as well.
1909  */
1910  if (RelationIsAccessibleInLogicalDecoding(relation))
1911  log_heap_new_cid(relation, heaptup);
1912 
1913  /*
1914  * If this is the single and first tuple on page, we can reinit the
1915  * page instead of restoring the whole thing. Set flag, and hide
1916  * buffer references from XLogInsert.
1917  */
1918  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
1919  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
1920  {
1921  info |= XLOG_HEAP_INIT_PAGE;
1922  bufflags |= REGBUF_WILL_INIT;
1923  }
1924 
1925  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
1926  xlrec.flags = 0;
1927  if (all_visible_cleared)
1928  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
1929  if (options & HEAP_INSERT_SPECULATIVE)
1930  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
1931  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
1932 
1933  /*
1934  * For logical decoding, we need the tuple even if we're doing a full
1935  * page write, so make sure it's included even if we take a full-page
1936  * image. (XXX We could alternatively store a pointer into the FPW).
1937  */
1938  if (RelationIsLogicallyLogged(relation) &&
1939  !(options & HEAP_INSERT_NO_LOGICAL))
1940  {
1941  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
1942  bufflags |= REGBUF_KEEP_DATA;
1943 
1944  if (IsToastRelation(relation))
1945  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
1946  }
1947 
1948  XLogBeginInsert();
1949  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
1950 
1951  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
1952  xlhdr.t_infomask = heaptup->t_data->t_infomask;
1953  xlhdr.t_hoff = heaptup->t_data->t_hoff;
1954 
1955  /*
1956  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
1957  * write the whole page to the xlog, we don't need to store
1958  * xl_heap_header in the xlog.
1959  */
1960  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
1961  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
1962  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
1963  XLogRegisterBufData(0,
1964  (char *) heaptup->t_data + SizeofHeapTupleHeader,
1965  heaptup->t_len - SizeofHeapTupleHeader);
1966 
1967  /* filtering by origin on a row level is much more efficient */
1968  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
1969 
1970  recptr = XLogInsert(RM_HEAP_ID, info);
1971 
1972  PageSetLSN(page, recptr);
1973  }
1974 
1975  END_CRIT_SECTION();
1976 
1977  UnlockReleaseBuffer(buffer);
1978  if (vmbuffer != InvalidBuffer)
1979  ReleaseBuffer(vmbuffer);
1980 
1981  /*
1982  * If tuple is cachable, mark it for invalidation from the caches in case
1983  * we abort. Note it is OK to do this after releasing the buffer, because
1984  * the heaptup data structure is all in local memory, not in the shared
1985  * buffer.
1986  */
1987  CacheInvalidateHeapTuple(relation, heaptup, NULL);
1988 
1989  /* Note: speculative insertions are counted too, even if aborted later */
1990  pgstat_count_heap_insert(relation, 1);
1991 
1992  /*
1993  * If heaptup is a private copy, release it. Don't forget to copy t_self
1994  * back to the caller's image, too.
1995  */
1996  if (heaptup != tup)
1997  {
1998  tup->t_self = heaptup->t_self;
1999  heap_freetuple(heaptup);
2000  }
2001 }
unsigned char uint8
Definition: c.h:491
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2010
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:37
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:36
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:75
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:71
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:32
#define SizeOfHeapInsert
Definition: heapam_xlog.h:167
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:74
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:46
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:703
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:513
OffsetNumber offnum
Definition: heapam_xlog.h:161
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
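
Ordinary DML goes through the table AM (table_tuple_insert()); internal callers usually use simple_heap_insert(), which is a thin wrapper around this function. A direct call looks like the following sketch, assuming values/isnull arrays prepared for the relation's descriptor:

    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

    heap_insert(rel, tup, GetCurrentCommandId(true),
                0 /* options */ , NULL /* bistate */ );

    /* tup->t_self now carries the TID chosen by the insert */
    heap_freetuple(tup);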

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
ItemPointer  tid,
TupleTableSlot slot,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
struct TM_FailureData tmfd 
)

Definition at line 4223 of file heapam.c.

4226 {
4227  TM_Result result;
4228  ItemId lp;
4229  Page page;
4230  Buffer buffer;
4231  Buffer vmbuffer = InvalidBuffer;
4232  BlockNumber block;
4233  TransactionId xid,
4234  xmax;
4235  uint16 old_infomask,
4236  new_infomask,
4237  new_infomask2;
4238  bool first_time = true;
4239  bool skip_tuple_lock = false;
4240  bool have_tuple_lock = false;
4241  bool cleared_all_frozen = false;
4242  BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
4243  HeapTuple tuple = &bslot->base.tupdata;
4244 
4245  Assert(TTS_IS_BUFFERTUPLE(slot));
4246 
4247  /* Take advantage if slot already contains the relevant tuple */
4248  if (!TTS_EMPTY(slot) &&
4249  slot->tts_tableOid == relation->rd_id &&
4250  ItemPointerCompare(&slot->tts_tid, tid) == 0 &&
4251  BufferIsValid(bslot->buffer))
4252  {
4253  buffer = bslot->buffer;
4254  IncrBufferRefCount(buffer);
4255  }
4256  else
4257  {
4258  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4259  }
4260  block = ItemPointerGetBlockNumber(tid);
4261 
4262  /*
4263  * Before locking the buffer, pin the visibility map page if it appears to
4264  * be necessary. Since we haven't got the lock yet, someone else might be
4265  * in the middle of changing this, so we'll need to recheck after we have
4266  * the lock.
4267  */
4268  if (PageIsAllVisible(BufferGetPage(buffer)))
4269  visibilitymap_pin(relation, block, &vmbuffer);
4270 
4271  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4272 
4273  page = BufferGetPage(buffer);
4274  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4275  Assert(ItemIdIsNormal(lp));
4276 
4277  tuple->t_self = *tid;
4278  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4279  tuple->t_len = ItemIdGetLength(lp);
4280  tuple->t_tableOid = RelationGetRelid(relation);
4281 
4282 l3:
4283  result = HeapTupleSatisfiesUpdate(tuple, cid, buffer);
4284 
4285  if (result == TM_Invisible)
4286  {
4287  /*
4288  * This is possible, but only when locking a tuple for ON CONFLICT
4289  * UPDATE. We return this value here rather than throwing an error in
4290  * order to give that case the opportunity to throw a more specific
4291  * error.
4292  */
4293  result = TM_Invisible;
4294  goto out_locked;
4295  }
4296  else if (result == TM_BeingModified ||
4297  result == TM_Updated ||
4298  result == TM_Deleted)
4299  {
4300  TransactionId xwait;
4301  uint16 infomask;
4302  uint16 infomask2;
4303  bool require_sleep;
4304  ItemPointerData t_ctid;
4305 
4306  /* must copy state data before unlocking buffer */
4307  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4308  infomask = tuple->t_data->t_infomask;
4309  infomask2 = tuple->t_data->t_infomask2;
4310  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4311 
4312  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4313 
4314  /*
4315  * If any subtransaction of the current top transaction already holds
4316  * a lock as strong as or stronger than what we're requesting, we
4317  * effectively hold the desired lock already. We *must* succeed
4318  * without trying to take the tuple lock, else we will deadlock
4319  * against anyone wanting to acquire a stronger lock.
4320  *
4321  * Note we only do this the first time we loop on the HTSU result;
4322  * there is no point in testing in subsequent passes, because
4323  * evidently our own transaction cannot have acquired a new lock after
4324  * the first time we checked.
4325  */
4326  if (first_time)
4327  {
4328  first_time = false;
4329 
4330  if (infomask & HEAP_XMAX_IS_MULTI)
4331  {
4332  int i;
4333  int nmembers;
4334  MultiXactMember *members;
4335 
4336  /*
4337  * We don't need to allow old multixacts here; if that had
4338  * been the case, HeapTupleSatisfiesUpdate would have returned
4339  * MayBeUpdated and we wouldn't be here.
4340  */
4341  nmembers =
4342  GetMultiXactIdMembers(xwait, &members, false,
4343  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4344 
4345  for (i = 0; i < nmembers; i++)
4346  {
4347  /* only consider members of our own transaction */
4348  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4349  continue;
4350 
4351  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4352  {
4353  pfree(members);
4354  result = TM_Ok;
4355  goto out_unlocked;
4356  }
4357  else
4358  {
4359  /*
4360  * Disable acquisition of the heavyweight tuple lock.
4361  * Otherwise, when promoting a weaker lock, we might
4362  * deadlock with another locker that has acquired the
4363  * heavyweight tuple lock and is waiting for our
4364  * transaction to finish.
4365  *
4366  * Note that in this case we still need to wait for
4367  * the multixact if required, to avoid acquiring
4368  * conflicting locks.
4369  */
4370  skip_tuple_lock = true;
4371  }
4372  }
4373 
4374  if (members)
4375  pfree(members);
4376  }
4377  else if (TransactionIdIsCurrentTransactionId(xwait))
4378  {
4379  switch (mode)
4380  {
4381  case LockTupleKeyShare:
4382  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4383  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4384  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4385  result = TM_Ok;
4386  goto out_unlocked;
4387  case LockTupleShare:
4388  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4389  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4390  {
4391  result = TM_Ok;
4392  goto out_unlocked;
4393  }
4394  break;
4395  case LockTupleNoKeyExclusive:
4396  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4397  {
4398  result = TM_Ok;
4399  goto out_unlocked;
4400  }
4401  break;
4402  case LockTupleExclusive:
4403  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4404  infomask2 & HEAP_KEYS_UPDATED)
4405  {
4406  result = TM_Ok;
4407  goto out_unlocked;
4408  }
4409  break;
4410  }
4411  }
4412  }
4413 
4414  /*
4415  * Initially assume that we will have to wait for the locking
4416  * transaction(s) to finish. We check various cases below in which
4417  * this can be turned off.
4418  */
4419  require_sleep = true;
4420  if (mode == LockTupleKeyShare)
4421  {
4422  /*
4423  * If we're requesting KeyShare, and there's no update present, we
4424  * don't need to wait. Even if there is an update, we can still
4425  * continue if the key hasn't been modified.
4426  *
4427  * However, if there are updates, we need to walk the update chain
4428  * to mark future versions of the row as locked, too. That way,
4429  * if somebody deletes that future version, we're protected
4430  * against the key going away. This locking of future versions
4431  * could block momentarily, if a concurrent transaction is
4432  * deleting a key; or it could return a value to the effect that
4433  * the transaction deleting the key has already committed. So we
4434  * do this before re-locking the buffer; otherwise this would be
4435  * prone to deadlocks.
4436  *
4437  * Note that the TID we're locking was grabbed before we unlocked
4438  * the buffer. For it to change while we're not looking, the
4439  * other properties we're testing for below after re-locking the
4440  * buffer would also change, in which case we would restart this
4441  * loop above.
4442  */
4443  if (!(infomask2 & HEAP_KEYS_UPDATED))
4444  {
4445  bool updated;
4446 
4447  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4448 
4449  /*
4450  * If there are updates, follow the update chain; bail out if
4451  * that cannot be done.
4452  */
4453  if (follow_updates && updated)
4454  {
4455  TM_Result res;
4456 
4457  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4458  GetCurrentTransactionId(),
4459  mode);
4460  if (res != TM_Ok)
4461  {
4462  result = res;
4463  /* recovery code expects to have buffer lock held */
4464  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4465  goto failed;
4466  }
4467  }
4468 
4469  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4470 
4471  /*
4472  * Make sure it's still an appropriate lock, else start over.
4473  * Also, if it wasn't updated before we released the lock, but
4474  * is updated now, we start over too; the reason is that we
4475  * now need to follow the update chain to lock the new
4476  * versions.
4477  */
4478  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4479  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4480  !updated))
4481  goto l3;
4482 
4483  /* Things look okay, so we can skip sleeping */
4484  require_sleep = false;
4485 
4486  /*
4487  * Note we allow Xmax to change here; other updaters/lockers
4488  * could have modified it before we grabbed the buffer lock.
4489  * However, this is not a problem, because with the recheck we
4490  * just did we ensure that they still don't conflict with the
4491  * lock we want.
4492  */
4493  }
4494  }
4495  else if (mode == LockTupleShare)
4496  {
4497  /*
4498  * If we're requesting Share, we can similarly avoid sleeping if
4499  * there's no update and no exclusive lock present.
4500  */
4501  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4502  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4503  {
4504  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4505 
4506  /*
4507  * Make sure it's still an appropriate lock, else start over.
4508  * See above about allowing xmax to change.
4509  */
4510  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4511  HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4512  goto l3;
4513  require_sleep = false;
4514  }
4515  }
4516  else if (mode == LockTupleNoKeyExclusive)
4517  {
4518  /*
4519  * If we're requesting NoKeyExclusive, we might also be able to
4520  * avoid sleeping; just ensure that there is no conflicting lock
4521  * already acquired.
4522  */
4523  if (infomask & HEAP_XMAX_IS_MULTI)
4524  {
4525  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4526  mode, NULL))
4527  {
4528  /*
4529  * No conflict, but if the xmax changed under us in the
4530  * meantime, start over.
4531  */
4532  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4533  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4534  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4535  xwait))
4536  goto l3;
4537 
4538  /* otherwise, we're good */
4539  require_sleep = false;
4540  }
4541  }
4542  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4543  {
4544  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4545 
4546  /* if the xmax changed in the meantime, start over */
4547  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4548  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4549  xwait))
4550  goto l3;
4551  /* otherwise, we're good */
4552  require_sleep = false;
4553  }
4554  }
4555 
4556  /*
4557  * As a check independent from those above, we can also avoid sleeping
4558  * if the current transaction is the sole locker of the tuple. Note
4559  * that the strength of the lock already held is irrelevant; this is
4560  * not about recording the lock in Xmax (which will be done regardless
4561  * of this optimization, below). Also, note that the cases where we
4562  * hold a lock stronger than we are requesting are already handled
4563  * above by not doing anything.
4564  *
4565  * Note we only deal with the non-multixact case here; MultiXactIdWait
4566  * is well equipped to deal with this situation on its own.
4567  */
4568  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4569  TransactionIdIsCurrentTransactionId(xwait))
4570  {
4571  /* ... but if the xmax changed in the meantime, start over */
4572  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4573  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4574  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4575  xwait))
4576  goto l3;
4577 
4578  require_sleep = false;
4579  }
4580 
4581  /*
4582  * Time to sleep on the other transaction/multixact, if necessary.
4583  *
4584  * If the other transaction is an update/delete that's already
4585  * committed, then sleeping cannot possibly do any good: if we're
4586  * required to sleep, get out to raise an error instead.
4587  *
4588  * By here, we either have already acquired the buffer exclusive lock,
4589  * or we must wait for the locking transaction or multixact; so below
4590  * we ensure that we grab buffer lock after the sleep.
4591  */
4592  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4593  {
4594  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4595  goto failed;
4596  }
4597  else if (require_sleep)
4598  {
4599  /*
4600  * Acquire tuple lock to establish our priority for the tuple, or
4601  * die trying. LockTuple will release us when we are next-in-line
4602  * for the tuple. We must do this even if we are share-locking,
4603  * but not if we already have a weaker lock on the tuple.
4604  *
4605  * If we are forced to "start over" below, we keep the tuple lock;
4606  * this arranges that we stay at the head of the line while
4607  * rechecking tuple state.
4608  */
4609  if (!skip_tuple_lock &&
4610  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4611  &have_tuple_lock))
4612  {
4613  /*
4614  * This can only happen if wait_policy is Skip and the lock
4615  * couldn't be obtained.
4616  */
4617  result = TM_WouldBlock;
4618  /* recovery code expects to have buffer lock held */
4619  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4620  goto failed;
4621  }
4622 
4623  if (infomask & HEAP_XMAX_IS_MULTI)
4624  {
4625  MultiXactStatus status = get_mxact_status_for_lock(mode, false);
4626 
4627  /* We only ever lock tuples, never update them */
4628  if (status >= MultiXactStatusNoKeyUpdate)
4629  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4630 
4631  /* wait for multixact to end, or die trying */
4632  switch (wait_policy)
4633  {
4634  case LockWaitBlock:
4635  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4636  relation, &tuple->t_self, XLTW_Lock, NULL);
4637  break;
4638  case LockWaitSkip:
4639  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4640  status, infomask, relation,
4641  NULL))
4642  {
4643  result = TM_WouldBlock;
4644  /* recovery code expects to have buffer lock held */
4645  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4646  goto failed;
4647  }
4648  break;
4649  case LockWaitError:
4650  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4651  status, infomask, relation,
4652  NULL))
4653  ereport(ERROR,
4654  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4655  errmsg("could not obtain lock on row in relation \"%s\"",
4656  RelationGetRelationName(relation))));
4657 
4658  break;
4659  }
4660 
4661  /*
4662  * Of course, the multixact might not be done here: if we're
4663  * requesting a light lock mode, other transactions with light
4664  * locks could still be alive, as well as locks owned by our
4665  * own xact or other subxacts of this backend. We need to
4666  * preserve the surviving MultiXact members. Note that it
4667  * isn't absolutely necessary in the latter case, but doing so
4668  * is simpler.
4669  */
4670  }
4671  else
4672  {
4673  /* wait for regular transaction to end, or die trying */
4674  switch (wait_policy)
4675  {
4676  case LockWaitBlock:
4677  XactLockTableWait(xwait, relation, &tuple->t_self,
4678  XLTW_Lock);
4679  break;
4680  case LockWaitSkip:
4681  if (!ConditionalXactLockTableWait(xwait))
4682  {
4683  result = TM_WouldBlock;
4684  /* recovery code expects to have buffer lock held */
4685  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4686  goto failed;
4687  }
4688  break;
4689  case LockWaitError:
4690  if (!ConditionalXactLockTableWait(xwait))
4691  ereport(ERROR,
4692  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4693  errmsg("could not obtain lock on row in relation \"%s\"",
4694  RelationGetRelationName(relation))));
4695  break;
4696  }
4697  }
4698 
4699  /* if there are updates, follow the update chain */
4700  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4701  {
4702  TM_Result res;
4703 
4704  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4705  GetCurrentTransactionId(),
4706  mode);
4707  if (res != TM_Ok)
4708  {
4709  result = res;
4710  /* recovery code expects to have buffer lock held */
4711  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4712  goto failed;
4713  }
4714  }
4715 
4716  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4717 
4718  /*
4719  * xwait is done, but if xwait had just locked the tuple then some
4720  * other xact could update this tuple before we get to this point.
4721  * Check for xmax change, and start over if so.
4722  */
4723  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4724  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4725  xwait))
4726  goto l3;
4727 
4728  if (!(infomask & HEAP_XMAX_IS_MULTI))
4729  {
4730  /*
4731  * Otherwise check if it committed or aborted. Note we cannot
4732  * be here if the tuple was only locked by somebody who didn't
4733  * conflict with us; that would have been handled above. So
4734  * that transaction must necessarily be gone by now. But
4735  * don't check for this in the multixact case, because some
4736  * locker transactions might still be running.
4737  */
4738  UpdateXmaxHintBits(tuple->t_data, buffer, xwait);
4739  }
4740  }
4741 
4742  /* By here, we're certain that we hold buffer exclusive lock again */
4743 
4744  /*
4745  * We may lock if previous xmax aborted, or if it committed but only
4746  * locked the tuple without updating it; or if we didn't have to wait
4747  * at all for whatever reason.
4748  */
4749  if (!require_sleep ||
4750  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4751  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4752  HeapTupleHeaderIsOnlyLocked(tuple->t_data))
4753  result = TM_Ok;
4754  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4755  result = TM_Updated;
4756  else
4757  result = TM_Deleted;
4758  }
4759 
4760 failed:
4761  if (result != TM_Ok)
4762  {
4763  Assert(result == TM_SelfModified || result == TM_Updated ||
4764  result == TM_Deleted || result == TM_WouldBlock);
4765 
4766  /*
4767  * When locking a tuple under LockWaitSkip semantics and we fail with
4768  * TM_WouldBlock above, it's possible for concurrent transactions to
4769  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4770  * this assert is slightly different from the equivalent one in
4771  * heap_delete and heap_update.
4772  */
4773  Assert((result == TM_WouldBlock) ||
4774  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4775  Assert(result != TM_Updated ||
4776  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4777  tmfd->ctid = tuple->t_data->t_ctid;
4778  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4779  if (result == TM_SelfModified)
4780  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4781  else
4782  tmfd->cmax = InvalidCommandId;
4783  goto out_locked;
4784  }
4785 
4786  /*
4787  * If we didn't pin the visibility map page and the page has become all
4788  * visible while we were busy locking the buffer, or during some
4789  * subsequent window during which we had it unlocked, we'll have to unlock
4790  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4791  * unfortunate, especially since we'll now have to recheck whether the
4792  * tuple has been locked or updated under us, but hopefully it won't
4793  * happen very often.
4794  */
4795  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4796  {
4797  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4798  visibilitymap_pin(relation, block, &vmbuffer);
4799  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
4800  goto l3;
4801  }
4802 
4803  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4804  old_infomask = tuple->t_data->t_infomask;
4805 
4806  /*
4807  * If this is the first possibly-multixact-able operation in the current
4808  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4809  * certain that the transaction will never become a member of any older
4810  * MultiXactIds than that. (We have to do this even if we end up just
4811  * using our own TransactionId below, since some other backend could
4812  * incorporate our XID into a MultiXact immediately afterwards.)
4813  */
4814  MultiXactIdSetOldestMember();
4815 
4816  /*
4817  * Compute the new xmax and infomask to store into the tuple. Note we do
4818  * not modify the tuple just yet, because that would leave it in the wrong
4819  * state if multixact.c elogs.
4820  */
4821  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
4822  GetCurrentTransactionId(), mode, false,
4823  &xid, &new_infomask, &new_infomask2);
4824 
4825  START_CRIT_SECTION();
4826 
4827  /*
4828  * Store transaction information of xact locking the tuple.
4829  *
4830  * Note: Cmax is meaningless in this context, so don't set it; this avoids
4831  * possibly generating a useless combo CID. Moreover, if we're locking a
4832  * previously updated tuple, it's important to preserve the Cmax.
4833  *
4834  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
4835  * we would break the HOT chain.
4836  */
4837  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
4838  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4839  tuple->t_data->t_infomask |= new_infomask;
4840  tuple->t_data->t_infomask2 |= new_infomask2;
4841  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4842  HeapTupleHeaderClearHotUpdated(tuple->t_data);
4843  HeapTupleHeaderSetXmax(tuple->t_data, xid);
4844 
4845  /*
4846  * Make sure there is no forward chain link in t_ctid. Note that in the
4847  * cases where the tuple has been updated, we must not overwrite t_ctid,
4848  * because it was set by the updater. Moreover, if the tuple has been
4849  * updated, we need to follow the update chain to lock the new versions of
4850  * the tuple as well.
4851  */
4852  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4853  tuple->t_data->t_ctid = *tid;
4854 
4855  /* Clear only the all-frozen bit on visibility map if needed */
4856  if (PageIsAllVisible(page) &&
4857  visibilitymap_clear(relation, block, vmbuffer,
4858  VISIBILITYMAP_ALL_FROZEN))
4859  cleared_all_frozen = true;
4860 
4861 
4862  MarkBufferDirty(buffer);
4863 
4864  /*
4865  * XLOG stuff. You might think that we don't need an XLOG record because
4866  * there is no state change worth restoring after a crash. You would be
4867  * wrong however: we have just written either a TransactionId or a
4868  * MultiXactId that may never have been seen on disk before, and we need
4869  * to make sure that there are XLOG entries covering those ID numbers.
4870  * Else the same IDs might be re-used after a crash, which would be
4871  * disastrous if this page made it to disk before the crash. Essentially
4872  * we have to enforce the WAL log-before-data rule even in this case.
4873  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
4874  * entries for everything anyway.)
4875  */
4876  if (RelationNeedsWAL(relation))
4877  {
4878  xl_heap_lock xlrec;
4879  XLogRecPtr recptr;
4880 
4881  XLogBeginInsert();
4882  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
4883 
4884  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
4885  xlrec.xmax = xid;
4886  xlrec.infobits_set = compute_infobits(new_infomask,
4887  tuple->t_data->t_infomask2);
4888  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
4889  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
4890 
4891  /* we don't decode row locks atm, so no need to log the origin */
4892 
4893  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
4894 
4895  PageSetLSN(page, recptr);
4896  }
4897 
4898  END_CRIT_SECTION();
4899 
4900  result = TM_Ok;
4901 
4902 out_locked:
4903  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4904 
4905 out_unlocked:
4906  if (BufferIsValid(vmbuffer))
4907  ReleaseBuffer(vmbuffer);
4908 
4909  /*
4910  * Don't update the visibility map here. Locking a tuple doesn't change
4911  * visibility info.
4912  */
4913 
4914  /*
4915  * Now that we have successfully marked the tuple as locked, we can
4916  * release the lmgr tuple lock, if we had it.
4917  */
4918  if (have_tuple_lock)
4919  UnlockTupleTuplock(relation, tid, mode);
4920 
4921  /* Put the target tuple to the slot */
4922  ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
4923 
4924  return result;
4925 }
void IncrBufferRefCount(Buffer buffer)
Definition: bufmgr.c:4592
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:213
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5706
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7262
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4173
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:392
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:38
#define SizeOfHeapLock
Definition: heapam_xlog.h:403
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:740
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1239
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define RelationGetRelationName(relation)
Definition: rel.h:541
Oid rd_id
Definition: rel.h:113
Oid tts_tableOid
Definition: tuptable.h:130
ItemPointerData tts_tid
Definition: tuptable.h:129
uint8 infobits_set
Definition: heapam_xlog.h:399
OffsetNumber offnum
Definition: heapam_xlog.h:398
TransactionId xmax
Definition: heapam_xlog.h:397
@ TM_WouldBlock
Definition: tableam.h:102
#define TTS_EMPTY(slot)
Definition: tuptable.h:96
#define VISIBILITYMAP_ALL_FROZEN

References Assert(), BufferHeapTupleTableSlot::buffer, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExecStorePinnedBufferHeapTuple(), xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, IncrBufferRefCount(), xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCompare(), ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), RelationData::rd_id, ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TTS_EMPTY, TTS_IS_BUFFERTUPLE, TupleTableSlot::tts_tableOid, TupleTableSlot::tts_tid, TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
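For orientation, a hedged calling sketch (the function and variable names are invented; heapam_tuple_lock() is the real caller and also handles the follow-the-update-chain retries that are omitted here). The slot is assumed to be a buffer-capable heap slot, since heap_lock_tuple() stores the locked tuple into it on return.

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

/* hypothetical example: lock one row FOR UPDATE, blocking if necessary */
static TM_Result
lock_row_for_update(Relation rel, ItemPointer tid, TupleTableSlot *slot)
{
	TM_FailureData tmfd;

	return heap_lock_tuple(rel, tid, slot,
						   GetCurrentCommandId(true),
						   LockTupleExclusive,
						   LockWaitBlock,
						   true,	/* follow_updates */
						   &tmfd);
}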

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2090 of file heapam.c.

2092 {
2093  TransactionId xid = GetCurrentTransactionId();
2094  HeapTuple *heaptuples;
2095  int i;
2096  int ndone;
2097  PGAlignedBlock scratch;
2098  Page page;
2099  Buffer vmbuffer = InvalidBuffer;
2100  bool needwal;
2101  Size saveFreeSpace;
2102  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2103  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2104  bool starting_with_empty_page = false;
2105  int npages = 0;
2106  int npages_used = 0;
2107 
2108  /* currently not needed (thus unsupported) for heap_multi_insert() */
2109  Assert(!(options & HEAP_INSERT_NO_LOGICAL));
2110 
2111  needwal = RelationNeedsWAL(relation);
2112  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2113  HEAP_DEFAULT_FILLFACTOR);
2114 
2115  /* Toast and set header data in all the slots */
2116  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2117  for (i = 0; i < ntuples; i++)
2118  {
2119  HeapTuple tuple;
2120 
2121  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2122  slots[i]->tts_tableOid = RelationGetRelid(relation);
2123  tuple->t_tableOid = slots[i]->tts_tableOid;
2124  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2125  options);
2126  }
2127 
2128  /*
2129  * We're about to do the actual inserts -- but check for conflict first,
2130  * to minimize the possibility of having to roll back work we've just
2131  * done.
2132  *
2133  * A check here does not definitively prevent a serialization anomaly;
2134  * that check MUST be done at least past the point of acquiring an
2135  * exclusive buffer content lock on every buffer that will be affected,
2136  * and MAY be done after all inserts are reflected in the buffers and
2137  * those locks are released; otherwise there is a race condition. Since
2138  * multiple buffers can be locked and unlocked in the loop below, and it
2139  * would not be feasible to identify and lock all of those buffers before
2140  * the loop, we must do a final check at the end.
2141  *
2142  * The check here could be omitted with no loss of correctness; it is
2143  * present strictly as an optimization.
2144  *
2145  * For heap inserts, we only need to check for table-level SSI locks. Our
2146  * new tuples can't possibly conflict with existing tuple locks, and heap
2147  * page locks are only consolidated versions of tuple locks; they do not
2148  * lock "gaps" as index page locks do. So we don't need to specify a
2149  * buffer when making the call, which makes for a faster check.
2150  */
2151  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2152 
2153  ndone = 0;
2154  while (ndone < ntuples)
2155  {
2156  Buffer buffer;
2157  bool all_visible_cleared = false;
2158  bool all_frozen_set = false;
2159  int nthispage;
2160 
2161  CHECK_FOR_INTERRUPTS();
2162 
2163  /*
2164  * Compute number of pages needed to fit the to-be-inserted tuples in
2165  * the worst case. This will be used to determine how much to extend
2166  * the relation by in RelationGetBufferForTuple(), if needed. If we
2167  * filled a prior page from scratch, we can just update our last
2168  * computation, but if we started with a partially filled page,
2169  recompute from scratch, since the number of potentially required pages
2170  * can vary due to tuples needing to fit onto the page, page headers
2171  * etc.
2172  */
2173  if (ndone == 0 || !starting_with_empty_page)
2174  {
2175  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2176  saveFreeSpace);
2177  npages_used = 0;
2178  }
2179  else
2180  npages_used++;
2181 
2182  /*
2183  * Find buffer where at least the next tuple will fit. If the page is
2184  * all-visible, this will also pin the requisite visibility map page.
2185  *
2186  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2187  * empty page. See all_frozen_set below.
2188  */
2189  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2190  InvalidBuffer, options, bistate,
2191  &vmbuffer, NULL,
2192  npages - npages_used);
2193  page = BufferGetPage(buffer);
2194 
2195  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2196 
2197  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2198  all_frozen_set = true;
2199 
2200  /* NO EREPORT(ERROR) from here till changes are logged */
2201  START_CRIT_SECTION();
2202 
2203  /*
2204  * RelationGetBufferForTuple has ensured that the first tuple fits.
2205  * Put that on the page, and then as many other tuples as fit.
2206  */
2207  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2208 
2209  /*
2210  * For logical decoding we need combo CIDs to properly decode the
2211  * catalog.
2212  */
2213  if (needwal && need_cids)
2214  log_heap_new_cid(relation, heaptuples[ndone]);
2215 
2216  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2217  {
2218  HeapTuple heaptup = heaptuples[ndone + nthispage];
2219 
2220  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2221  break;
2222 
2223  RelationPutHeapTuple(relation, buffer, heaptup, false);
2224 
2225  /*
2226  * For logical decoding we need combo CIDs to properly decode the
2227  * catalog.
2228  */
2229  if (needwal && need_cids)
2230  log_heap_new_cid(relation, heaptup);
2231  }
2232 
2233  /*
2234  * If the page is all visible, need to clear that, unless we're only
2235  * going to add further frozen rows to it.
2236  *
2237  * If we're only adding already frozen rows to a previously empty
2238  * page, mark it as all-visible.
2239  */
2240  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2241  {
2242  all_visible_cleared = true;
2243  PageClearAllVisible(page);
2244  visibilitymap_clear(relation,
2245  BufferGetBlockNumber(buffer),
2246  vmbuffer, VISIBILITYMAP_VALID_BITS);
2247  }
2248  else if (all_frozen_set)
2249  PageSetAllVisible(page);
2250 
2251  /*
2252  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2253  */
2254 
2255  MarkBufferDirty(buffer);
2256 
2257  /* XLOG stuff */
2258  if (needwal)
2259  {
2260  XLogRecPtr recptr;
2261  xl_heap_multi_insert *xlrec;
2262  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2263  char *tupledata;
2264  int totaldatalen;
2265  char *scratchptr = scratch.data;
2266  bool init;
2267  int bufflags = 0;
2268 
2269  /*
2270  * If the page was previously empty, we can reinit the page
2271  * instead of restoring the whole thing.
2272  */
2273  init = starting_with_empty_page;
2274 
2275  /* allocate xl_heap_multi_insert struct from the scratch area */
2276  xlrec = (xl_heap_multi_insert *) scratchptr;
2277  scratchptr += SizeOfHeapMultiInsert;
2278 
2279  /*
2280  * Allocate offsets array. Unless we're reinitializing the page,
2281  * in that case the tuples are stored in order starting at
2282  * FirstOffsetNumber and we don't need to store the offsets
2283  * explicitly.
2284  */
2285  if (!init)
2286  scratchptr += nthispage * sizeof(OffsetNumber);
2287 
2288  /* the rest of the scratch space is used for tuple data */
2289  tupledata = scratchptr;
2290 
2291  /* check that the mutually exclusive flags are not both set */
2292  Assert(!(all_visible_cleared && all_frozen_set));
2293 
2294  xlrec->flags = 0;
2295  if (all_visible_cleared)
2296  xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
2297  if (all_frozen_set)
2298  xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
2299 
2300  xlrec->ntuples = nthispage;
2301 
2302  /*
2303  * Write out an xl_multi_insert_tuple and the tuple data itself
2304  * for each tuple.
2305  */
2306  for (i = 0; i < nthispage; i++)
2307  {
2308  HeapTuple heaptup = heaptuples[ndone + i];
2309  xl_multi_insert_tuple *tuphdr;
2310  int datalen;
2311 
2312  if (!init)
2313  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2314  /* xl_multi_insert_tuple needs two-byte alignment. */
2315  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2316  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2317 
2318  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2319  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2320  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2321 
2322  /* write bitmap [+ padding] [+ oid] + data */
2323  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2324  memcpy(scratchptr,
2325  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2326  datalen);
2327  tuphdr->datalen = datalen;
2328  scratchptr += datalen;
2329  }
2330  totaldatalen = scratchptr - tupledata;
2331  Assert((scratchptr - scratch.data) < BLCKSZ);
2332 
2333  if (need_tuple_data)
2334  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2335 
2336  /*
2337  * Signal that this is the last xl_heap_multi_insert record
2338  * emitted by this call to heap_multi_insert(). Needed for logical
2339  * decoding so it knows when to cleanup temporary data.
2340  */
2341  if (ndone + nthispage == ntuples)
2342  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2343 
2344  if (init)
2345  {
2346  info |= XLOG_HEAP_INIT_PAGE;
2347  bufflags |= REGBUF_WILL_INIT;
2348  }
2349 
2350  /*
2351  * If we're doing logical decoding, include the new tuple data
2352  * even if we take a full-page image of the page.
2353  */
2354  if (need_tuple_data)
2355  bufflags |= REGBUF_KEEP_DATA;
2356 
2357  XLogBeginInsert();
2358  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2359  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2360 
2361  XLogRegisterBufData(0, tupledata, totaldatalen);
2362 
2363  /* filtering by origin on a row level is much more efficient */
2364  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2365 
2366  recptr = XLogInsert(RM_HEAP2_ID, info);
2367 
2368  PageSetLSN(page, recptr);
2369  }
2370 
2371  END_CRIT_SECTION();
2372 
2373  /*
2374  * If we've frozen everything on the page, update the visibilitymap.
2375  * We're already holding pin on the vmbuffer.
2376  */
2377  if (all_frozen_set)
2378  {
2379  Assert(PageIsAllVisible(page));
2380  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2381 
2382  /*
2383  * It's fine to use InvalidTransactionId here - this is only used
2384  * when HEAP_INSERT_FROZEN is specified, which intentionally
2385  * violates visibility rules.
2386  */
2387  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2388  InvalidXLogRecPtr, vmbuffer,
2389  InvalidTransactionId,
2390  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2391  }
2392 
2393  UnlockReleaseBuffer(buffer);
2394  ndone += nthispage;
2395 
2396  /*
2397  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2398  * likely that we'll insert into subsequent heap pages that are likely
2399  * to use the same vm page.
2400  */
2401  }
2402 
2403  /* We're done with inserting all tuples, so release the last vmbuffer. */
2404  if (vmbuffer != InvalidBuffer)
2405  ReleaseBuffer(vmbuffer);
2406 
2407  /*
2408  * We're done with the actual inserts. Check for conflicts again, to
2409  * ensure that all rw-conflicts in to these inserts are detected. Without
2410  * this final check, a sequential scan of the heap may have locked the
2411  * table after the "before" check, missing one opportunity to detect the
2412  * conflict, and then scanned the table before the new tuples were there,
2413  * missing the other chance to detect the conflict.
2414  *
2415  * For heap inserts, we only need to check for table-level SSI locks. Our
2416  * new tuples can't possibly conflict with existing tuple locks, and heap
2417  * page locks are only consolidated versions of tuple locks; they do not
2418  * lock "gaps" as index page locks do. So we don't need to specify a
2419  * buffer when making the call.
2420  */
2421  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2422 
2423  /*
2424  * If tuples are cachable, mark them for invalidation from the caches in
2425  * case we abort. Note it is OK to do this after releasing the buffer,
2426  * because the heaptuples data structure is all in local memory, not in
2427  * the shared buffer.
2428  */
2429  if (IsCatalogRelation(relation))
2430  {
2431  for (i = 0; i < ntuples; i++)
2432  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2433  }
2434 
2435  /* copy t_self fields back to the caller's slots */
2436  for (i = 0; i < ntuples; i++)
2437  slots[i]->tts_tid = heaptuples[i]->t_self;
2438 
2439  pgstat_count_heap_insert(relation, ntuples);
2440 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
#define MAXALIGN(LEN)
Definition: c.h:798
#define SHORTALIGN(LEN)
Definition: c.h:794
size_t Size
Definition: c.h:592
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2058
#define HEAP_INSERT_FROZEN
Definition: heapam.h:35
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:187
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:63
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:72
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:78
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:198
int init
Definition: isn.c:75
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:380
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:351
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:184
char data[BLCKSZ]
Definition: c.h:1106
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
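A hedged sketch of the batching pattern (names invented; CatalogTuplesMultiInsertWithInfo() is the in-tree caller): the caller materializes one slot per row, then hands the whole array over in a single call so that page locking, WAL records, and visibility-map handling are amortized across the batch.

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

/* hypothetical example: insert an array of already-filled slots in one call */
static void
flush_slot_batch(Relation rel, TupleTableSlot **slots, int nslots,
				 BulkInsertState bistate)
{
	if (nslots == 0)
		return;

	heap_multi_insert(rel, slots, nslots,
					  GetCurrentCommandId(true),
					  0,			/* options */
					  bistate);
}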

◆ heap_page_prune()

void heap_page_prune ( Relation  relation,
Buffer  buffer,
struct GlobalVisState vistest,
bool  mark_unused_now,
PruneResult presult,
PruneReason  reason,
OffsetNumber off_loc 
)

Definition at line 214 of file pruneheap.c.

220 {
221  Page page = BufferGetPage(buffer);
222  BlockNumber blockno = BufferGetBlockNumber(buffer);
223  OffsetNumber offnum,
224  maxoff;
225  PruneState prstate;
226  HeapTupleData tup;
227 
228  /*
229  * Our strategy is to scan the page and make lists of items to change,
230  * then apply the changes within a critical section. This keeps as much
231  * logic as possible out of the critical section, and also ensures that
232  * WAL replay will work the same as the normal case.
233  *
234  * First, initialize the new pd_prune_xid value to zero (indicating no
235  * prunable tuples). If we find any tuples which may soon become
236  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
237  * initialize the rest of our working state.
238  */
239  prstate.new_prune_xid = InvalidTransactionId;
240  prstate.vistest = vistest;
241  prstate.mark_unused_now = mark_unused_now;
242  prstate.snapshotConflictHorizon = InvalidTransactionId;
243  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
244  memset(prstate.marked, 0, sizeof(prstate.marked));
245 
246  /*
247  * presult->htsv is not initialized here because all ntuple spots in the
248  * array will be set either to a valid HTSV_Result value or -1.
249  */
250  presult->ndeleted = 0;
251  presult->nnewlpdead = 0;
252 
253  maxoff = PageGetMaxOffsetNumber(page);
254  tup.t_tableOid = RelationGetRelid(relation);
255 
256  /*
257  * Determine HTSV for all tuples.
258  *
259  * This is required for correctness to deal with cases where running HTSV
260  * twice could result in different results (e.g. RECENTLY_DEAD can turn to
261  * DEAD if another checked item causes GlobalVisTestIsRemovableFullXid()
262  * to update the horizon, INSERT_IN_PROGRESS can change to DEAD if the
263  * inserting transaction aborts, ...). That in turn could cause
264  * heap_prune_chain() to behave incorrectly if a tuple is reached twice,
265  * once directly via a heap_prune_chain() and once following a HOT chain.
266  *
267  * It's also good for performance. Most commonly tuples within a page are
268  * stored at decreasing offsets (while the items are stored at increasing
269  * offsets). When processing all tuples on a page this leads to reading
270  * memory at decreasing offsets within a page, with a variable stride.
271  * That's hard for CPU prefetchers to deal with. Processing the items in
272  * reverse order (and thus the tuples in increasing order) increases
273  * prefetching efficiency significantly / decreases the number of cache
274  * misses.
275  */
276  for (offnum = maxoff;
277  offnum >= FirstOffsetNumber;
278  offnum = OffsetNumberPrev(offnum))
279  {
280  ItemId itemid = PageGetItemId(page, offnum);
281  HeapTupleHeader htup;
282 
283  /* Nothing to do if slot doesn't contain a tuple */
284  if (!ItemIdIsNormal(itemid))
285  {
286  presult->htsv[offnum] = -1;
287  continue;
288  }
289 
290  htup = (HeapTupleHeader) PageGetItem(page, itemid);
291  tup.t_data = htup;
292  tup.t_len = ItemIdGetLength(itemid);
293  ItemPointerSet(&(tup.t_self), blockno, offnum);
294 
295  /*
296  * Set the offset number so that we can display it along with any
297  * error that occurred while processing this tuple.
298  */
299  if (off_loc)
300  *off_loc = offnum;
301 
302  presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
303  buffer);
304  }
305 
306  /* Scan the page */
307  for (offnum = FirstOffsetNumber;
308  offnum <= maxoff;
309  offnum = OffsetNumberNext(offnum))
310  {
311  ItemId itemid;
312 
313  /* Ignore items already processed as part of an earlier chain */
314  if (prstate.marked[offnum])
315  continue;
316 
317  /* see preceding loop */
318  if (off_loc)
319  *off_loc = offnum;
320 
321  /* Nothing to do if slot is empty */
322  itemid = PageGetItemId(page, offnum);
323  if (!ItemIdIsUsed(itemid))
324  continue;
325 
326  /* Process this item or chain of items */
327  presult->ndeleted += heap_prune_chain(buffer, offnum,
328  presult->htsv, &prstate);
329  }
330 
331  /* Clear the offset information once we have processed the given page. */
332  if (off_loc)
333  *off_loc = InvalidOffsetNumber;
334 
335  /* Any error while applying the changes is critical */
336  START_CRIT_SECTION();
337 
338  /* Have we found any prunable items? */
339  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
340  {
341  /*
342  * Apply the planned item changes, then repair page fragmentation, and
343  * update the page's hint bit about whether it has free line pointers.
344  */
345  heap_page_prune_execute(buffer, false,
346  prstate.redirected, prstate.nredirected,
347  prstate.nowdead, prstate.ndead,
348  prstate.nowunused, prstate.nunused);
349 
350  /*
351  * Update the page's pd_prune_xid field to either zero, or the lowest
352  * XID of any soon-prunable tuple.
353  */
354  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
355 
356  /*
357  * Also clear the "page is full" flag, since there's no point in
358  * repeating the prune/defrag process until something else happens to
359  * the page.
360  */
361  PageClearFull(page);
362 
363  MarkBufferDirty(buffer);
364 
365  /*
366  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
367  */
368  if (RelationNeedsWAL(relation))
369  {
370  log_heap_prune_and_freeze(relation, buffer,
371  prstate.snapshotConflictHorizon,
372  true, reason,
373  NULL, 0,
374  prstate.redirected, prstate.nredirected,
375  prstate.nowdead, prstate.ndead,
376  prstate.nowunused, prstate.nunused);
377  }
378  }
379  else
380  {
381  /*
382  * If we didn't prune anything, but have found a new value for the
383  * pd_prune_xid field, update it and mark the buffer dirty. This is
384  * treated as a non-WAL-logged hint.
385  *
386  * Also clear the "page is full" flag if it is set, since there's no
387  * point in repeating the prune/defrag process until something else
388  * happens to the page.
389  */
390  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
391  PageIsFull(page))
392  {
393  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
394  PageClearFull(page);
395  MarkBufferDirtyHint(buffer, true);
396  }
397  }
398 
399  END_CRIT_SECTION();
400 
401  /* Record number of newly-set-LP_DEAD items for caller */
402  presult->nnewlpdead = prstate.ndead;
403 }
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4624
PageHeaderData * PageHeader
Definition: bufpage.h:170
static void PageClearFull(Page page)
Definition: bufpage.h:420
static bool PageIsFull(Page page)
Definition: bufpage.h:410
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate)
Definition: pruneheap.c:455
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:410
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:812
int nnewlpdead
Definition: heapam.h:200
int ndeleted
Definition: heapam.h:199
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: heapam.h:211
int ndead
Definition: pruneheap.c:40
TransactionId new_prune_xid
Definition: pruneheap.c:37
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:44
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:53
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:45
bool mark_unused_now
Definition: pruneheap.c:35
GlobalVisState * vistest
Definition: pruneheap.c:33
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:43
int nredirected
Definition: pruneheap.c:39
int nunused
Definition: pruneheap.c:41
TransactionId snapshotConflictHorizon
Definition: pruneheap.c:38

References BufferGetBlockNumber(), BufferGetPage(), END_CRIT_SECTION, FirstOffsetNumber, heap_page_prune_execute(), heap_prune_chain(), heap_prune_satisfies_vacuum(), PruneResult::htsv, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemIdIsUsed, ItemPointerSet(), log_heap_prune_and_freeze(), PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::marked, PruneState::ndead, PruneResult::ndeleted, PruneState::new_prune_xid, PruneResult::nnewlpdead, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, PageClearFull(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), PruneState::redirected, RelationGetRelid, RelationNeedsWAL, PruneState::snapshotConflictHorizon, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, and PruneState::vistest.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
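A hedged calling sketch (modeled loosely on heap_page_prune_opt(); the helper name, the cleanup-lock handling, and the assumption that the buffer is already pinned belong to the example, not to heap_page_prune() itself):

#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"

/* hypothetical example: prune one pinned page if a cleanup lock is available */
static void
try_prune_page(Relation rel, Buffer buf)
{
	GlobalVisState *vistest = GlobalVisTestFor(rel);
	PruneResult presult;

	if (!ConditionalLockBufferForCleanup(buf))
		return;					/* page is busy; skip pruning */

	heap_page_prune(rel, buf, vistest,
					false,		/* mark_unused_now */
					&presult,
					PRUNE_ON_ACCESS,
					NULL);		/* off_loc: no error-context offset needed */

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}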

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 812 of file pruneheap.c.

816 {
817  Page page = (Page) BufferGetPage(buffer);
818  OffsetNumber *offnum;
819  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
820 
821  /* Shouldn't be called unless there's something to do */
822  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
823 
824  /* If 'lp_truncate_only', we can only remove already-dead line pointers */
825  Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
826 
827  /* Update all redirected line pointers */
828  offnum = redirected;
829  for (int i = 0; i < nredirected; i++)
830  {
831  OffsetNumber fromoff = *offnum++;
832  OffsetNumber tooff = *offnum++;
833  ItemId fromlp = PageGetItemId(page, fromoff);
834  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
835 
836 #ifdef USE_ASSERT_CHECKING
837 
838  /*
839  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
840  * must be the first item from a HOT chain. If the item has tuple
841  * storage then it can't be a heap-only tuple. Otherwise we are just
842  * maintaining an existing LP_REDIRECT from an existing HOT chain that
843  * has been pruned at least once before now.
844  */
845  if (!ItemIdIsRedirected(fromlp))
846  {
847  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
848 
849  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
850  Assert(!HeapTupleHeaderIsHeapOnly(htup));
851  }
852  else
853  {
854  /* We shouldn't need to redundantly set the redirect */
855  Assert(ItemIdGetRedirect(fromlp) != tooff);
856  }
857 
858  /*
859  * The item that we're about to set as an LP_REDIRECT (the 'from'
860  * item) will point to an existing item (the 'to' item) that is
861  * already a heap-only tuple. There can be at most one LP_REDIRECT
862  * item per HOT chain.
863  *
864  * We need to keep around an LP_REDIRECT item (after original
865  * non-heap-only root tuple gets pruned away) so that it's always
866  * possible for VACUUM to easily figure out what TID to delete from
867  * indexes when an entire HOT chain becomes dead. A heap-only tuple
868  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
869  * tuple can.
870  *
871  * This check may miss problems, e.g. the target of a redirect could
872  * be marked as unused subsequently. The page_verify_redirects() check
873  * below will catch such problems.
874  */
875  tolp = PageGetItemId(page, tooff);
876  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
877  htup = (HeapTupleHeader) PageGetItem(page, tolp);
878  Assert(HeapTupleHeaderIsHeapOnly(htup));
879 #endif
880 
881  ItemIdSetRedirect(fromlp, tooff);
882  }
883 
884  /* Update all now-dead line pointers */
885  offnum = nowdead;
886  for (int i = 0; i < ndead; i++)
887  {
888  OffsetNumber off = *offnum++;
889  ItemId lp = PageGetItemId(page, off);
890 
891 #ifdef USE_ASSERT_CHECKING
892 
893  /*
894  * An LP_DEAD line pointer must be left behind when the original item
895  * (which is dead to everybody) could still be referenced by a TID in
896  * an index. This should never be necessary with any individual
897  * heap-only tuple item, though. (It's not clear how much of a problem
898  * that would be, but there is no reason to allow it.)
899  */
900  if (ItemIdHasStorage(lp))
901  {
902  Assert(ItemIdIsNormal(lp));
903  htup = (HeapTupleHeader) PageGetItem(page, lp);
904  Assert(!HeapTupleHeaderIsHeapOnly(htup));
905  }
906  else
907  {
908  /* Whole HOT chain becomes dead */
909  Assert(ItemIdIsRedirected(lp));
910  }
911 #endif
912 
913  ItemIdSetDead(lp);
914  }
915 
916  /* Update all now-unused line pointers */
917  offnum = nowunused;
918  for (int i = 0; i < nunused; i++)
919  {
920  OffsetNumber off = *offnum++;
921  ItemId lp = PageGetItemId(page, off);
922 
923 #ifdef USE_ASSERT_CHECKING
924 
925  if (lp_truncate_only)
926  {
927  /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
928  Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
929  }
930  else
931  {
932  /*
933  * When heap_page_prune() was called, mark_unused_now may have
934  * been passed as true, which allows would-be LP_DEAD items to be
935  * made LP_UNUSED instead. This is only possible if the relation
936  * has no indexes. If there are any dead items, then
937  * mark_unused_now was not true and every item being marked
938  * LP_UNUSED must refer to a heap-only tuple.
939  */
940  if (ndead > 0)
941  {
942  Assert(ItemIdIsNormal(lp));
943  htup = (HeapTupleHeader) PageGetItem(page, lp);
944  Assert(HeapTupleHeaderIsHeapOnly(htup));
945  }
946  else
947  Assert(ItemIdIsUsed(lp));
948  }
949 
950 #endif
951 
952  ItemIdSetUnused(lp);
953  }
954 
955  if (lp_truncate_only)
956  PageTruncateLinePointerArray(page);
957  else
958  {
959  /*
960  * Finally, repair any fragmentation, and update the page's hint bit
961  * about whether it has free pointers.
962  */
963  PageRepairFragmentation(page);
964 
965  /*
966  * Now that the page has been modified, assert that redirect items
967  * still point to valid targets.
968  */
969  page_verify_redirects(page);
970  }
971 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:169
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:988

References Assert(), BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune(), and heap_xlog_prune_freeze().
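To make the argument conventions concrete, a hypothetical, hard-coded layout is shown below: redirected[] holds from/to offset pairs (so nredirected counts pairs, not array elements), while nowdead[] and nowunused[] are plain offset lists. The caller is assumed to hold a cleanup lock on the buffer and to be inside a critical section, as heap_page_prune() is when it makes this call.

#include "access/heapam.h"
#include "storage/off.h"

/* hypothetical illustration of the array layout only */
static void
apply_example_prune_plan(Buffer buffer)
{
	OffsetNumber redirected[] = {1, 3};	/* item 1 becomes LP_REDIRECT to item 3 */
	OffsetNumber nowdead[] = {4};		/* item 4 becomes LP_DEAD */
	OffsetNumber nowunused[] = {5, 6};	/* items 5 and 6 become LP_UNUSED */

	heap_page_prune_execute(buffer, false,	/* lp_truncate_only */
							redirected, 1,	/* one from/to pair */
							nowdead, 1,
							nowunused, 2);
}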

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 86 of file pruneheap.c.

87 {
88  Page page = BufferGetPage(buffer);
89  TransactionId prune_xid;
90  GlobalVisState *vistest;
91  Size minfree;
92 
93  /*
94  * We can't write WAL in recovery mode, so there's no point trying to
95  * clean the page. The primary will likely issue a cleaning WAL record
96  * soon anyway, so this is no particular loss.
97  */
98  if (RecoveryInProgress())
99  return;
100 
101  /*
102  * First check whether there's any chance there's something to prune,
103  * determining the appropriate horizon is a waste if there's no prune_xid
104  * (i.e. no updates/deletes left potentially dead tuples around).
105  */
106  prune_xid = ((PageHeader) page)->pd_prune_xid;
107  if (!TransactionIdIsValid(prune_xid))
108  return;
109 
110  /*
111  * Check whether prune_xid indicates that there may be dead rows that can
112  * be cleaned up.
113  */
114  vistest = GlobalVisTestFor(relation);
115 
116  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
117  return;
118 
119  /*
120  * We prune when a previous UPDATE failed to find enough space on the page
121  * for a new tuple version, or when free space falls below the relation's
122  * fill-factor target (but not less than 10%).
123  *
124  * Checking free space here is questionable since we aren't holding any
125  * lock on the buffer; in the worst case we could get a bogus answer. It's
126  * unlikely to be *seriously* wrong, though, since reading either pd_lower
127  * or pd_upper is probably atomic. Avoiding taking a lock seems more
128  * important than sometimes getting a wrong answer in what is after all
129  * just a heuristic estimate.
130  */
131  minfree = RelationGetTargetPageFreeSpace(relation,
132  HEAP_DEFAULT_FILLFACTOR);
133  minfree = Max(minfree, BLCKSZ / 10);
134 
135  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
136  {
137  /* OK, try to get exclusive buffer lock */
138  if (!ConditionalLockBufferForCleanup(buffer))
139  return;
140 
141  /*
142  * Now that we have buffer lock, get accurate information about the
143  * page's free space, and recheck the heuristic about whether to
144  * prune.
145  */
146  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
147  {
148  PruneResult presult;
149 
150  /*
151  * For now, pass mark_unused_now as false regardless of whether or
152  * not the relation has indexes, since we cannot safely determine
153  * that during on-access pruning with the current implementation.
154  */
155  heap_page_prune(relation, buffer, vistest, false,
156  &presult, PRUNE_ON_ACCESS, NULL);
157 
158  /*
159  * Report the number of tuples reclaimed to pgstats. This is
160  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
161  *
162  * We derive the number of dead tuples like this to avoid totally
163  * forgetting about items that were set to LP_DEAD, since they
164  * still need to be cleaned up by VACUUM. We only want to count
165  * heap-only tuples that just became LP_UNUSED in our report,
166  * which don't.
167  *
168  * VACUUM doesn't have to compensate in the same way when it
169  * tracks ndeleted, since it will set the same LP_DEAD items to
170  * LP_UNUSED separately.
171  */
172  if (presult.ndeleted > presult.nnewlpdead)
173  pgstat_update_heap_dead_tuples(relation,
174  presult.ndeleted - presult.nnewlpdead);
175  }
176 
177  /* And release buffer lock */
178  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
179 
180  /*
181  * We avoid reuse of any free space created on the page by unrelated
182  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
183  * free space should be reused by UPDATEs to *this* page.
184  */
185  }
186 }
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5036
#define Max(x, y)
Definition: c.h:985
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4248
void heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, bool mark_unused_now, PruneResult *presult, PruneReason reason, OffsetNumber *off_loc)
Definition: pruneheap.c:214
bool RecoveryInProgress(void)
Definition: xlog.c:6201

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune(), LockBuffer(), Max, PruneResult::ndeleted, PruneResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by heapam_index_fetch_tuple(), heapam_scan_bitmap_next_block(), and heapgetpage().
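
As a rough usage sketch (not taken from the PostgreSQL sources), the block below shows the calling pattern used by readers such as heapgetpage(): the function is invoked while holding only a pin, before the caller takes its own content lock, and it acquires a conditional cleanup lock internally if it decides pruning is worthwhile. The helper name scan_block_with_on_access_pruning and the assumption that rel and blkno come from an established backend context are hypothetical.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/*
 * Sketch only: read one heap block, give heap_page_prune_opt() a chance
 * to prune it opportunistically, then lock the page for reading the way
 * a sequential scan would.
 */
static void
scan_block_with_on_access_pruning(Relation rel, BlockNumber blkno)
{
    Buffer buf = ReadBuffer(rel, blkno);

    /* Called with only a pin held; pruning locks the buffer itself. */
    heap_page_prune_opt(rel, buf);

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* ... examine tuples on BufferGetPage(buf) here ... */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);
}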

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)

Definition at line 6477 of file heapam.c.

6481 {
6482  bool xmin_already_frozen = false,
6483  xmax_already_frozen = false;
6484  bool freeze_xmin = false,
6485  replace_xvac = false,
6486  replace_xmax = false,
6487  freeze_xmax = false;
6488  TransactionId xid;
6489 
6490  frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
6491  frz->t_infomask2 = tuple->t_infomask2;
6492  frz->t_infomask = tuple->t_infomask;
6493  frz->frzflags = 0;
6494  frz->checkflags = 0;
6495 
6496  /*
6497  * Process xmin, while keeping track of whether it's already frozen, or
6498  * will become frozen iff our freeze plan is executed by caller (could be
6499  * neither).
6500  */
6501  xid = HeapTupleHeaderGetXmin(tuple);
6502  if (!TransactionIdIsNormal(xid))
6503  xmin_already_frozen = true;
6504  else
6505  {
6506  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6507  ereport(ERROR,
6508  (errcode(ERRCODE_DATA_CORRUPTED),
6509  errmsg_internal("found xmin %u from before relfrozenxid %u",
6510  xid, cutoffs->relfrozenxid)));
6511 
6512  /* Will set freeze_xmin flags in freeze plan below */
6513  freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6514 
6515  /* Verify that xmin committed if and when freeze plan is executed */
6516  if (freeze_xmin)
6517  frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
6518  }
6519 
6520  /*
6521  * Old-style VACUUM FULL is gone, but we have to process xvac for as long
6522  * as we support having MOVED_OFF/MOVED_IN tuples in the database
6523  */
6524  xid = HeapTupleHeaderGetXvac(tuple);
6525  if (TransactionIdIsNormal(xid))
6526  {
6527  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
6528  Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
6529 
6530  /*
6531  * For Xvac, we always freeze proactively. This allows totally_frozen
6532  * tracking to ignore xvac.
6533  */
6534  replace_xvac = pagefrz->freeze_required = true;
6535 
6536  /* Will set replace_xvac flags in freeze plan below */
6537  }
6538 
6539  /* Now process xmax */
6540  xid = frz->xmax;
6541  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
6542  {
6543  /* Raw xmax is a MultiXactId */
6544  TransactionId newxmax;
6545  uint16 flags;
6546 
6547  /*
6548  * We will either remove xmax completely (in the "freeze_xmax" path),
6549  * process xmax by replacing it (in the "replace_xmax" path), or
6550  * perform no-op xmax processing. The only constraint is that the
6551  * FreezeLimit/MultiXactCutoff postcondition must never be violated.
6552  */
6553  newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
6554  &flags, pagefrz);
6555 
6556  if (flags & FRM_NOOP)
6557  {
6558  /*
6559  * xmax is a MultiXactId, and nothing about it changes for now.
6560  * This is the only case where 'freeze_required' won't have been
6561  * set for us by FreezeMultiXactId, as well as the only case where
6562  * neither freeze_xmax nor replace_xmax are set (given a multi).
6563  *
6564  * This is a no-op, but the call to FreezeMultiXactId might have
6565  * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
6566  * for us (the "freeze page" variants, specifically). That'll
6567  * make it safe for our caller to freeze the page later on, while
6568  * leaving this particular xmax undisturbed.
6569  *
6570  * FreezeMultiXactId is _not_ responsible for the "no freeze"
6571  * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
6572  * job. A call to heap_tuple_should_freeze for this same tuple
6573  * will take place below if 'freeze_required' isn't set already.
6574  * (This repeats work from FreezeMultiXactId, but allows "no
6575  * freeze" tracker maintenance to happen in only one place.)
6576  */
6577  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
6578  Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
6579  }
6580  else if (flags & FRM_RETURN_IS_XID)
6581  {
6582  /*
6583  * xmax will become an updater Xid (original MultiXact's updater
6584  * member Xid will be carried forward as a simple Xid in Xmax).
6585  */
6586  Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
6587 
6588  /*
6589  * NB -- some of these transformations are only valid because we
6590  * know the return Xid is a tuple updater (i.e. not merely a
6591  * locker.) Also note that the only reason we don't explicitly
6592  * worry about HEAP_KEYS_UPDATED is because it lives in
6593  * t_infomask2 rather than t_infomask.
6594  */
6595  frz->t_infomask &= ~HEAP_XMAX_BITS;
6596  frz->xmax = newxmax;
6597  if (flags & FRM_MARK_COMMITTED)
6598  frz->t_infomask |= HEAP_XMAX_COMMITTED;
6599  replace_xmax = true;
6600  }
6601  else if (flags & FRM_RETURN_IS_MULTI)
6602  {
6603  uint16 newbits;
6604  uint16 newbits2;
6605 
6606  /*
6607  * xmax is an old MultiXactId that we have to replace with a new
6608  * MultiXactId, to carry forward two or more original member XIDs.
6609  */
6610  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
6611 
6612  /*
6613  * We can't use GetMultiXactIdHintBits directly on the new multi
6614  * here; that routine initializes the masks to all zeroes, which
6615  * would lose other bits we need. Doing it this way ensures all
6616  * unrelated bits remain untouched.
6617  */
6618  frz->t_infomask &= ~HEAP_XMAX_BITS;
6619  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6620  GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
6621  frz->t_infomask |= newbits;
6622  frz->t_infomask2 |= newbits2;
6623  frz->xmax = newxmax;
6624  replace_xmax = true;
6625  }
6626  else
6627  {
6628  /*
6629  * Freeze plan for tuple "freezes xmax" in the strictest sense:
6630  * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
6631  */
6632  Assert(flags & FRM_INVALIDATE_XMAX);
6633  Assert(!TransactionIdIsValid(newxmax));
6634 
6635  /* Will set freeze_xmax flags in freeze plan below */
6636  freeze_xmax = true;
6637  }
6638 
6639  /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
6640  Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
6641  }
6642  else if (TransactionIdIsNormal(xid))
6643  {
6644  /* Raw xmax is normal XID */
6645  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6646  ereport(ERROR,
6647  (errcode(ERRCODE_DATA_CORRUPTED),
6648  errmsg_internal("found xmax %u from before relfrozenxid %u",
6649  xid, cutoffs->relfrozenxid)));
6650 
6651  /* Will set freeze_xmax flags in freeze plan below */
6652  freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6653 
6654  /*
6655  * Verify that xmax aborted if and when freeze plan is executed,
6656  * provided it's from an update. (A lock-only xmax can be removed
6657  * independent of this, since the lock is released at xact end.)
6658  */
6659  if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
6660  frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
6661  }
6662  else if (!TransactionIdIsValid(xid))
6663  {
6664  /* Raw xmax is InvalidTransactionId XID */
6665  Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
6666  xmax_already_frozen = true;
6667  }
6668  else
6669  ereport(ERROR,
6670  (errcode(ERRCODE_DATA_CORRUPTED),
6671  errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
6672  xid, tuple->t_infomask)));
6673 
6674  if (freeze_xmin)
6675  {
6676  Assert(!xmin_already_frozen);
6677 
6678  frz->t_infomask |= HEAP_XMIN_FROZEN;
6679  }
6680  if (replace_xvac)
6681  {
6682  /*
6683  * If a MOVED_OFF tuple is not dead, the xvac transaction must have
6684  * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
6685  * transaction succeeded.
6686  */
6687  Assert(pagefrz->freeze_required);
6688  if (tuple->t_infomask & HEAP_MOVED_OFF)
6689  frz->frzflags |= XLH_INVALID_XVAC;
6690  else
6691  frz->frzflags |= XLH_FREEZE_XVAC;
6692  }
6693  if (replace_xmax)
6694  {
6695  Assert(!xmax_already_frozen && !freeze_xmax);
6696  Assert(pagefrz->freeze_required);
6697 
6698  /* Already set replace_xmax flags in freeze plan earlier */
6699  }
6700  if (freeze_xmax)
6701  {
6702  Assert(!xmax_already_frozen && !replace_xmax);
6703 
6704  frz->xmax = InvalidTransactionId;
6705 
6706  /*
6707  * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
6708  * LOCKED. Normalize to INVALID just to be sure no one gets confused.
6709  * Also get rid of the HEAP_KEYS_UPDATED bit.
6710  */
6711  frz->t_infomask &= ~HEAP_XMAX_BITS;
6712  frz->t_infomask |= HEAP_XMAX_INVALID;
6713  frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
6714  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6715  }
6716 
6717  /*
6718  * Determine if this tuple is already totally frozen, or will become
6719  * totally frozen (provided caller executes freeze plans for the page)
6720  */
6721  *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
6722  (freeze_xmax || xmax_already_frozen));
6723 
6724  if (!pagefrz->freeze_required && !(xmin_already_frozen &&
6725  xmax_already_frozen))
6726  {
6727  /*
6728  * So far no previous tuple from the page made freezing mandatory.
6729  * Does this tuple force caller to freeze the entire page?
6730  */
6731  pagefrz->freeze_required =
6732  heap_tuple_should_freeze(tuple, cutoffs,
6733  &pagefrz->NoFreezePageRelfrozenXid,
6734  &pagefrz->NoFreezePageRelminMxid);
6735  }
6736 
6737  /* Tell caller if this tuple has a usable freeze plan set in *frz */
6738  return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
6739 }
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:6914
#define FRM_RETURN_IS_XID
Definition: heapam.c:6076
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6127
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7331
#define FRM_MARK_COMMITTED
Definition: heapam.c:6078
#define FRM_NOOP
Definition: heapam.c:6074
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:6077
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:6075
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:339
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:338
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_HOT_UPDATED
Definition: htup_details.h:276
#define HeapTupleHeaderGetXvac(tup)
Definition: htup_details.h:411
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3234
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:190
bool freeze_required
Definition: heapam.h:152
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:189
uint8 frzflags
Definition: heapam.h:117
uint16 t_infomask2
Definition: heapam.h:115
TransactionId xmax
Definition: heapam.h:114
uint16 t_infomask
Definition: heapam.h:116
TransactionId OldestXmin
Definition: vacuum.h:266
MultiXactId OldestMxact
Definition: vacuum.h:267
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

References Assert(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_HOT_UPDATED, HEAP_KEYS_UPDATED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_BITS, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and lazy_scan_prune().
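
For orientation, here is a loose sketch of the calling pattern used by VACUUM's lazy_scan_prune(): collect one freeze plan per tuple on a page and remember whether the whole page would end up frozen. The helper name prepare_page_freeze_plans, the tracker seed values, and the assumption that cutoffs was filled in by VACUUM are simplifications, not the actual VACUUM code; executing the plans (heap_freeze_execute_prepared) and all error handling are omitted.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "commands/vacuum.h"
#include "storage/bufpage.h"
#include "storage/off.h"

/*
 * Sketch only: build per-tuple freeze plans for one page.  Real VACUUM
 * seeds the page trackers from its running NewRelfrozenXid/NewRelminMxid;
 * starting from FreezeLimit/MultiXactCutoff here is merely conservative.
 */
static int
prepare_page_freeze_plans(Page page, OffsetNumber maxoff,
                          const struct VacuumCutoffs *cutoffs,
                          HeapTupleFreeze *frozen, bool *all_frozen)
{
    HeapPageFreeze pagefrz;
    int nfrozen = 0;

    pagefrz.freeze_required = false;
    pagefrz.FreezePageRelfrozenXid = cutoffs->FreezeLimit;
    pagefrz.FreezePageRelminMxid = cutoffs->MultiXactCutoff;
    pagefrz.NoFreezePageRelfrozenXid = cutoffs->FreezeLimit;
    pagefrz.NoFreezePageRelminMxid = cutoffs->MultiXactCutoff;

    *all_frozen = true;
    for (OffsetNumber off = FirstOffsetNumber; off <= maxoff; off++)
    {
        ItemId itemid = PageGetItemId(page, off);
        bool totally_frozen;

        if (!ItemIdIsNormal(itemid))
            continue;

        if (heap_prepare_freeze_tuple((HeapTupleHeader) PageGetItem(page, itemid),
                                      cutoffs, &pagefrz,
                                      &frozen[nfrozen], &totally_frozen))
            frozen[nfrozen++].offset = off;  /* usable freeze plan */

        if (!totally_frozen)
            *all_frozen = false;
    }
    return nfrozen;
}

A caller would size the frozen array as MaxHeapTuplesPerPage and, if nfrozen is greater than zero, execute the collected plans under a cleanup lock in a critical section.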

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1009 of file heapam.c.

1011 {
1012  HeapScanDesc scan = (HeapScanDesc) sscan;
1013 
1014  if (set_params)
1015  {
1016  if (allow_strat)
1017  scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1018  else
1019  scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1020 
1021  if (allow_sync)
1022  scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1023  else
1024  scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1025 
1026  if (allow_pagemode && scan->rs_base.rs_snapshot &&
1027  IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1028  scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1029  else
1030  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1031  }
1032 
1033  /*
1034  * unpin scan buffers
1035  */
1036  if (BufferIsValid(scan->rs_cbuf))
1037  ReleaseBuffer(scan->rs_cbuf);
1038 
1039  /*
1040  * reinitialize scan descriptor
1041  */
1042  initscan(scan, key, true);
1043 }
@ SO_ALLOW_STRAT
Definition: tableam.h:57
@ SO_ALLOW_SYNC
Definition: tableam.h:59

References BufferIsValid(), initscan(), IsMVCCSnapshot, sort-test::key, ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.
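
The sketch below (an illustration, not core code; scan_twice, rel, and snapshot are hypothetical and assumed to come from a running backend) shows the basic rescan pattern: run the scan once, then restart the same descriptor with heap_rescan(), here with set_params true so the strategy, syncscan, and pagemode flags are re-evaluated.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

/* Sketch only: scan a relation twice, reusing one scan descriptor. */
static void
scan_twice(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                                        SO_TYPE_SEQSCAN | SO_ALLOW_STRAT);

    while (heap_getnext(scan, ForwardScanDirection) != NULL)
    {
        /* first pass over the relation */
    }

    /* Restart: keep the buffer strategy, disable syncscan and pagemode. */
    heap_rescan(scan, NULL, true, true, false, false);

    while (heap_getnext(scan, ForwardScanDirection) != NULL)
    {
        /* second pass, starting again from the first block */
    }

    heap_endscan(scan);
}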

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)

Definition at line 1158 of file heapam.c.

1160 {
1161  HeapScanDesc scan = (HeapScanDesc) sscan;
1162  BlockNumber startBlk;
1163  BlockNumber numBlks;
1164  ItemPointerData highestItem;
1165  ItemPointerData lowestItem;
1166 
1167  /*
1168  * For relations without any pages, we can simply leave the TID range
1169  * unset. There will be no tuples to scan, therefore no tuples outside
1170  * the given TID range.
1171  */
1172  if (scan->rs_nblocks == 0)
1173  return;
1174 
1175  /*
1176  * Set up some ItemPointers which point to the first and last possible
1177  * tuples in the heap.
1178  */
1179  ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
1180  ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
1181 
1182  /*
1183  * If the given maximum TID is below the highest possible TID in the
1184  * relation, then restrict the range to that, otherwise we scan to the end
1185  * of the relation.
1186  */
1187  if (ItemPointerCompare(maxtid, &highestItem) < 0)
1188  ItemPointerCopy(maxtid, &highestItem);
1189 
1190  /*
1191  * If the given minimum TID is above the lowest possible TID in the
1192  * relation, then restrict the range to only scan for TIDs above that.
1193  */
1194  if (ItemPointerCompare(mintid, &lowestItem) > 0)
1195  ItemPointerCopy(mintid, &lowestItem);
1196 
1197  /*
1198  * Check for an empty range and protect from would be negative results
1199  * from the numBlks calculation below.
1200  */
1201  if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
1202  {
1203  /* Set an empty range of blocks to scan */
1204  heap_setscanlimits(sscan, 0, 0);
1205  return;
1206  }
1207 
1208  /*
1209  * Calculate the first block and the number of blocks we must scan. We
1210  * could be more aggressive here and perform some more validation to try
1211  * and further narrow the scope of blocks to scan by checking if the
1212  * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1213  * advance startBlk by one. Likewise, if highestItem has an offset of 0
1214  * we could scan one fewer blocks. However, such an optimization does not
1215  * seem worth troubling over, currently.
1216  */
1217  startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
1218 
1219  numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
1220  ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
1221 
1222  /* Set the start block and number of blocks to scan */
1223  heap_setscanlimits(sscan, startBlk, numBlks);
1224 
1225  /* Finally, set the TID range in sscan */
1226  ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
1227  ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
1228 }
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:347
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
#define MaxOffsetNumber
Definition: off.h:28
BlockNumber rs_nblocks
Definition: heapam.h:53

References FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, and HeapScanDescData::rs_nblocks.
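
As a hedged illustration of the TID range machinery (the executor normally reaches this through table_set_tidrange(); the helper name scan_block_range, the chosen block numbers, and the flag combination are assumptions), the sketch below restricts a scan to the TIDs in blocks 10 through 19 and consumes the result with heap_getnextslot_tidrange().

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "storage/itemptr.h"
#include "storage/off.h"

/* Sketch only: scan just the tuples whose TIDs fall in blocks 10..19. */
static void
scan_block_range(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan;
    TupleTableSlot *slot;
    ItemPointerData mintid;
    ItemPointerData maxtid;

    ItemPointerSet(&mintid, 10, FirstOffsetNumber); /* first TID of block 10 */
    ItemPointerSet(&maxtid, 19, MaxOffsetNumber);   /* last TID of block 19 */

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);
    slot = table_slot_create(rel, NULL);

    heap_set_tidrange(scan, &mintid, &maxtid);
    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* process one tuple in "slot" */
    }

    ExecDropSingleTupleTableSlot(slot);
    heap_endscan(scan);
}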

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)

Definition at line 347 of file heapam.c.

348 {
349  HeapScanDesc scan = (HeapScanDesc) sscan;
350 
351  Assert(!scan->rs_inited); /* else too late to change */
352  /* else rs_startblock is significant */
353  Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
354 
355  /* Check startBlk is valid (but allow case of zero blocks...) */
356  Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
357 
358  scan->rs_startblock = startBlk;
359  scan->rs_numblocks = numBlks;
360 }
bool rs_inited
Definition: heapam.h:59
BlockNumber rs_startblock
Definition: heapam.h:54
BlockNumber rs_numblocks
Definition: heapam.h:55

References Assert(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
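
A minimal sketch of the same idea used by heapam_index_build_range_scan() follows (scan_first_hundred_blocks and the block count are hypothetical): the limits must be installed before the first heap_getnext() call, and SO_ALLOW_SYNC must not be set, or the Asserts above will trip.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

/* Sketch only: visit a fixed block range with a sequential scan. */
static void
scan_first_hundred_blocks(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                                        SO_TYPE_SEQSCAN);

    heap_setscanlimits(scan, 0, 100);   /* startBlk = 0, numBlks = 100 */

    while (heap_getnext(scan, ForwardScanDirection) != NULL)
    {
        /* only tuples from blocks 0..99 are returned */
    }
    heap_endscan(scan);
}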

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)

Definition at line 7276 of file heapam.c.

7277 {
7278  TransactionId xid;
7279 
7280  /*
7281  * If xmin is a normal transaction ID, this tuple is definitely not
7282  * frozen.
7283  */
7284  xid = HeapTupleHeaderGetXmin(tuple);
7285  if (TransactionIdIsNormal(xid))
7286  return true;
7287 
7288  /*
7289  * If xmax is a valid xact or multixact, this tuple is also not frozen.
7290  */
7291  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7292  {
7293  MultiXactId multi;
7294 
7295  multi = HeapTupleHeaderGetRawXmax(tuple);
7296  if (MultiXactIdIsValid(multi))
7297  return true;
7298  }
7299  else
7300  {
7301  xid = HeapTupleHeaderGetRawXmax(tuple);
7302  if (TransactionIdIsNormal(xid))
7303  return true;
7304  }
7305 
7306  if (tuple->t_infomask & HEAP_MOVED)
7307  {
7308  xid = HeapTupleHeaderGetXvac(tuple);
7309  if (TransactionIdIsNormal(xid))
7310  return true;
7311  }
7312 
7313  return false;
7314 }

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_is_all_visible().
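
The sketch below, loosely modeled on heap_page_is_all_visible() but with visibility checks, locking, and error handling stripped out (the function name page_needs_no_future_freezing is hypothetical), shows the typical use: deciding whether a page could be marked all-frozen in the visibility map, since any tuple that still carries a normal xmin, xmax, or xvac will eventually need freezing by VACUUM.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufpage.h"
#include "storage/off.h"

/* Sketch only: would VACUUM ever have to freeze something on this page? */
static bool
page_needs_no_future_freezing(Page page)
{
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

    for (OffsetNumber off = FirstOffsetNumber; off <= maxoff; off++)
    {
        ItemId itemid = PageGetItemId(page, off);

        if (!ItemIdIsNormal(itemid))
            continue;

        if (heap_tuple_needs_eventual_freeze((HeapTupleHeader) PageGetItem(page, itemid)))
            return false;   /* some tuple still carries an unfrozen xid */
    }
    return true;
}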

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)

Definition at line 7331 of file heapam.c.

7335 {
7336  TransactionId xid;
7337  MultiXactId multi;
7338  bool freeze = false;
7339 
7340  /* First deal with xmin */
7341  xid = HeapTupleHeaderGetXmin(tuple);
7342  if (TransactionIdIsNormal(xid))
7343  {
7344  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7345  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7346  *NoFreezePageRelfrozenXid = xid;
7347  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7348  freeze = true;
7349  }
7350 
7351  /* Now deal with xmax */
7352  xid = InvalidTransactionId;
7353  multi = InvalidMultiXactId;
7354  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7355  multi = HeapTupleHeaderGetRawXmax(tuple);
7356  else
7357  xid = HeapTupleHeaderGetRawXmax(tuple);
7358 
7359  if (TransactionIdIsNormal(xid))
7360  {
7361  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7362  /* xmax is a non-permanent XID */
7363  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7364  *NoFreezePageRelfrozenXid = xid;
7365  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7366  freeze = true;
7367  }
7368  else if (!MultiXactIdIsValid(multi))
7369  {
7370  /* xmax is a permanent XID or invalid MultiXactId/XID */
7371  }
7372  else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7373  {
7374  /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7375  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7376  *NoFreezePageRelminMxid = multi;
7377  /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7378  freeze = true;
7379  }
7380  else
7381  {
7382  /* xmax is a MultiXactId that may have an updater XID */
7383  MultiXactMember *members;
7384  int nmembers;
7385 
7386  Assert(MultiXactIdPrecedesOrEquals(cutoffs->relminmxid, multi));
7387  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7388  *NoFreezePageRelminMxid = multi;
7389  if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
7390  freeze = true;
7391 
7392  /* need to check whether any member of the mxact is old */
7393  nmembers = GetMultiXactIdMembers(multi, &members, false,
7394  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
7395 
7396  for (int i = 0; i < nmembers; i++)
7397  {
7398  xid = members[i].xid;
7399  Assert(TransactionIdIsNormal(xid));
7400  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7401  *NoFreezePageRelfrozenXid = xid;
7402  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7403  freeze = true;
7404  }
7405  if (nmembers > 0)
7406  pfree(members);
7407  }
7408 
7409  if (tuple->t_infomask & HEAP_MOVED)
7410  {
7411  xid = HeapTupleHeaderGetXvac(tuple);
7412  if (TransactionIdIsNormal(xid))
7413  {
7414  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7415  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7416  *NoFreezePageRelfrozenXid = xid;
7417  /* heap_prepare_freeze_tuple forces xvac freezing */
7418  freeze = true;
7419  }
7420  }
7421 
7422  return freeze;
7423 }
#define HEAP_LOCKED_UPGRADED(infomask)
Definition: htup_details.h:249
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3248
#define InvalidMultiXactId
Definition: multixact.h:24
TransactionId xid
Definition: multixact.h:58

References Assert(), VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED, HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
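
For context, here is a loose sketch of the "no freeze" bookkeeping done by lazy_scan_noprune() (the helper name page_would_require_freezing is hypothetical, and real VACUUM seeds the trackers from its running NewRelfrozenXid/NewRelminMxid rather than directly from the cutoffs): each tuple ratchets the trackers back, and a true result from any tuple tells the caller it cannot leave the page unfrozen without limiting relfrozenxid/relminmxid advancement.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "commands/vacuum.h"
#include "storage/bufpage.h"
#include "storage/off.h"

/* Sketch only: track page-level freeze requirements without freezing. */
static bool
page_would_require_freezing(Page page, OffsetNumber maxoff,
                            const struct VacuumCutoffs *cutoffs)
{
    TransactionId NoFreezePageRelfrozenXid = cutoffs->OldestXmin;
    MultiXactId NoFreezePageRelminMxid = cutoffs->OldestMxact;
    bool needs_freeze = false;

    for (OffsetNumber off = FirstOffsetNumber; off <= maxoff; off++)
    {
        ItemId itemid = PageGetItemId(page, off);

        if (!ItemIdIsNormal(itemid))
            continue;

        if (heap_tuple_should_freeze((HeapTupleHeader) PageGetItem(page, itemid),
                                     cutoffs,
                                     &NoFreezePageRelfrozenXid,
                                     &NoFreezePageRelminMxid))
            needs_freeze = true;
    }

    /*
     * The trackers now hold the newest relfrozenxid/relminmxid values that
     * would remain safe if this page were left entirely unfrozen.
     */
    return needs_freeze;
}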

◆ heap_update()

TM_Result heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
int  options,
struct TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes,
TupleTableSlot oldSlot 
)

Definition at line 3019 of file heapam.c.

3023 {
3024  TM_Result result;
3026  Bitmapset *hot_attrs;
3027  Bitmapset *sum_attrs;
3028  Bitmapset *key_attrs;
3029  Bitmapset *id_attrs;
3030  Bitmapset *interesting_attrs;
3031  Bitmapset *modified_attrs;
3032  ItemId lp;
3033  HeapTupleData oldtup;
3034  HeapTuple heaptup;
3035  HeapTuple old_key_tuple = NULL;
3036  bool old_key_copied = false;
3037  Page page;
3038  BlockNumber block;
3039  MultiXactStatus mxact_status;
3040  Buffer buffer,
3041  newbuf,
3042  vmbuffer = InvalidBuffer,
3043  vmbuffer_new = InvalidBuffer;
3044  bool need_toast;
3045  Size newtupsize,
3046  pagefree;
3047  bool have_tuple_lock = false;
3048  bool iscombo;
3049  bool use_hot_update = false;
3050  bool summarized_update = false;
3051  bool key_intact;
3052  bool all_visible_cleared = false;
3053  bool all_visible_cleared_new = false;
3054  bool checked_lockers;
3055  bool locker_remains;
3056  bool id_has_external = false;
3057  TransactionId xmax_new_tuple,
3058  xmax_old_tuple;
3059  uint16 infomask_old_tuple,
3060  infomask2_old_tuple,
3061  infomask_new_tuple,
3062  infomask2_new_tuple;
3063 
3064  Assert(ItemPointerIsValid(otid));
3065 
3066  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3068  RelationGetNumberOfAttributes(relation));
3069 
3070  /*
3071  * Forbid this during a parallel operation, lest it allocate a combo CID.
3072  * Other workers might need that combo CID for visibility checks, and we
3073  * have no provision for broadcasting it to them.
3074  */
3075  if (IsInParallelMode())
3076  ereport(ERROR,
3077  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
3078  errmsg("cannot update tuples during a parallel operation")));
3079 
3080  /*
3081  * Fetch the list of attributes to be checked for various operations.
3082  *
3083  * For HOT considerations, this is wasted effort if we fail to update or
3084  * have to put the new tuple on a different page. But we must compute the
3085  * list before obtaining buffer lock --- in the worst case, if we are
3086  * doing an update on one of the relevant system catalogs, we could
3087  * deadlock if we try to fetch the list later. In any case, the relcache
3088  * caches the data so this is usually pretty cheap.
3089  *
3090  * We also need columns used by the replica identity and columns that are
3091  * considered the "key" of rows in the table.
3092  *
3093  * Note that we get copies of each bitmap, so we need not worry about
3094  * relcache flush happening midway through.
3095  */
3096  hot_attrs = RelationGetIndexAttrBitmap(relation,
3098  sum_attrs = RelationGetIndexAttrBitmap(relation,
3100  key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
3101  id_attrs = RelationGetIndexAttrBitmap(relation,
3103  interesting_attrs = NULL;
3104  interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
3105  interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
3106  interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
3107  interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
3108 
3109  block = ItemPointerGetBlockNumber(otid);
3110  buffer = ReadBuffer(relation, block);
3111  page = BufferGetPage(buffer);
3112 
3113  /*
3114  * Before locking the buffer, pin the visibility map page if it appears to
3115  * be necessary. Since we haven't got the lock yet, someone else might be
3116  * in the middle of changing this, so we'll need to recheck after we have
3117  * the lock.
3118  */
3119  if (PageIsAllVisible(page))
3120  visibilitymap_pin(relation, block, &vmbuffer);
3121 
3122  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3123 
3124  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
3125  Assert(ItemIdIsNormal(lp));
3126 
3127  /*
3128  * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3129  * properly.
3130  */
3131  oldtup.t_tableOid = RelationGetRelid(relation);
3132  oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3133  oldtup.t_len = ItemIdGetLength(lp);
3134  oldtup.t_self = *otid;
3135 
3136  /* the new tuple is ready, except for this: */
3137  newtup->t_tableOid = RelationGetRelid(relation);
3138 
3139  /*
3140  * Determine columns modified by the update. Additionally, identify
3141  * whether any of the unmodified replica identity key attributes in the
3142  * old tuple is externally stored or not. This is required because for
3143  * such attributes the flattened value won't be WAL logged as part of the
3144  * new tuple so we must include it as part of the old_key_tuple. See
3145  * ExtractReplicaIdentity.
3146  */
3147  modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
3148  id_attrs, &oldtup,
3149  newtup, &id_has_external);
3150 
3151  /*
3152  * If we're not updating any "key" column, we can grab a weaker lock type.
3153  * This allows for more concurrency when we are running simultaneously
3154  * with foreign key checks.
3155  *
3156  * Note that if a column gets detoasted while executing the update, but
3157  * the value ends up being the same, this test will fail and we will use
3158  * the stronger lock. This is acceptable; the important case to optimize
3159  * is updates that don't manipulate key columns, not those that
3160  * serendipitously arrive at the same key values.
3161  */
3162  if (!bms_overlap(modified_attrs, key_attrs))
3163  {
3164  *lockmode = LockTupleNoKeyExclusive;
3165  mxact_status = MultiXactStatusNoKeyUpdate;
3166  key_intact = true;
3167 
3168  /*
3169  * If this is the first possibly-multixact-able operation in the
3170  * current transaction, set my per-backend OldestMemberMXactId
3171  * setting. We can be certain that the transaction will never become a
3172  * member of any older MultiXactIds than that. (We have to do this
3173  * even if we end up just using our own TransactionId below, since
3174  * some other backend could incorporate our XID into a MultiXact
3175  * immediately afterwards.)
3176  */
3178  }
3179  else
3180  {
3181  *lockmode = LockTupleExclusive;
3182  mxact_status = MultiXactStatusUpdate;
3183  key_intact = false;
3184  }
3185 
3186  /*
3187  * Note: beyond this point, use oldtup not otid to refer to old tuple.
3188  * otid may very well point at newtup->t_self, which we will overwrite
3189  * with the new tuple's location, so there's great risk of confusion if we
3190  * use otid anymore.
3191  */
3192 
3193 l2:
3194  checked_lockers = false;
3195  locker_remains = false;
3196  result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3197 
3198  /* see below about the "no wait" case */
3200 
3201  if (result == TM_Invisible)
3202  {
3203  UnlockReleaseBuffer(buffer);
3204  ereport(ERROR,
3205  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3206  errmsg("attempted to update invisible tuple")));
3207  }
3208  else if (result == TM_BeingModified && (options & TABLE_MODIFY_WAIT))
3209  {
3210  TransactionId xwait;
3211  uint16 infomask;
3212  bool can_continue = false;
3213 
3214  /*
3215  * XXX note that we don't consider the "no wait" case here. This
3216  * isn't a problem currently because no caller uses that case, but it
3217  * should be fixed if such a caller is introduced. It wasn't a
3218  * problem previously because this code would always wait, but now
3219  * that some tuple locks do not conflict with one of the lock modes we
3220  * use, it is possible that this case is interesting to handle
3221  * specially.
3222  *
3223  * This may cause failures with third-party code that calls
3224  * heap_update directly.
3225  */
3226 
3227  /* must copy state data before unlocking buffer */
3228  xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3229  infomask = oldtup.t_data->t_infomask;
3230 
3231  /*
3232  * Now we have to do something about the existing locker. If it's a
3233  * multi, sleep on it; we might be awakened before it is completely
3234  * gone (or even not sleep at all in some cases); we need to preserve
3235  * it as locker, unless it is gone completely.
3236  *
3237  * If it's not a multi, we need to check for sleeping conditions
3238  * before actually going to sleep. If the update doesn't conflict
3239  * with the locks, we just continue without sleeping (but making sure
3240  * it is preserved).
3241  *
3242  * Before sleeping, we need to acquire tuple lock to establish our
3243  * priority for the tuple (see heap_lock_tuple). LockTuple will
3244  * release us when we are next-in-line for the tuple. Note we must
3245  * not acquire the tuple lock until we're sure we're going to sleep;
3246  * otherwise we're open for race conditions with other transactions
3247  * holding the tuple lock which sleep on us.
3248  *
3249  * If we are forced to "start over" below, we keep the tuple lock;
3250  * this arranges that we stay at the head of the line while rechecking
3251  * tuple state.
3252  */
3253  if (infomask & HEAP_XMAX_IS_MULTI)
3254  {
3255  TransactionId update_xact;
3256  int remain;
3257  bool current_is_member = false;
3258 
3259  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
3260  *lockmode, &current_is_member))
3261  {
3262  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3263 
3264  /*
3265  * Acquire the lock, if necessary (but skip it when we're
3266  * requesting a lock and already have one; avoids deadlock).
3267  */
3268  if (!current_is_member)
3269  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3270  LockWaitBlock, &have_tuple_lock);
3271 
3272  /* wait for multixact */
3273  MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
3274  relation, &oldtup.t_self, XLTW_Update,
3275  &remain);
3276  checked_lockers = true;
3277  locker_remains = remain != 0;
3279 
3280  /*
3281  * If xwait had just locked the tuple then some other xact
3282  * could update this tuple before we get to this point. Check
3283  * for xmax change, and start over if so.
3284  */
3286  infomask) ||
3288  xwait))
3289  goto l2;
3290  }
3291 
3292  /*
3293  * Note that the multixact may not be done by now. It could have
3294  * surviving members; our own xact or other subxacts of this
3295  * backend, and also any other concurrent transaction that locked
3296  * the tuple with LockTupleKeyShare if we only got
3297  * LockTupleNoKeyExclusive. If this is the case, we have to be
3298  * careful to mark the updated tuple with the surviving members in
3299  * Xmax.
3300  *
3301  * Note that there could have been another update in the
3302  * MultiXact. In that case, we need to check whether it committed
3303  * or aborted. If it aborted we are safe to update it again;
3304  * otherwise there is an update conflict, and we have to return
3305  * TableTuple{Deleted, Updated} below.
3306  *
3307  * In the LockTupleExclusive case, we still need to preserve the
3308  * surviving members: those would include the tuple locks we had
3309  * before this one, which are important to keep in case this
3310  * subxact aborts.
3311  */
3313  update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
3314  else
3315  update_xact = InvalidTransactionId;
3316 
3317  /*
3318  * There was no UPDATE in the MultiXact; or it aborted. No
3319  * TransactionIdIsInProgress() call needed here, since we called
3320  * MultiXactIdWait() above.
3321  */
3322  if (!TransactionIdIsValid(update_xact) ||
3323  TransactionIdDidAbort(update_xact))
3324  can_continue = true;
3325  }
3326  else if (TransactionIdIsCurrentTransactionId(xwait))
3327  {
3328  /*
3329  * The only locker is ourselves; we can avoid grabbing the tuple
3330  * lock here, but must preserve our locking information.
3331  */
3332  checked_lockers = true;
3333  locker_remains = true;
3334  can_continue = true;
3335  }
3336  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
3337  {
3338  /*
3339  * If it's just a key-share locker, and we're not changing the key
3340  * columns, we don't need to wait for it to end; but we need to
3341  * preserve it as locker.
3342  */
3343  checked_lockers = true;
3344  locker_remains = true;
3345  can_continue = true;
3346  }
3347  else
3348  {
3349  /*
3350  * Wait for regular transaction to end; but first, acquire tuple
3351  * lock.
3352  */
3353  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3354  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3355  LockWaitBlock, &have_tuple_lock);
3356  XactLockTableWait(xwait, relation, &oldtup.t_self,
3357  XLTW_Update);
3358  checked_lockers = true;
3360 
3361  /*
3362  * xwait is done, but if xwait had just locked the tuple then some
3363  * other xact could update this tuple before we get to this point.
3364  * Check for xmax change, and start over if so.
3365  */
3366  if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3367  !TransactionIdEquals(xwait,
3369  goto l2;
3370 
3371  /* Otherwise check if it committed or aborted */
3372  UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3373  if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3374  can_continue = true;
3375  }
3376 
3377  if (can_continue)
3378  result = TM_Ok;
3379  else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3380  result = TM_Updated;
3381  else
3382  result = TM_Deleted;
3383  }
3384 
3385  /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3386  if (result != TM_Ok)
3387  {
3388  Assert(result == TM_SelfModified ||
3389  result == TM_Updated ||
3390  result == TM_Deleted ||
3391  result == TM_BeingModified);
3392  Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3393  Assert(result != TM_Updated ||
3394  !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3395  }
3396 
3397  if (crosscheck != InvalidSnapshot && result == TM_Ok)
3398  {
3399  /* Perform additional check for transaction-snapshot mode RI updates */
3400  if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
3401  result = TM_Updated;
3402  }
3403 
3404  if (result != TM_Ok)
3405  {
3406  tmfd->ctid = oldtup.t_data->t_ctid;
3407  tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3408  if (result == TM_SelfModified)
3409  tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3410  else
3411  tmfd->cmax = InvalidCommandId;
3412 
3413  /*
3414  * If we're asked to lock the updated tuple, we just fetch the
3415  * existing tuple. That lets the caller save some resources on
3416  * placing the lock.
3417  */
3418  if (result == TM_Updated &&
3420  {
3421  BufferHeapTupleTableSlot *bslot;
3422 
3423  Assert(TTS_IS_BUFFERTUPLE(oldSlot));
3424  bslot = (BufferHeapTupleTableSlot *) oldSlot;
3425 
3426  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3427  bslot->base.tupdata = oldtup;
3428  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
3429  oldSlot,
3430  buffer);
3431  }
3432  else
3433  {
3434  UnlockReleaseBuffer(buffer);
3435  }
3436  if (have_tuple_lock)
3437  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3438  if (vmbuffer != InvalidBuffer)
3439  ReleaseBuffer(vmbuffer);
3440  *update_indexes = TU_None;
3441 
3442  bms_free(hot_attrs);
3443  bms_free(sum_attrs);
3444  bms_free(key_attrs);
3445  bms_free(id_attrs);
3446  bms_free(modified_attrs);
3447  bms_free(interesting_attrs);
3448  return result;
3449  }
3450 
3451  /*
3452  * If we didn't pin the visibility map page and the page has become all
3453  * visible while we were busy locking the buffer, or during some
3454  * subsequent window during which we had it unlocked, we'll have to unlock
3455  * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3456  * bit unfortunate, especially since we'll now have to recheck whether the
3457  * tuple has been locked or updated under us, but hopefully it won't
3458  * happen very often.
3459  */
3460  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3461  {
3462  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3463  visibilitymap_pin(relation, block, &vmbuffer);
3464  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3465  goto l2;
3466  }
3467 
3468  /* Fill in transaction status data */
3469 
3470  /*
3471  * If the tuple we're updating is locked, we need to preserve the locking
3472  * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3473  */
3475  oldtup.t_data->t_infomask,
3476  oldtup.t_data->t_infomask2,
3477  xid, *lockmode, true,
3478  &xmax_old_tuple, &infomask_old_tuple,
3479  &infomask2_old_tuple);
3480 
3481  /*
3482  * And also prepare an Xmax value for the new copy of the tuple. If there
3483  * was no xmax previously, or there was one but all lockers are now gone,
3484  * then use InvalidTransactionId; otherwise, get the xmax from the old
3485  * tuple. (In rare cases that might also be InvalidTransactionId and yet
3486  * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3487  */
3488  if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3490  (checked_lockers && !locker_remains))
3491  xmax_new_tuple = InvalidTransactionId;
3492  else
3493  xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3494 
3495  if (!TransactionIdIsValid(xmax_new_tuple))
3496  {
3497  infomask_new_tuple = HEAP_XMAX_INVALID;
3498  infomask2_new_tuple = 0;
3499  }
3500  else
3501  {
3502  /*
3503  * If we found a valid Xmax for the new tuple, then the infomask bits
3504  * to use on the new tuple depend on what was there on the old one.
3505  * Note that since we're doing an update, the only possibility is that
3506  * the lockers had FOR KEY SHARE lock.
3507  */
3508  if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3509  {
3510  GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
3511  &infomask2_new_tuple);
3512  }
3513  else
3514  {
3515  infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
3516  infomask2_new_tuple = 0;
3517  }
3518  }
3519 
3520  /*
3521  * Prepare the new tuple with the appropriate initial values of Xmin and
3522  * Xmax, as well as initial infomask bits as computed above.
3523  */
3524  newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3525  newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3526  HeapTupleHeaderSetXmin(newtup->t_data, xid);
3527  HeapTupleHeaderSetCmin(newtup->t_data, cid);
3528  newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3529  newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3530  HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
3531 
3532  /*
3533  * Replace cid with a combo CID if necessary. Note that we already put
3534  * the plain cid into the new tuple.
3535  */
3536  HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
3537 
3538  /*
3539  * If the toaster needs to be activated, OR if the new tuple will not fit
3540  * on the same page as the old, then we need to release the content lock
3541  * (but not the pin!) on the old tuple's buffer while we are off doing
3542  * TOAST and/or table-file-extension work. We must mark the old tuple to
3543  * show that it's locked, else other processes may try to update it
3544  * themselves.
3545  *
3546  * We need to invoke the toaster if there are already any out-of-line
3547  * toasted values present, or if the new tuple is over-threshold.
3548  */
3549  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3550  relation->rd_rel->relkind != RELKIND_MATVIEW)
3551  {
3552  /* toast table entries should never be recursively toasted */
3553  Assert(!HeapTupleHasExternal(&oldtup));
3554  Assert(!HeapTupleHasExternal(newtup));
3555  need_toast = false;
3556  }
3557  else
3558  need_toast = (HeapTupleHasExternal(&oldtup) ||
3559  HeapTupleHasExternal(newtup) ||
3560  newtup->t_len > TOAST_TUPLE_THRESHOLD);
3561 
3562  pagefree = PageGetHeapFreeSpace(page);
3563 
3564  newtupsize = MAXALIGN(newtup->t_len);
3565 
3566  if (need_toast || newtupsize > pagefree)
3567  {
3568  TransactionId xmax_lock_old_tuple;
3569  uint16 infomask_lock_old_tuple,
3570  infomask2_lock_old_tuple;
3571  bool cleared_all_frozen = false;
3572 
3573  /*
3574  * To prevent concurrent sessions from updating the tuple, we have to
3575  * temporarily mark it locked, while we release the page-level lock.
3576  *
3577  * To satisfy the rule that any xid potentially appearing in a buffer
3578  * written out to disk, we unfortunately have to WAL log this
3579  * temporary modification. We can reuse xl_heap_lock for this
3580  * purpose. If we crash/error before following through with the
3581  * actual update, xmax will be of an aborted transaction, allowing
3582  * other sessions to proceed.
3583  */
3584 
3585  /*
3586  * Compute xmax / infomask appropriate for locking the tuple. This has
3587  * to be done separately from the combo that's going to be used for
3588  * updating, because the potentially created multixact would otherwise
3589  * be wrong.
3590  */
3592  oldtup.t_data->t_infomask,
3593  oldtup.t_data->t_infomask2,
3594  xid, *lockmode, false,
3595  &xmax_lock_old_tuple, &infomask_lock_old_tuple,
3596  &infomask2_lock_old_tuple);
3597 
3598  Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
3599 
3600  START_CRIT_SECTION();
3601 
3602  /* Clear obsolete visibility flags ... */
3603  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3604  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3605  HeapTupleClearHotUpdated(&oldtup);
3606  /* ... and store info about transaction updating this tuple */
3607  Assert(TransactionIdIsValid(xmax_lock_old_tuple));
3608  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
3609  oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3610  oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3611  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3612 
3613  /* temporarily make it look not-updated, but locked */
3614  oldtup.t_data->t_ctid = oldtup.t_self;
3615 
3616  /*
3617  * Clear all-frozen bit on visibility map if needed. We could
3618  * immediately reset ALL_VISIBLE, but given that the WAL logging
3619  * overhead would be unchanged, that doesn't seem necessarily
3620  * worthwhile.
3621  */
3622  if (PageIsAllVisible(page) &&
3623  visibilitymap_clear(relation, block, vmbuffer,
3625  cleared_all_frozen = true;
3626 
3627  MarkBufferDirty(buffer);
3628 
3629  if (RelationNeedsWAL(relation))
3630  {
3631  xl_heap_lock xlrec;
3632  XLogRecPtr recptr;
3633 
3634  XLogBeginInsert();
3635  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3636 
3637  xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3638  xlrec.xmax = xmax_lock_old_tuple;
3640  oldtup.t_data->t_infomask2);
3641  xlrec.flags =
3642  cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
3643  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
3644  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
3645  PageSetLSN(page, recptr);
3646  }
3647 
3648  END_CRIT_SECTION();
3649 
3650  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3651 
3652  /*
3653  * Let the toaster do its thing, if needed.
3654  *
3655  * Note: below this point, heaptup is the data we actually intend to
3656  * store into the relation; newtup is the caller's original untoasted
3657  * data.
3658  */
3659  if (need_toast)
3660  {
3661  /* Note we always use WAL and FSM during updates */
3662  heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
3663  newtupsize = MAXALIGN(heaptup->t_len);
3664  }
3665  else
3666  heaptup = newtup;
3667 
3668  /*
3669  * Now, do we need a new page for the tuple, or not? This is a bit
3670  * tricky since someone else could have added tuples to the page while
3671  * we weren't looking. We have to recheck the available space after
3672  * reacquiring the buffer lock. But don't bother to do that if the
3673  * former amount of free space is still not enough; it's unlikely
3674  * there's more free now than before.
3675  *
3676  * What's more, if we need to get a new page, we will need to acquire
3677  * buffer locks on both old and new pages. To avoid deadlock against
3678  * some other backend trying to get the same two locks in the other
3679  * order, we must be consistent about the order we get the locks in.
3680  * We use the rule "lock the lower-numbered page of the relation
3681  * first". To implement this, we must do RelationGetBufferForTuple
3682  * while not holding the lock on the old page, and we must rely on it
3683  * to get the locks on both pages in the correct order.
3684  *
3685  * Another consideration is that we need visibility map page pin(s) if
3686  * we will have to clear the all-visible flag on either page. If we
3687  * call RelationGetBufferForTuple, we rely on it to acquire any such
3688  * pins; but if we don't, we have to handle that here. Hence we need
3689  * a loop.
3690  */
3691  for (;;)
3692  {
3693  if (newtupsize > pagefree)
3694  {
3695  /* It doesn't fit, must use RelationGetBufferForTuple. */
3696  newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
3697  buffer, 0, NULL,
3698  &vmbuffer_new, &vmbuffer,
3699  0);
3700  /* We're all done. */
3701  break;
3702  }
3703  /* Acquire VM page pin if needed and we don't have it. */
3704  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3705  visibilitymap_pin(relation, block, &vmbuffer);
3706  /* Re-acquire the lock on the old tuple's page. */
3707  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3708  /* Re-check using the up-to-date free space */
3709  pagefree = PageGetHeapFreeSpace(page);
3710  if (newtupsize > pagefree ||
3711  (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
3712  {
3713  /*
3714  * Rats, it doesn't fit anymore, or somebody just now set the
3715  * all-visible flag. We must now unlock and loop to avoid
3716  * deadlock. Fortunately, this path should seldom be taken.
3717  */
3718  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3719  }
3720  else
3721  {
3722  /* We're all done. */
3723  newbuf = buffer;
3724  break;
3725  }
3726  }
3727  }
3728  else
3729  {
3730  /* No TOAST work needed, and it'll fit on same page */
3731  newbuf = buffer;
3732  heaptup = newtup;
3733  }
3734 
3735  /*
3736  * We're about to do the actual update -- check for conflict first, to
3737  * avoid possibly having to roll back work we've just done.
3738  *
3739  * This is safe without a recheck as long as there is no possibility of
3740  * another process scanning the pages between this check and the update
3741  * being visible to the scan (i.e., exclusive buffer content lock(s) are
3742  * continuously held from this point until the tuple update is visible).
3743  *
3744  * For the new tuple the only check needed is at the relation level, but
3745  * since both tuples are in the same relation and the check for oldtup
3746  * will include checking the relation level, there is no benefit to a
3747  * separate check for the new tuple.
3748  */
3749  CheckForSerializableConflictIn(relation, &oldtup.t_self,
3750  BufferGetBlockNumber(buffer));
3751 
3752  /*
3753  * At this point newbuf and buffer are both pinned and locked, and newbuf
3754  * has enough space for the new tuple. If they are the same buffer, only
3755  * one pin is held.
3756  */
3757 
3758  if (newbuf == buffer)
3759  {
3760  /*
3761  * Since the new tuple is going into the same page, we might be able
3762  * to do a HOT update. Check if any of the index columns have been
3763  * changed.
3764  */
3765  if (!bms_overlap(modified_attrs, hot_attrs))
3766  {
3767  use_hot_update = true;
3768 
3769  /*
3770  * If none of the columns that are used in hot-blocking indexes
3771  * were updated, we can apply HOT, but we do still need to check
3772  * if we need to update the summarizing indexes, and update those
3773  * indexes if the columns were updated, or we may fail to detect
3774  * e.g. value bound changes in BRIN minmax indexes.
3775  */
3776  if (bms_overlap(modified_attrs, sum_attrs))
3777  summarized_update = true;
3778  }
3779  }
3780  else
3781  {
3782  /* Set a hint that the old page could use prune/defrag */
3783  PageSetFull(page);
3784  }
3785 
3786  /*
3787  * Compute replica identity tuple before entering the critical section so
3788  * we don't PANIC upon a memory allocation failure.
3789  * ExtractReplicaIdentity() will return NULL if nothing needs to be
3790  * logged. Pass old key required as true only if the replica identity key
3791  * columns are modified or it has external data.
3792  */
3793  old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
3794  bms_overlap(modified_attrs, id_attrs) ||
3795  id_has_external,
3796  &old_key_copied);
3797 
3798  /* NO EREPORT(ERROR) from here till changes are logged */
3799  START_CRIT_SECTION();
3800 
3801  /*
3802  * If this transaction commits, the old tuple will become DEAD sooner or
3803  * later. Set flag that this page is a candidate for pruning once our xid
3804  * falls below the OldestXmin horizon. If the transaction finally aborts,
3805  * the subsequent page pruning will be a no-op and the hint will be
3806  * cleared.
3807  *
3808  * XXX Should we set hint on newbuf as well? If the transaction aborts,
3809  * there would be a prunable tuple in the newbuf; but for now we choose
3810  * not to optimize for aborts. Note that heap_xlog_update must be kept in
3811  * sync if this decision changes.
3812  */
3813  PageSetPrunable(page, xid);
3814 
3815  if (use_hot_update)
3816  {
3817  /* Mark the old tuple as HOT-updated */
3818  HeapTupleSetHotUpdated(&oldtup);
3819  /* And mark the new tuple as heap-only */
3820  HeapTupleSetHeapOnly(heaptup);
3821  /* Mark the caller's copy too, in case different from heaptup */
3822  HeapTupleSetHeapOnly(newtup);
3823  }
3824  else
3825  {
3826  /* Make sure tuples are correctly marked as not-HOT */
3827  HeapTupleClearHotUpdated(&oldtup);
3828  HeapTupleClearHeapOnly(heaptup);
3829  HeapTupleClearHeapOnly(newtup);
3830  }
3831 
3832  RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
3833 
3834 
3835  /* Clear obsolete visibility flags, possibly set by ourselves above... */
3836  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3837  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3838  /* ... and store info about transaction updating this tuple */
3839  Assert(TransactionIdIsValid(xmax_old_tuple));
3840  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
3841  oldtup.t_data->t_infomask |= infomask_old_tuple;
3842  oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
3843  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3844 
3845  /* record address of new tuple in t_ctid of old one */
3846  oldtup.t_data->t_ctid = heaptup->t_self;
3847 
3848  /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
3849  if (PageIsAllVisible(BufferGetPage(buffer)))
3850  {
3851  all_visible_cleared = true;
3853  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3854  vmbuffer, VISIBILITYMAP_VALID_BITS);
3855  }
3856  if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
3857  {
3858  all_visible_cleared_new = true;
3860  visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
3861  vmbuffer_new, VISIBILITYMAP_VALID_BITS);
3862  }
3863 
3864  if (newbuf != buffer)
3865  MarkBufferDirty(newbuf);
3866  MarkBufferDirty(buffer);
3867 
3868  /* XLOG stuff */
3869  if (RelationNeedsWAL(relation))
3870  {
3871  XLogRecPtr recptr;
3872 
3873  /*
3874  * For logical decoding we need combo CIDs to properly decode the
3875  * catalog.
3876  */
3878  {
3879  log_heap_new_cid(relation, &oldtup);
3880  log_heap_new_cid(relation, heaptup);
3881  }
3882 
3883  recptr = log_heap_update(relation, buffer,
3884  newbuf, &oldtup, heaptup,
3885  old_key_tuple,
3886  all_visible_cleared,
3887  all_visible_cleared_new);
3888  if (newbuf != buffer)
3889  {
3890  PageSetLSN(BufferGetPage(newbuf), recptr);
3891  }
3892  PageSetLSN(BufferGetPage(buffer), recptr);
3893  }
3894 
3895  END_CRIT_SECTION();
3896 
3897  if (newbuf != buffer)
3898  LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
3899  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3900 
3901  /*
3902  * Mark old tuple for invalidation from system caches at next command
3903  * boundary, and mark the new tuple for invalidation in case we abort. We
3904  * have to do this before releasing the buffer because oldtup is in the
3905  * buffer. (heaptup is all in local memory, but it's necessary to process
3906  * both tuple versions in one call to inval.c so we can avoid redundant
3907  * sinval messages.)
3908  */
3909  CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
3910 
3911  /* Now we can release the buffer(s) */
3912  if (newbuf != buffer)
3913  ReleaseBuffer(newbuf);
3914 
3915  /* Fetch the old tuple version if we're asked for that. */
3916  if (options & TABLE_MODIFY_FETCH_OLD_TUPLE)
3917  {
3918  BufferHeapTupleTableSlot *bslot;
3919 
3920  Assert(TTS_IS_BUFFERTUPLE(oldSlot));
3921  bslot = (BufferHeapTupleTableSlot *) oldSlot;
3922 
3923  bslot->base.tupdata = oldtup;
3924  ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata,
3925  oldSlot,
3926  buffer);
3927  }
3928  else
3929  {
3930  /* Now we can release the buffer */
3931  ReleaseBuffer(buffer);
3932  }
3933 
3934  if (BufferIsValid(vmbuffer_new))
3935  ReleaseBuffer(vmbuffer_new);
3936  if (BufferIsValid(vmbuffer))
3937  ReleaseBuffer(vmbuffer);
3938 
3939  /*
3940  * Release the lmgr tuple lock, if we had it.
3941  */
3942  if (have_tuple_lock)
3943  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3944 
3945  pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
3946 
3947  /*
3948  * If heaptup is a private copy, release it. Don't forget to copy t_self
3949  * back to the caller's image, too.
3950  */
3951  if (heaptup != newtup)
3952  {
3953  newtup->t_self = heaptup->t_self;
3954  heap_freetuple(heaptup);
3955  }
3956 
3957  /*
3958  * If it is a HOT update, the update may still need to update summarized
3959  * indexes, lest we fail to update those summaries and get incorrect
3960  * results (for example, minmax bounds of the block may change with this
3961  * update).
3962  */
3963  if (use_hot_update)
3964  {
3965  if (summarized_update)
3966  *update_indexes = TU_Summarizing;
3967  else
3968  *update_indexes = TU_None;
3969  }
3970  else
3971  *update_indexes = TU_All;
3972 
3973  if (old_key_tuple != NULL && old_key_copied)
3974  heap_freetuple(old_key_tuple);
3975 
3976  bms_free(hot_attrs);
3977  bms_free(sum_attrs);
3978  bms_free(key_attrs);
3979  bms_free(id_attrs);
3980  bms_free(modified_attrs);
3981  bms_free(interesting_attrs);
3982 
3983  return TM_Ok;
3984 }
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:917
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
static void PageSetFull(Page page)
Definition: bufpage.h:415
TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple)
Definition: heapam.c:7047
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition: heapam.c:4042
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition: heapam.c:8305
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
#define HeapTupleSetHotUpdated(tuple)
Definition: htup_details.h:677
#define HEAP2_XACT_MASK
Definition: htup_details.h:279
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:197
#define HeapTupleHeaderSetCmin(tup, cid)
Definition: htup_details.h:393
#define HEAP_XACT_MASK
Definition: htup_details.h:215
#define HeapTupleSetHeapOnly(tuple)
Definition: htup_details.h:686
#define HeapTupleClearHeapOnly(tuple)
Definition: htup_details.h:689
#define HEAP_UPDATED
Definition: htup_details.h:210
#define HEAP_XMAX_KEYSHR_LOCK
Definition: htup_details.h:194
#define HeapTupleClearHotUpdated(tuple)
Definition: htup_details.h:680
@ XLTW_Update
Definition: lmgr.h:27
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition: relcache.c:5222
@ INDEX_ATTR_BITMAP_KEY
Definition: relcache.h:61
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition: relcache.h:64
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition: relcache.h:65
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition: relcache.h:63
@ TU_Summarizing
Definition: tableam.h:118
@ TU_All
Definition: tableam.h:115
@ TU_None
Definition: tableam.h:112
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188

References Assert(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExecStorePinnedBufferHeapTuple(), ExtractReplicaIdentity(), xl_heap_lock::flags, GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_LOCKED_UPGRADED, HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly, HeapTupleClearHotUpdated, HeapTupleGetUpdateXid(), HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderSetCmax, HeapTupleHeaderSetCmin, HeapTupleHeaderSetXmax, HeapTupleHeaderSetXmin, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly, HeapTupleSetHotUpdated, INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, xl_heap_lock::offnum, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TABLE_MODIFY_FETCH_OLD_TUPLE, TABLE_MODIFY_LOCK_UPDATED, TABLE_MODIFY_WAIT, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TTS_IS_BUFFERTUPLE, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
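
The three TU_* values assigned near the end of this function tell the caller how much index maintenance the update still needs. Below is a minimal sketch of that dispatch, written for this page only (the helper name is made up); it assumes nothing beyond the TU_UpdateIndexes enum from access/tableam.h.

#include "postgres.h"
#include "access/tableam.h"

/* hypothetical helper, not part of PostgreSQL */
static void
dispatch_index_maintenance(TU_UpdateIndexes update_indexes)
{
    switch (update_indexes)
    {
        case TU_None:
            /* HOT update, no summarized columns changed: no index work */
            break;
        case TU_Summarizing:
            /* HOT update, but summarizing indexes (e.g. BRIN) need new entries */
            break;
        case TU_All:
            /* non-HOT update: every index gets an entry for the new tuple */
            break;
    }
}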

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
struct VacuumParams params,
BufferAccessStrategy  bstrategy 
)

Definition at line 285 of file vacuumlazy.c.

287 {
288  LVRelState *vacrel;
289  bool verbose,
290  instrument,
291  skipwithvm,
292  frozenxid_updated,
293  minmulti_updated;
294  BlockNumber orig_rel_pages,
295  new_rel_pages,
296  new_rel_allvisible;
297  PGRUsage ru0;
298  TimestampTz starttime = 0;
299  PgStat_Counter startreadtime = 0,
300  startwritetime = 0;
301  WalUsage startwalusage = pgWalUsage;
302  int64 StartPageHit = VacuumPageHit,
303  StartPageMiss = VacuumPageMiss,
304  StartPageDirty = VacuumPageDirty;
305  ErrorContextCallback errcallback;
306  char **indnames = NULL;
307 
308  verbose = (params->options & VACOPT_VERBOSE) != 0;
309  instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
310  params->log_min_duration >= 0));
311  if (instrument)
312  {
313  pg_rusage_init(&ru0);
314  starttime = GetCurrentTimestamp();
315  if (track_io_timing)
316  {
317  startreadtime = pgStatBlockReadTime;
318  startwritetime = pgStatBlockWriteTime;
319  }
320  }
321 
322  pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
323  RelationGetRelid(rel));
324 
325  /*
326  * Setup error traceback support for ereport() first. The idea is to set
327  * up an error context callback to display additional information on any
328  * error during a vacuum. During different phases of vacuum, we update
329  * the state so that the error context callback always display current
330  * information.
331  *
332  * Copy the names of heap rel into local memory for error reporting
333  * purposes, too. It isn't always safe to assume that we can get the name
334  * of each rel. It's convenient for code in lazy_scan_heap to always use
335  * these temp copies.
336  */
337  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
338  vacrel->dbname = get_database_name(MyDatabaseId);
339  vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
340  vacrel->relname = pstrdup(RelationGetRelationName(rel));
341  vacrel->indname = NULL;
342  vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
343  vacrel->verbose = verbose;
344  errcallback.callback = vacuum_error_callback;
345  errcallback.arg = vacrel;
346  errcallback.previous = error_context_stack;
347  error_context_stack = &errcallback;
348 
349  /* Set up high level stuff about rel and its indexes */
350  vacrel->rel = rel;
351  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
352  &vacrel->indrels);
353  vacrel->bstrategy = bstrategy;
354  if (instrument && vacrel->nindexes > 0)
355  {
356  /* Copy index names used by instrumentation (not error reporting) */
357  indnames = palloc(sizeof(char *) * vacrel->nindexes);
358  for (int i = 0; i < vacrel->nindexes; i++)
359  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
360  }
361 
362  /*
363  * The index_cleanup param either disables index vacuuming and cleanup or
364  * forces it to go ahead when we would otherwise apply the index bypass
365  * optimization. The default is 'auto', which leaves the final decision
366  * up to lazy_vacuum().
367  *
368  * The truncate param allows user to avoid attempting relation truncation,
369  * though it can't force truncation to happen.
370  */
371  Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
372  Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
373  params->truncate != VACOPTVALUE_AUTO);
374 
375  /*
376  * While VacuumFailSafeActive is reset to false before calling this, we
377  * still need to reset it here due to recursive calls.
378  */
379  VacuumFailsafeActive = false;
380  vacrel->consider_bypass_optimization = true;
381  vacrel->do_index_vacuuming = true;
382  vacrel->do_index_cleanup = true;
383  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
384  if (params->index_cleanup == VACOPTVALUE_DISABLED)
385  {
386  /* Force disable index vacuuming up-front */
387  vacrel->do_index_vacuuming = false;
388  vacrel->do_index_cleanup = false;
389  }
390  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
391  {
392  /* Force index vacuuming. Note that failsafe can still bypass. */
393  vacrel->consider_bypass_optimization = false;
394  }
395  else
396  {
397  /* Default/auto, make all decisions dynamically */
398  Assert(params->index_cleanup == VACOPTVALUE_AUTO);
399  }
400 
401  /* Initialize page counters explicitly (be tidy) */
402  vacrel->scanned_pages = 0;
403  vacrel->removed_pages = 0;
404  vacrel->frozen_pages = 0;
405  vacrel->lpdead_item_pages = 0;
406  vacrel->missed_dead_pages = 0;
407  vacrel->nonempty_pages = 0;
408  /* dead_items_alloc allocates vacrel->dead_items later on */
409 
410  /* Allocate/initialize output statistics state */
411  vacrel->new_rel_tuples = 0;
412  vacrel->new_live_tuples = 0;
413  vacrel->indstats = (IndexBulkDeleteResult **)
414  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
415 
416  /* Initialize remaining counters (be tidy) */
417  vacrel->num_index_scans = 0;
418  vacrel->tuples_deleted = 0;
419  vacrel->tuples_frozen = 0;
420  vacrel->lpdead_items = 0;
421  vacrel->live_tuples = 0;
422  vacrel->recently_dead_tuples = 0;
423  vacrel->missed_dead_tuples = 0;
424 
425  /*
426  * Get cutoffs that determine which deleted tuples are considered DEAD,
427  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
428  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
429  * happen in this order to ensure that the OldestXmin cutoff field works
430  * as an upper bound on the XIDs stored in the pages we'll actually scan
431  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
432  *
433  * Next acquire vistest, a related cutoff that's used in heap_page_prune.
434  * We expect vistest will always make heap_page_prune remove any deleted
435  * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become
436  * confused about whether a tuple should be frozen or removed. (In the
437  * future we might want to teach lazy_scan_prune to recompute vistest from
438  * time to time, to increase the number of dead tuples it can prune away.)
439  */
440  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
441  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
442  vacrel->vistest = GlobalVisTestFor(rel);
443  /* Initialize state used to track oldest extant XID/MXID */
444  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
445  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
446  vacrel->skippedallvis = false;
447  skipwithvm = true;
448  if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
449  {
450  /*
451  * Force aggressive mode, and disable skipping blocks using the
452  * visibility map (even those set all-frozen)
453  */
454  vacrel->aggressive = true;
455  skipwithvm = false;
456  }
457 
458  vacrel->skipwithvm = skipwithvm;
459 
460  if (verbose)
461  {
462  if (vacrel->aggressive)
463  ereport(INFO,
464  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
465  vacrel->dbname, vacrel->relnamespace,
466  vacrel->relname)));
467  else
468  ereport(INFO,
469  (errmsg("vacuuming \"%s.%s.%s\"",
470  vacrel->dbname, vacrel->relnamespace,
471  vacrel->relname)));
472  }
473 
474  /*
475  * Allocate dead_items array memory using dead_items_alloc. This handles
476  * parallel VACUUM initialization as part of allocating shared memory
477  * space used for dead_items. (But do a failsafe precheck first, to
478  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
479  * is already dangerously old.)
480  */
481  lazy_check_wraparound_failsafe(vacrel);
482  dead_items_alloc(vacrel, params->nworkers);
483 
484  /*
485  * Call lazy_scan_heap to perform all required heap pruning, index
486  * vacuuming, and heap vacuuming (plus related processing)
487  */
488  lazy_scan_heap(vacrel);
489 
490  /*
491  * Free resources managed by dead_items_alloc. This ends parallel mode in
492  * passing when necessary.
493  */
494  dead_items_cleanup(vacrel);
495  Assert(!IsInParallelMode());
496 
497  /*
498  * Update pg_class entries for each of rel's indexes where appropriate.
499  *
500  * Unlike the later update to rel's pg_class entry, this is not critical.
501  * Maintains relpages/reltuples statistics used by the planner only.
502  */
503  if (vacrel->do_index_cleanup)
504  update_relstats_all_indexes(vacrel);
505 
506  /* Done with rel's indexes */
507  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
508 
509  /* Optionally truncate rel */
510  if (should_attempt_truncation(vacrel))
511  lazy_truncate_heap(vacrel);
512 
513  /* Pop the error context stack */
514  error_context_stack = errcallback.previous;
515 
516  /* Report that we are now doing final cleanup */
517  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
518  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
519 
520  /*
521  * Prepare to update rel's pg_class entry.
522  *
523  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
524  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
525  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
526  */
527  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
528  TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
529  vacrel->cutoffs.relfrozenxid,
530  vacrel->NewRelfrozenXid));
531  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
532  MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
533  vacrel->cutoffs.relminmxid,
534  vacrel->NewRelminMxid));
535  if (vacrel->skippedallvis)
536  {
537  /*
538  * Must keep original relfrozenxid in a non-aggressive VACUUM that
539  * chose to skip an all-visible page range. The state that tracks new
540  * values will have missed unfrozen XIDs from the pages we skipped.
541  */
542  Assert(!vacrel->aggressive);
543  vacrel->NewRelfrozenXid = InvalidTransactionId;
544  vacrel->NewRelminMxid = InvalidMultiXactId;
545  }
546 
547  /*
548  * For safety, clamp relallvisible to be not more than what we're setting
549  * pg_class.relpages to
550  */
551  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
552  visibilitymap_count(rel, &new_rel_allvisible, NULL);
553  if (new_rel_allvisible > new_rel_pages)
554  new_rel_allvisible = new_rel_pages;
555 
556  /*
557  * Now actually update rel's pg_class entry.
558  *
559  * In principle new_live_tuples could be -1 indicating that we (still)
560  * don't know the tuple count. In practice that can't happen, since we
561  * scan every page that isn't skipped using the visibility map.
562  */
563  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
564  new_rel_allvisible, vacrel->nindexes > 0,
565  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
566  &frozenxid_updated, &minmulti_updated, false);
567 
568  /*
569  * Report results to the cumulative stats system, too.
570  *
571  * Deliberately avoid telling the stats system about LP_DEAD items that
572  * remain in the table due to VACUUM bypassing index and heap vacuuming.
573  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
574  * It seems like a good idea to err on the side of not vacuuming again too
575  * soon in cases where the failsafe prevented significant amounts of heap
576  * vacuuming.
577  */
578  pgstat_report_vacuum(RelationGetRelid(rel),
579  rel->rd_rel->relisshared,
580  Max(vacrel->new_live_tuples, 0),
581  vacrel->recently_dead_tuples +
582  vacrel->missed_dead_tuples);
583  pgstat_progress_end_command();
584 
585  if (instrument)
586  {
587  TimestampTz endtime = GetCurrentTimestamp();
588 
589  if (verbose || params->log_min_duration == 0 ||
590  TimestampDifferenceExceeds(starttime, endtime,
591  params->log_min_duration))
592  {
593  long secs_dur;
594  int usecs_dur;
595  WalUsage walusage;
596  StringInfoData buf;
597  char *msgfmt;
598  int32 diff;
599  int64 PageHitOp = VacuumPageHit - StartPageHit,
600  PageMissOp = VacuumPageMiss - StartPageMiss,
601  PageDirtyOp = VacuumPageDirty - StartPageDirty;
602  double read_rate = 0,
603  write_rate = 0;
604 
605  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
606  memset(&walusage, 0, sizeof(WalUsage));
607  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
608 
609  initStringInfo(&buf);
610  if (verbose)
611  {
612  /*
613  * Aggressiveness already reported earlier, in dedicated
614  * VACUUM VERBOSE ereport
615  */
616  Assert(!params->is_wraparound);
617  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
618  }
619  else if (params->is_wraparound)
620  {
621  /*
622  * While it's possible for a VACUUM to be both is_wraparound
623  * and !aggressive, that's just a corner-case -- is_wraparound
624  * implies aggressive. Produce distinct output for the corner
625  * case all the same, just in case.
626  */
627  if (vacrel->aggressive)
628  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
629  else
630  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
631  }
632  else
633  {
634  if (vacrel->aggressive)
635  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
636  else
637  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
638  }
639  appendStringInfo(&buf, msgfmt,
640  vacrel->dbname,
641  vacrel->relnamespace,
642  vacrel->relname,
643  vacrel->num_index_scans);
644  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
645  vacrel->removed_pages,
646  new_rel_pages,
647  vacrel->scanned_pages,
648  orig_rel_pages == 0 ? 100.0 :
649  100.0 * vacrel->scanned_pages / orig_rel_pages);
650  appendStringInfo(&buf,
651  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
652  (long long) vacrel->tuples_deleted,
653  (long long) vacrel->new_rel_tuples,
654  (long long) vacrel->recently_dead_tuples);
655  if (vacrel->missed_dead_tuples > 0)
656  appendStringInfo(&buf,
657  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
658  (long long) vacrel->missed_dead_tuples,
659  vacrel->missed_dead_pages);
660  diff = (int32) (ReadNextTransactionId() -
661  vacrel->cutoffs.OldestXmin);
662  appendStringInfo(&buf,
663  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
664  vacrel->cutoffs.OldestXmin, diff);
665  if (frozenxid_updated)
666  {
667  diff = (int32) (vacrel->NewRelfrozenXid -
668  vacrel->cutoffs.relfrozenxid);
669  appendStringInfo(&buf,
670  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
671  vacrel->NewRelfrozenXid, diff);
672  }
673  if (minmulti_updated)
674  {
675  diff = (int32) (vacrel->NewRelminMxid -
676  vacrel->cutoffs.relminmxid);
677  appendStringInfo(&buf,
678  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
679  vacrel->NewRelminMxid, diff);
680  }
681  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
682  vacrel->frozen_pages,
683  orig_rel_pages == 0 ? 100.0 :
684  100.0 * vacrel->frozen_pages / orig_rel_pages,
685  (long long) vacrel->tuples_frozen);
686  if (vacrel->do_index_vacuuming)
687  {
688  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
689  appendStringInfoString(&buf, _("index scan not needed: "));
690  else
691  appendStringInfoString(&buf, _("index scan needed: "));
692 
693  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
694  }
695  else
696  {
697  if (!VacuumFailsafeActive)
698  appendStringInfoString(&buf, _("index scan bypassed: "));
699  else
700  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
701 
702  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
703  }
704  appendStringInfo(&buf, msgfmt,
705  vacrel->lpdead_item_pages,
706  orig_rel_pages == 0 ? 100.0 :
707  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
708  (long long) vacrel->lpdead_items);
709  for (int i = 0; i < vacrel->nindexes; i++)
710  {
711  IndexBulkDeleteResult *istat = vacrel->indstats[i];
712 
713  if (!istat)
714  continue;
715 
716  appendStringInfo(&buf,
717  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
718  indnames[i],
719  istat->num_pages,
720  istat->pages_newly_deleted,
721  istat->pages_deleted,
722  istat->pages_free);
723  }
724  if (track_io_timing)
725  {
726  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
727  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
728 
729  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
730  read_ms, write_ms);
731  }
732  if (secs_dur > 0 || usecs_dur > 0)
733  {
734  read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
735  (secs_dur + usecs_dur / 1000000.0);
736  write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
737  (secs_dur + usecs_dur / 1000000.0);
738  }
739  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
740  read_rate, write_rate);
741  appendStringInfo(&buf,
742  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
743  (long long) PageHitOp,
744  (long long) PageMissOp,
745  (long long) PageDirtyOp);
746  appendStringInfo(&buf,
747  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
748  (long long) walusage.wal_records,
749  (long long) walusage.wal_fpi,
750  (unsigned long long) walusage.wal_bytes);
751  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
752 
753  ereport(verbose ? INFO : LOG,
754  (errmsg_internal("%s", buf.data)));
755  pfree(buf.data);
756  }
757  }
758 
759  /* Cleanup index statistics and index names */
760  for (int i = 0; i < vacrel->nindexes; i++)
761  {
762  if (vacrel->indstats[i])
763  pfree(vacrel->indstats[i]);
764 
765  if (instrument)
766  pfree(indnames[i]);
767  }
768 }
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1730
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1790
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
bool track_io_timing
Definition: bufmgr.c:138
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:229
signed int int32
Definition: c.h:481
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3153
ErrorContextCallback * error_context_stack
Definition: elog.c:94
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define INFO
Definition: elog.h:34
int64 VacuumPageHit
Definition: globals.c:154
int64 VacuumPageMiss
Definition: globals.c:155
int64 VacuumPageDirty
Definition: globals.c:156
Oid MyDatabaseId
Definition: globals.c:91
int verbose
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3344
char * pstrdup(const char *in)
Definition: mcxt.c:1683
void * palloc0(Size size)
Definition: mcxt.c:1334
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:375
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:89
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:37
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define RelationGetNamespace(relation)
Definition: rel.h:548
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_newly_deleted
Definition: genam.h:81
BlockNumber pages_free
Definition: genam.h:83
BlockNumber num_pages
Definition: genam.h:77
bool verbose
Definition: vacuumlazy.c:174
int nindexes
Definition: vacuumlazy.c:140
int64 tuples_deleted
Definition: vacuumlazy.c:201
BlockNumber nonempty_pages
Definition: vacuumlazy.c:190
bool do_rel_truncate
Definition: vacuumlazy.c:156
BlockNumber scanned_pages
Definition: vacuumlazy.c:185
bool aggressive
Definition: vacuumlazy.c:147
GlobalVisState * vistest
Definition: vacuumlazy.c:160
BlockNumber removed_pages
Definition: vacuumlazy.c:186
int num_index_scans
Definition: vacuumlazy.c:199
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:196
double new_live_tuples
Definition: vacuumlazy.c:194
double new_rel_tuples
Definition: vacuumlazy.c:193
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:162
Relation rel
Definition: vacuumlazy.c:138
bool consider_bypass_optimization
Definition: vacuumlazy.c:151
BlockNumber rel_pages
Definition: vacuumlazy.c:184
int64 recently_dead_tuples
Definition: vacuumlazy.c:205
int64 tuples_frozen
Definition: vacuumlazy.c:202
BlockNumber frozen_pages
Definition: vacuumlazy.c:187
char * dbname
Definition: vacuumlazy.c:167
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:189
char * relnamespace
Definition: vacuumlazy.c:168
int64 live_tuples
Definition: vacuumlazy.c:204
int64 lpdead_items
Definition: vacuumlazy.c:203
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:143
bool skippedallvis
Definition: vacuumlazy.c:164
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:188
Relation * indrels
Definition: vacuumlazy.c:139
bool skipwithvm
Definition: vacuumlazy.c:149
bool do_index_cleanup
Definition: vacuumlazy.c:155
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:163
int64 missed_dead_tuples
Definition: vacuumlazy.c:206
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:159
char * relname
Definition: vacuumlazy.c:169
VacErrPhase phase
Definition: vacuumlazy.c:173
char * indname
Definition: vacuumlazy.c:170
bool do_index_vacuuming
Definition: vacuumlazy.c:154
int nworkers
Definition: vacuum.h:238
VacOptValue truncate
Definition: vacuum.h:230
bits32 options
Definition: vacuum.h:218
bool is_wraparound
Definition: vacuum.h:225
int log_min_duration
Definition: vacuum.h:226
VacOptValue index_cleanup
Definition: vacuum.h:229
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2273
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1399
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2316
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1073
bool VacuumFailsafeActive
Definition: vacuum.c:96
#define VACOPT_VERBOSE
Definition: vacuum.h:181
@ VACOPTVALUE_AUTO
Definition: vacuum.h:202
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:204
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:201
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:203
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:187
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3227
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3368
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3403
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2857
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2837
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:127
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:807
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2608
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:3170
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, LVRelState::aggressive, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert(), LVRelState::bstrategy, buf, ErrorContextCallback::callback, LVRelState::consider_bypass_optimization, LVRelState::cutoffs, LVRelState::dbname, dead_items_alloc(), dead_items_cleanup(), LVRelState::do_index_cleanup, LVRelState::do_index_vacuuming, LVRelState::do_rel_truncate, ereport, errmsg(), errmsg_internal(), error_context_stack, VacuumCutoffs::FreezeLimit, LVRelState::frozen_pages, get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), i, VacuumParams::index_cleanup, LVRelState::indname, LVRelState::indrels, LVRelState::indstats, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), LVRelState::live_tuples, LOG, VacuumParams::log_min_duration, LVRelState::lpdead_item_pages, LVRelState::lpdead_items, Max, LVRelState::missed_dead_pages, LVRelState::missed_dead_tuples, VacuumCutoffs::MultiXactCutoff, MultiXactIdPrecedesOrEquals(), MyDatabaseId, LVRelState::new_live_tuples, LVRelState::new_rel_tuples, LVRelState::NewRelfrozenXid, LVRelState::NewRelminMxid, LVRelState::nindexes, NoLock, LVRelState::nonempty_pages, LVRelState::num_index_scans, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc(), palloc0(), pfree(), pg_rusage_init(), pg_rusage_show(), pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, LVRelState::phase, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, pstrdup(), RelationData::rd_rel, ReadNextTransactionId(), LVRelState::recently_dead_tuples, LVRelState::rel, LVRelState::rel_pages, RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, LVRelState::relname, LVRelState::relnamespace, LVRelState::removed_pages, RowExclusiveLock, LVRelState::scanned_pages, should_attempt_truncation(), LVRelState::skippedallvis, LVRelState::skipwithvm, TimestampDifference(), TimestampDifferenceExceeds(), track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, LVRelState::tuples_deleted, LVRelState::tuples_frozen, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, VacuumPageDirty, VacuumPageHit, VacuumPageMiss, LVRelState::verbose, verbose, visibilitymap_count(), LVRelState::vistest, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, and WalUsageAccumDiff().
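
As a quick illustration of the read/write rate arithmetic used in the instrumentation block above, the sketch below recomputes the same formula with made-up sample numbers; it assumes the default 8 kB block size and is a standalone example, not PostgreSQL code.

#include <stdio.h>

int
main(void)
{
    const double blcksz = 8192.0;     /* assumed BLCKSZ */
    long long    page_miss = 12800;   /* hypothetical VacuumPageMiss delta */
    long long    page_dirty = 6400;   /* hypothetical VacuumPageDirty delta */
    double       elapsed = 4.0;       /* secs_dur + usecs_dur / 1000000.0 */

    /* bytes read/written, converted to MB, divided by elapsed seconds */
    double read_rate = blcksz * page_miss / (1024 * 1024) / elapsed;
    double write_rate = blcksz * page_dirty / (1024 * 1024) / elapsed;

    printf("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n",
           read_rate, write_rate);    /* 25.000 and 12.500 with these inputs */
    return 0;
}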

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)

Definition at line 10009 of file heapam.c.

10012 {
10013  TransactionId xid;
10014  HTSV_Result htsvResult;
10015 
10016  if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
10017  return;
10018 
10019  /*
10020  * Check to see whether the tuple has been written to by a concurrent
10021  * transaction, either to create it not visible to us, or to delete it
10022  * while it is visible to us. The "visible" bool indicates whether the
10023  * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
10024  * is going on with it.
10025  *
10026  * In the event of a concurrently inserted tuple that also happens to have
10027  * been concurrently updated (by a separate transaction), the xmin of the
10028  * tuple will be used -- not the updater's xid.
10029  */
10030  htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
10031  switch (htsvResult)
10032  {
10033  case HEAPTUPLE_LIVE:
10034  if (visible)
10035  return;
10036  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10037  break;
10038  case HEAPTUPLE_RECENTLY_DEAD:
10039  case HEAPTUPLE_DELETE_IN_PROGRESS:
10040  if (visible)
10041  xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
10042  else
10043  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10044 
10045  if (TransactionIdPrecedes(xid, TransactionXmin))
10046  {
10047  /* This is like the HEAPTUPLE_DEAD case */
10048  Assert(!visible);
10049  return;
10050  }
10051  break;
10052  case HEAPTUPLE_INSERT_IN_PROGRESS:
10053  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10054  break;
10055  case HEAPTUPLE_DEAD:
10056  Assert(!visible);
10057  return;
10058  default:
10059 
10060  /*
10061  * The only way to get to this default clause is if a new value is
10062  * added to the enum type without adding it to this switch
10063  * statement. That's a bug, so elog.
10064  */
10065  elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
10066 
10067  /*
10068  * In spite of having all enum values covered and calling elog on
10069  * this default, some compilers think this is a code path which
10070  * allows xid to be used below without initialization. Silence
10071  * that warning.
10072  */
10073  xid = InvalidTransactionId;
10074  }
10075 
10076  Assert(TransactionIdIsValid(xid));
10077  Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
10078 
10079  /*
10080  * Find top level xid. Bail out if xid is too early to be a conflict, or
10081  * if it's our own xid.
10082  */
10083  if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
10084  return;
10085  xid = SubTransGetTopmostTransaction(xid);
10086  if (TransactionIdPrecedes(xid, TransactionXmin))
10087  return;
10088 
10089  CheckForSerializableConflictOut(relation, xid, snapshot);
10090 }
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition: predicate.c:4003
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition: predicate.c:3971
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:163
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:438

References Assert(), CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapgetpage(), and heapgettup().
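
The calling convention is visible in heapgetpage() below: the caller first decides tuple visibility itself and then reports the outcome, whether or not the tuple turned out to be visible. A minimal sketch of that pattern follows; the wrapper name is invented for this page, and the caller is assumed to hold at least a share lock on the buffer.

#include "postgres.h"
#include "access/heapam.h"

/* hypothetical wrapper mirroring the heapgetpage()/heap_fetch() pattern */
static bool
visibility_check_with_ssi(Relation rel, Buffer buffer, HeapTuple tuple,
                          Snapshot snapshot)
{
    bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);

    /* report the visibility verdict for serializable conflict detection */
    HeapCheckForSerializableConflictOut(visible, rel, tuple, buffer, snapshot);

    return visible;
}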

◆ heapgetpage()

void heapgetpage ( TableScanDesc  sscan,
BlockNumber  block 
)

Definition at line 370 of file heapam.c.

371 {
372  HeapScanDesc scan = (HeapScanDesc) sscan;
373  Buffer buffer;
374  Snapshot snapshot;
375  Page page;
376  int lines;
377  int ntup;
378  OffsetNumber lineoff;
379  bool all_visible;
380 
381  Assert(block < scan->rs_nblocks);
382 
383  /* release previous scan buffer, if any */
384  if (BufferIsValid(scan->rs_cbuf))
385  {
386  ReleaseBuffer(scan->rs_cbuf);
387  scan->rs_cbuf = InvalidBuffer;
388  }
389 
390  /*
391  * Be sure to check for interrupts at least once per page. Checks at
392  * higher code levels won't be able to stop a seqscan that encounters many
393  * pages' worth of consecutive dead tuples.
394  */
395  CHECK_FOR_INTERRUPTS();
396 
397  /* read page using selected strategy */
398  scan->rs_cbuf = ReadBufferExtended(scan->rs_base.rs_rd, MAIN_FORKNUM, block,
399  RBM_NORMAL, scan->rs_strategy);
400  scan->rs_cblock = block;
401 
402  if (!(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE))
403  return;
404 
405  buffer = scan->rs_cbuf;
406  snapshot = scan->rs_base.rs_snapshot;
407 
408  /*
409  * Prune and repair fragmentation for the whole page, if possible.
410  */
411  heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
412 
413  /*
414  * We must hold share lock on the buffer content while examining tuple
415  * visibility. Afterwards, however, the tuples we have found to be
416  * visible are guaranteed good as long as we hold the buffer pin.
417  */
418  LockBuffer(buffer, BUFFER_LOCK_SHARE);
419 
420  page = BufferGetPage(buffer);
421  lines = PageGetMaxOffsetNumber(page);
422  ntup = 0;
423 
424  /*
425  * If the all-visible flag indicates that all tuples on the page are
426  * visible to everyone, we can skip the per-tuple visibility tests.
427  *
428  * Note: In hot standby, a tuple that's already visible to all
429  * transactions on the primary might still be invisible to a read-only
430  * transaction in the standby. We partly handle this problem by tracking
431  * the minimum xmin of visible tuples as the cut-off XID while marking a
432  * page all-visible on the primary and WAL log that along with the
433  * visibility map SET operation. In hot standby, we wait for (or abort)
434  * all transactions that can potentially may not see one or more tuples on
435  * the page. That's how index-only scans work fine in hot standby. A
436  * crucial difference between index-only scans and heap scans is that the
437  * index-only scan completely relies on the visibility map where as heap
438  * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
439  * the page-level flag can be trusted in the same way, because it might
440  * get propagated somehow without being explicitly WAL-logged, e.g. via a
441  * full page write. Until we can prove that beyond doubt, let's check each
442  * tuple for visibility the hard way.
443  */
444  all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
445 
446  for (lineoff = FirstOffsetNumber; lineoff <= lines; lineoff++)
447  {
448  ItemId lpp = PageGetItemId(page, lineoff);
449  HeapTupleData loctup;
450  bool valid;
451 
452  if (!ItemIdIsNormal(lpp))
453  continue;
454 
455  loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
456  loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
457  loctup.t_len = ItemIdGetLength(lpp);
458  ItemPointerSet(&(loctup.t_self), block, lineoff);
459 
460  if (all_visible)
461  valid = true;
462  else
463  valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
464 
465  HeapCheckForSerializableConflictOut(valid, scan->rs_base.rs_rd,
466  &loctup, buffer, snapshot);
467 
468  if (valid)
469  scan->rs_vistuples[ntup++] = lineoff;
470  }
471 
472  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
473 
474  Assert(ntup <= MaxHeapTuplesPerPage);
475  scan->rs_ntuples = ntup;
476 }
Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy)
Definition: bufmgr.c:781
@ RBM_NORMAL
Definition: bufmgr.h:44
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:86
@ MAIN_FORKNUM
Definition: relpath.h:50
int rs_ntuples
Definition: heapam.h:77
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]
Definition: heapam.h:78
BlockNumber rs_cblock
Definition: heapam.h:61
bool takenDuringRecovery
Definition: snapshot.h:184

References Assert(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), CHECK_FOR_INTERRUPTS, FirstOffsetNumber, heap_page_prune_opt(), HeapCheckForSerializableConflictOut(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerSet(), LockBuffer(), MAIN_FORKNUM, MaxHeapTuplesPerPage, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsAllVisible(), RBM_NORMAL, ReadBufferExtended(), RelationGetRelid, ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vistuples, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), heapgettup(), and heapgettup_pagemode().
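
After heapgetpage() returns in pagemode, rs_ntuples and rs_vistuples[] describe the visible line pointers of the current block, and the pin on rs_cbuf keeps those tuples safe to read. The sketch below shows how a consumer could walk that array; it is loosely modelled on heapgettup_pagemode() but written for this page, not copied from it.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

/* hypothetical walker over the visible offsets collected by heapgetpage() */
static void
walk_current_page(HeapScanDesc scan)
{
    Page page = BufferGetPage(scan->rs_cbuf);

    for (int i = 0; i < scan->rs_ntuples; i++)
    {
        OffsetNumber  lineoff = scan->rs_vistuples[i];
        ItemId        lpp = PageGetItemId(page, lineoff);
        HeapTupleData tuple;

        tuple.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
        tuple.t_len = ItemIdGetLength(lpp);
        tuple.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
        ItemPointerSet(&tuple.t_self, scan->rs_cblock, lineoff);

        /* hand the tuple to the caller, e.g. store it into a slot */
        (void) tuple;
    }
}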

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)

Definition at line 1520 of file heapam_visibility.c.

1521 {
1522  TransactionId xmax;
1523 
1524  /* if there's no valid Xmax, then there's obviously no update either */
1525  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1526  return true;
1527 
1528  if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1529  return true;
1530 
1531  /* invalid xmax means no update */
1532  if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
1533  return true;
1534 
1535  /*
1536  * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1537  * necessarily have been updated
1538  */
1539  if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1540  return false;
1541 
1542  /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1543  xmax = HeapTupleGetUpdateXid(tuple);
1544 
1545  /* not LOCKED_ONLY, so it has to have an xmax */
1546  Assert(TransactionIdIsValid(xmax));
1547 
1548  if (TransactionIdIsCurrentTransactionId(xmax))
1549  return false;
1550  if (TransactionIdIsInProgress(xmax))
1551  return false;
1552  if (TransactionIdDidCommit(xmax))
1553  return false;
1554 
1555  /*
1556  * not current, not in progress, not committed -- must have aborted or
1557  * crashed
1558  */
1559  return true;
1560 }
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1390

References Assert(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
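
A common reason to call this is to distinguish a real update from a mere row lock before following the tuple's t_ctid chain, as heap_get_latest_tid() does. A tiny sketch of that test follows; the helper name is invented for this page.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"

/* hypothetical helper: true only if xmax records a genuine updater */
static bool
tuple_was_truly_updated(HeapTupleHeader tup)
{
    return !(tup->t_infomask & HEAP_XMAX_INVALID) &&
        !HeapTupleHeaderIsOnlyLocked(tup);
}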

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
struct GlobalVisState vistest 
)

Definition at line 1465 of file heapam_visibility.c.

1466 {
1467  HeapTupleHeader tuple = htup->t_data;
1468 
1469  Assert(ItemPointerIsValid(&htup->t_self));
1470  Assert(htup->t_tableOid != InvalidOid);
1471 
1472  /*
1473  * If the inserting transaction is marked invalid, then it aborted, and
1474  * the tuple is definitely dead. If it's marked neither committed nor
1475  * invalid, then we assume it's still alive (since the presumption is that
1476  * all relevant hint bits were just set moments ago).
1477  */
1478  if (!HeapTupleHeaderXminCommitted(tuple))
1479  return HeapTupleHeaderXminInvalid(tuple);
1480 
1481  /*
1482  * If the inserting transaction committed, but any deleting transaction
1483  * aborted, the tuple is still alive.
1484  */
1485  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1486  return false;
1487 
1488  /*
1489  * If the XMAX is just a lock, the tuple is still alive.
1490  */
1491  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1492  return false;
1493 
1494  /*
1495  * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1496  * know without checking pg_multixact.
1497  */
1498  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1499  return false;
1500 
1501  /* If deleter isn't known to have committed, assume it's still running. */
1502  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1503  return false;
1504 
1505  /* Deleter committed, so tuple is dead if the XID is old enough. */
1506  return GlobalVisTestIsRemovableXid(vistest,
1507  HeapTupleHeaderGetRawXmax(tuple));
1508 }
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define HeapTupleHeaderXminInvalid(tup)
Definition: htup_details.h:325
#define InvalidOid
Definition: postgres_ext.h:36

References Assert(), GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)

Definition at line 458 of file heapam_visibility.c.

460 {
461  HeapTupleHeader tuple = htup->t_data;
462 
463  Assert(ItemPointerIsValid(&htup->t_self));
464  Assert(htup->t_tableOid != InvalidOid);
465 
466  if (!HeapTupleHeaderXminCommitted(tuple))
467  {
468  if (HeapTupleHeaderXminInvalid(tuple))
469  return TM_Invisible;
470 
471  /* Used by pre-9.0 binary upgrades */
472  if (tuple->t_infomask & HEAP_MOVED_OFF)
473  {
474  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
475 
476  if (TransactionIdIsCurrentTransactionId(xvac))
477  return TM_Invisible;
478  if (!TransactionIdIsInProgress(xvac))
479  {
480  if (TransactionIdDidCommit(xvac))
481  {
482  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
483  InvalidTransactionId);
484  return TM_Invisible;
485  }
486  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
487  InvalidTransactionId);
488  }
489  }
490  /* Used by pre-9.0 binary upgrades */
491  else if (tuple->t_infomask & HEAP_MOVED_IN)
492  {
493  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
494 
495  if (!TransactionIdIsCurrentTransactionId(xvac))
496  {
497  if (TransactionIdIsInProgress(xvac))
498  return TM_Invisible;
499  if (TransactionIdDidCommit(xvac))
500  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
501  InvalidTransactionId);
502  else
503  {
504  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
505  InvalidTransactionId);
506  return TM_Invisible;
507  }
508  }
509  }
510  else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
511  {
512  if (HeapTupleHeaderGetCmin(tuple) >= curcid)
513  return TM_Invisible; /* inserted after scan started */
514 
515  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
516  return TM_Ok;
517 
518  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
519  {
520  TransactionId xmax;
521 
522  xmax = HeapTupleHeaderGetRawXmax(tuple);
523 
524  /*
525  * Careful here: even though this tuple was created by our own
526  * transaction, it might be locked by other transactions, if
527  * the original version was key-share locked when we updated
528  * it.
529  */
530 
531  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
532  {
533  if (MultiXactIdIsRunning(xmax, true))
534  return TM_BeingModified;
535  else
536  return TM_Ok;
537  }
538 
539  /*
540  * If the locker is gone, then there is nothing of interest
541  * left in this Xmax; otherwise, report the tuple as
542  * locked/updated.
543  */
544  if (!TransactionIdIsInProgress(xmax))
545  return TM_Ok;
546  return TM_BeingModified;
547  }
548 
549  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
550  {
551  TransactionId xmax;
552 
553  xmax = HeapTupleGetUpdateXid(tuple);
554 
555  /* not LOCKED_ONLY, so it has to have an xmax */
556  Assert(TransactionIdIsValid(xmax));
557 
558  /* deleting subtransaction must have aborted */
559  if (!TransactionIdIsCurrentTransactionId(xmax))
560  {
561  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
562  false))
563  return TM_BeingModified;
564  return TM_Ok;
565  }
566  else
567  {
568  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
569  return TM_SelfModified; /* updated after scan started */
570  else
571  return TM_Invisible; /* updated before scan started */
572  }
573  }
574 
575  if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
576  {
577  /* deleting subtransaction must have aborted */
578  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
579  InvalidTransactionId);
580  return TM_Ok;
581  }
582 
583  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
584  return TM_SelfModified; /* updated after scan started */
585  else
586  return TM_Invisible; /* updated before scan started */
587  }
588  else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
589  return TM_Invisible;
590  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
591  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
592  HeapTupleHeaderGetRawXmin(tuple));
593  else
594  {
595  /* it must have aborted or crashed */
596  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
597  InvalidTransactionId);
598  return TM_Invisible;
599  }
600  }
601 
602  /* by here, the inserting transaction has committed */
603 
604  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
605  return TM_Ok;
606 
607  if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
608  {
609  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
610  return TM_Ok;
611  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
612  return TM_Updated; /* updated by other */
613  else
614  return TM_Deleted; /* deleted by other */
615  }
616 
617  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
618  {
619  TransactionId xmax;
620 
621  if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
622  return TM_Ok;
623 
624  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
625  {
626  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
627  return TM_BeingModified;
628 
629  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
630  return TM_Ok;
631  }
632 
633  xmax = HeapTupleGetUpdateXid(tuple);
634  if (!TransactionIdIsValid(xmax))
635  {
636  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
637  return TM_BeingModified;
638  }
639 
640  /* not LOCKED_ONLY, so it has to have an xmax */
641  Assert(TransactionIdIsValid(xmax));
642 
643  if (TransactionIdIsCurrentTransactionId(xmax))
644  {
645  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
646  return TM_SelfModified; /* updated after scan started */
647  else
648  return TM_Invisible; /* updated before scan started */
649  }
650 
651  if (TransactionIdIsInProgress(xmax))
652  return TM_BeingModified;
653 
654  if (TransactionIdDidCommit(xmax))
655  {
656  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
657  return TM_Updated;
658  else
659  return TM_Deleted;
660  }
661 
662  /*
663  * By here, the update in the Xmax is either aborted or crashed, but
664  * what about the other members?
665  */
666 
667  if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
668  {
669  /*
670  * There's no member, even just a locker, alive anymore, so we can
671  * mark the Xmax as invalid.
672  */
673  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
674  InvalidTransactionId);
675  return TM_Ok;
676  }
677  else
678  {
679  /* There are lockers running */
680  return TM_BeingModified;
681  }
682  }
683 
684  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
685  {
686  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
687  return TM_BeingModified;
688  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
689  return TM_SelfModified; /* updated after scan started */
690  else
691  return TM_Invisible; /* updated before scan started */
692  }
693 
694  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
695  return TM_BeingModified;
696 
697  if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
698  {
699  /* it must have aborted or crashed */
700  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
701  InvalidTransactionId);
702  return TM_Ok;
703  }
704 
705  /* xmax transaction committed */
706 
707  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
708  {
709  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
710  InvalidTransactionId);
711  return TM_Ok;
712  }
713 
714  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
715  HeapTupleHeaderGetRawXmax(tuple));
716  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
717  return TM_Updated; /* updated by other */
718  else
719  return TM_Deleted; /* deleted by other */
720 }
CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup)
Definition: combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
#define HEAP_XMIN_COMMITTED
Definition: htup_details.h:204
#define HEAP_MOVED_IN
Definition: htup_details.h:212
#define HEAP_XMIN_INVALID
Definition: htup_details.h:205
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:550

References Assert(), HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetXvac, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_lock_tuple(), heap_update(), and pgrowlocks().
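
Callers such as heap_update() and heap_delete() branch on the returned TM_Result to decide whether to proceed, wait, or report a concurrency failure. The sketch below only illustrates what each verdict means; the helper and its reactions are invented for this page and deliberately simplified (real callers also consult wait policies and the updated tuple's t_ctid).

#include "postgres.h"
#include "access/tableam.h"

/* hypothetical, simplified reaction to HeapTupleSatisfiesUpdate() */
static void
react_to_update_check(TM_Result result)
{
    switch (result)
    {
        case TM_Ok:
            /* tuple is live and unclaimed: safe to update or delete it */
            break;
        case TM_SelfModified:
            /* already modified by a later command of this transaction */
            break;
        case TM_BeingModified:
            /* a concurrent transaction holds a lock or a pending update */
            break;
        case TM_Updated:
        case TM_Deleted:
            /* a committed concurrent update or delete got there first */
            break;
        case TM_Invisible:
            elog(ERROR, "attempted to modify invisible tuple");
            break;
        default:
            break;
    }
}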

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)

Definition at line 1162 of file heapam_visibility.c.

1164 {
1165  TransactionId dead_after = InvalidTransactionId;
1166  HTSV_Result res;
1167 
1168  res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);
1169 
1170  if (res == HEAPTUPLE_RECENTLY_DEAD)
1171  {
1172  Assert(TransactionIdIsValid(dead_after));
1173 
1174  if (TransactionIdPrecedes(dead_after, OldestXmin))
1175  res = HEAPTUPLE_DEAD;
1176  }
1177  else
1178  Assert(!TransactionIdIsValid(dead_after));
1179 
1180  return res;
1181 }
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References Assert(), HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), InvalidTransactionId, res, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_page_is_all_visible(), heapam_index_build_range_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_tuple(), HeapCheckForSerializableConflictOut(), lazy_scan_noprune(), statapprox_heap(), and tuple_all_visible().
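
The wrapper simply upgrades HEAPTUPLE_RECENTLY_DEAD to HEAPTUPLE_DEAD once dead_after falls behind OldestXmin. Below is a hedged sketch of how a scan might classify tuples with it; the helper is invented for this page, real callers such as heapam_scan_analyze_next_tuple() apply more nuanced rules, and the buffer is assumed to be at least share-locked.

#include "postgres.h"
#include "access/heapam.h"

/* hypothetical classification helper, not PostgreSQL code */
static bool
tuple_counts_as_live(HeapTuple tuple, TransactionId oldest_xmin, Buffer buffer)
{
    switch (HeapTupleSatisfiesVacuum(tuple, oldest_xmin, buffer))
    {
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            return true;            /* still (or about to become) visible */
        case HEAPTUPLE_RECENTLY_DEAD:
        case HEAPTUPLE_DEAD:
            return false;           /* dead, whether or not removable yet */
    }
    return false;                   /* unreachable; keeps compilers quiet */
}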

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)

Definition at line 1196 of file heapam_visibility.c.

1197 {
1198  HeapTupleHeader tuple = htup->t_data;
1199 
1200  Assert(ItemPointerIsValid(&htup->t_self));
1201  Assert(htup->t_tableOid != InvalidOid);
1202  Assert(dead_after != NULL);
1203 
1204  *dead_after = InvalidTransactionId;
1205 
1206  /*
1207  * Has inserting transaction committed?
1208  *
1209  * If the inserting transaction aborted, then the tuple was never visible
1210  * to any other transaction, so we can delete it immediately.
1211  */
1212  if (!HeapTupleHeaderXminCommitted(tuple))
1213  {
1214  if (HeapTupleHeaderXminInvalid(tuple))
1215  return HEAPTUPLE_DEAD;
1216  /* Used by pre-9.0 binary upgrades */
1217  else if (tuple->t_infomask & HEAP_MOVED_OFF)
1218  {
1219  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1220 
1221  if (TransactionIdIsCurrentTransactionId(xvac))
1222  return HEAPTUPLE_DELETE_IN_PROGRESS;
1223  if (TransactionIdIsInProgress(xvac))
1224  return HEAPTUPLE_DELETE_IN_PROGRESS;
1225  if (TransactionIdDidCommit(xvac))
1226  {
1227  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1228  InvalidTransactionId);
1229  return HEAPTUPLE_DEAD;
1230  }
1231  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1232  InvalidTransactionId);
1233  }
1234  /* Used by pre-9.0 binary upgrades */
1235  else if (tuple->t_infomask & HEAP_MOVED_IN)
1236  {
1237  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1238 
1239  if (TransactionIdIsCurrentTransactionId(xvac))
1240  return HEAPTUPLE_INSERT_IN_PROGRESS;
1241  if (TransactionIdIsInProgress(xvac))
1242  return HEAPTUPLE_INSERT_IN_PROGRESS;
1243  if (TransactionIdDidCommit(xvac))
1244  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1245  InvalidTransactionId);
1246  else
1247  {
1248  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1249  InvalidTransactionId);
1250  return HEAPTUPLE_DEAD;
1251  }
1252  }
1253  else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
1254  {
1255  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1256  return HEAPTUPLE_INSERT_IN_PROGRESS;
1257  /* only locked? run infomask-only check first, for performance */
1258  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
1259  HeapTupleHeaderIsOnlyLocked(tuple))
1260  return HEAPTUPLE_INSERT_IN_PROGRESS;
1261  /* inserted and then deleted by same xact */
1262  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
1263  return HEAPTUPLE_DELETE_IN_PROGRESS;
1264  /* deleting subtransaction must have aborted */
1265  return HEAPTUPLE_INSERT_IN_PROGRESS;
1266  }
1267  else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
1268  {
1269  /*
1270  * It'd be possible to discern between INSERT/DELETE in progress
1271  * here by looking at xmax - but that doesn't seem beneficial for
1272  * the majority of callers and even detrimental for some. We'd
1273  * rather have callers look at/wait for xmin than xmax. It's
1274  * always correct to return INSERT_IN_PROGRESS because that's
1275  * what's happening from the view of other backends.
1276  */
1277  return HEAPTUPLE_INSERT_IN_PROGRESS;
1278  }
1279  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
1280  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1281  HeapTupleHeaderGetRawXmin(tuple));
1282  else
1283  {
1284  /*
1285  * Not in Progress, Not Committed, so either Aborted or crashed
1286  */
1287  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1288  InvalidTransactionId);
1289  return HEAPTUPLE_DEAD;
1290  }
1291 
1292  /*
1293  * At this point the xmin is known committed, but we might not have
1294  * been able to set the hint bit yet; so we can no longer Assert that
1295  * it's set.
1296  */
1297  }
1298 
1299  /*
1300  * Okay, the inserter committed, so it was good at some point. Now what
1301  * about the deleting transaction?
1302  */
1303  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1304  return HEAPTUPLE_LIVE;
1305 
1306  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1307  {
1308  /*
1309  * "Deleting" xact really only locked it, so the tuple is live in any
1310  * case. However, we should make sure that either XMAX_COMMITTED or
1311  * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1312  * examining the tuple for future xacts.
1313  */
1314  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1315  {
1316  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1317  {
1318  /*
1319  * If it's a pre-pg_upgrade tuple, the multixact cannot
1320  * possibly be running; otherwise have to check.
1321  */
1322  if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1323  MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
1324  true))
1325  return HEAPTUPLE_LIVE;
1326  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1327  }
1328  else
1329  {
1330  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1331  return HEAPTUPLE_LIVE;
1332  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1333  InvalidTransactionId);
1334  }
1335  }
1336 
1337  /*
1338  * We don't really care whether xmax did commit, abort or crash. We
1339  * know that xmax did lock the tuple, but it did not and will never
1340  * actually update it.
1341  */
1342 
1343  return HEAPTUPLE_LIVE;
1344  }
1345 
1346  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1347  {
1348  TransactionId xmax = HeapTupleGetUpdateXid(tuple);
1349 
1350  /* already checked above */
1351  Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
1352 
1353  /* not LOCKED_ONLY, so it has to have an xmax */
1354  Assert(TransactionIdIsValid(xmax));
1355 
1356  if (TransactionIdIsInProgress(xmax))
1357  return HEAPTUPLE_DELETE_IN_PROGRESS;
1358  else if (TransactionIdDidCommit(xmax))
1359  {
1360  /*
1361  * The multixact might still be running due to lockers. Need to
1362  * allow for pruning if below the xid horizon regardless --
1363  * otherwise we could end up with a tuple where the updater has to
1364  * be removed due to the horizon, but is not pruned away. It's
1365  * not a problem to prune that tuple, because any remaining
1366  * lockers will also be present in newer tuple versions.
1367  */
1368  *dead_after = xmax;
1369  return HEAPTUPLE_RECENTLY_DEAD;
1370  }
1371  else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1372  {
1373  /*
1374  * Not in Progress, Not Committed, so either Aborted or crashed.
1375  * Mark the Xmax as invalid.
1376  */
1377  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1378  }
1379 
1380  return HEAPTUPLE_LIVE;
1381  }
1382 
1383  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1384  {
1385  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1386  return HEAPTUPLE_DELETE_IN_PROGRESS;
1387  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
1388  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1389  HeapTupleHeaderGetRawXmax(tuple));
1390  else
1391  {
1392  /*
1393  * Not in Progress, Not Committed, so either Aborted or crashed
1394  */
1395  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1396  InvalidTransactionId);
1397  return HEAPTUPLE_LIVE;
1398  }
1399 
1400  /*
1401  * At this point the xmax is known committed, but we might not have
1402  * been able to set the hint bit yet; so we can no longer Assert that
1403  * it's set.
1404  */
1405  }
1406 
1407  /*
1408  * Deleter committed, allow caller to check if it was recent enough that
1409  * some open transactions could still see the tuple.
1410  */
1411  *dead_after = HeapTupleHeaderGetRawXmax(tuple);
1412  return HEAPTUPLE_RECENTLY_DEAD;
1413 }

References Assert(), HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXvac, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
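
The point of the split is that the caller, not this function, decides which horizon makes a "recently dead" tuple actually removable. Below is a sketch of the pattern the pruning callers use, assuming a GlobalVisState obtained earlier (for instance from GlobalVisTestFor()); the wrapper name is illustrative.

#include "postgres.h"

#include "access/heapam.h"
#include "utils/snapmgr.h"

/* Illustrative wrapper: decide "recently dead" vs. "dead" with a GlobalVisState. */
static HTSV_Result
tuple_status_for_pruning(HeapTuple tup, Buffer buffer, GlobalVisState *vistest)
{
    TransactionId dead_after = InvalidTransactionId;
    HTSV_Result res;

    res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);

    /*
     * Only the RECENTLY_DEAD answer depends on a horizon: promote it to DEAD
     * once the deleter's XID has become removable for this relation.
     */
    if (res == HEAPTUPLE_RECENTLY_DEAD &&
        GlobalVisTestIsRemovableXid(vistest, dead_after))
        res = HEAPTUPLE_DEAD;

    return res;
}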

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)

Definition at line 1767 of file heapam_visibility.c.

1768 {
1769  switch (snapshot->snapshot_type)
1770  {
1771  case SNAPSHOT_MVCC:
1772  return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1773  case SNAPSHOT_SELF:
1774  return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1775  case SNAPSHOT_ANY:
1776  return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1777  case SNAPSHOT_TOAST:
1778  return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1779  case SNAPSHOT_DIRTY:
1780  return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1781  case SNAPSHOT_HISTORIC_MVCC:
1782  return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1783  case SNAPSHOT_NON_VACUUMABLE:
1784  return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1785  }
1786 
1787  return false; /* keep compiler quiet */
1788 }
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition: snapshot.h:74
@ SNAPSHOT_SELF
Definition: snapshot.h:64
@ SNAPSHOT_NON_VACUUMABLE
Definition: snapshot.h:118
@ SNAPSHOT_MVCC
Definition: snapshot.h:50
@ SNAPSHOT_ANY
Definition: snapshot.h:69
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:109
@ SNAPSHOT_DIRTY
Definition: snapshot.h:102
SnapshotType snapshot_type
Definition: snapshot.h:144

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_scan_bitmap_next_block(), heapam_tuple_satisfies_snapshot(), heapgetpage(), heapgettup(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
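
A hedged sketch of the minimal calling sequence (heap_fetch() does essentially this, with more checks): pin and share-lock the page, point a HeapTupleData at the item, and let the snapshot-type dispatch above decide. The helper name is illustrative.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"

/* Illustrative: is the tuple at 'tid' visible under the active snapshot? */
static bool
tid_visible_to_active_snapshot(Relation rel, ItemPointer tid)
{
    Snapshot    snapshot = GetActiveSnapshot();
    OffsetNumber offnum = ItemPointerGetOffsetNumber(tid);
    Buffer      buf;
    Page        page;
    bool        visible = false;

    buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
    LockBuffer(buf, BUFFER_LOCK_SHARE);
    page = BufferGetPage(buf);

    if (offnum >= FirstOffsetNumber && offnum <= PageGetMaxOffsetNumber(page))
    {
        ItemId      lp = PageGetItemId(page, offnum);

        if (ItemIdIsNormal(lp))
        {
            HeapTupleData tuple;

            tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp);
            tuple.t_len = ItemIdGetLength(lp);
            tuple.t_tableOid = RelationGetRelid(rel);
            tuple.t_self = *tid;

            /* the buffer must be locked (shared is enough) for this call */
            visible = HeapTupleSatisfiesVisibility(&tuple, snapshot, buf);
        }
    }

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);

    return visible;
}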

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)

Definition at line 141 of file heapam_visibility.c.

143 {
144  SetHintBits(tuple, buffer, infomask, xid);
145 }

References SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ htsv_get_valid_status()

static HTSV_Result htsv_get_valid_status ( int  status)
inline static

Definition at line 229 of file heapam.h.

230 {
231  Assert(status >= HEAPTUPLE_DEAD &&
232  status <= HEAPTUPLE_DELETE_IN_PROGRESS);
233  return (HTSV_Result) status;
234 }

References Assert(), HEAPTUPLE_DEAD, and HEAPTUPLE_DELETE_IN_PROGRESS.

Referenced by heap_prune_chain(), and lazy_scan_prune().
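
The helper exists because the pruning code caches HTSV_Result values in a plain int8 array, with -1 reserved for "not computed", and later needs them back as the enum. A small illustrative reader, assuming that caching convention (the helper name is not from the sources):

#include "postgres.h"

#include "access/heapam.h"

/*
 * Illustrative: read back a status that was cached as int8, where -1 means
 * "no status recorded for this offset".
 */
static HTSV_Result
cached_tuple_status(const int8 *htsv, OffsetNumber offnum)
{
    Assert(htsv[offnum] != -1);

    /* validate the stored value and convert it back to the enum */
    return htsv_get_valid_status(htsv[offnum]);
}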

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
TransactionId  conflict_xid,
bool  lp_truncate_only,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)

Definition at line 1304 of file pruneheap.c.

1312 {
1313  xl_heap_prune xlrec;
1314  XLogRecPtr recptr;
1315  uint8 info;
1316 
1317  /* The following local variables hold data registered in the WAL record: */
1318  xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
1319  xlhp_freeze_plans freeze_plans;
1320  xlhp_prune_items redirect_items;
1321  xlhp_prune_items dead_items;
1322  xlhp_prune_items unused_items;
1323  OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
1324 
1325  xlrec.flags = 0;
1326 
1327  /*
1328  * Prepare data for the buffer. The arrays are not actually in the
1329  * buffer, but we pretend that they are. When XLogInsert stores a full
1330  * page image, the arrays can be omitted.
1331  */
1332  XLogBeginInsert();
1333  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
1334  if (nfrozen > 0)
1335  {
1336  int nplans;
1337 
1338  xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
1339 
1340  /*
1341  * Prepare deduplicated representation for use in the WAL record. This
1342  * destructively sorts frozen tuples array in-place.
1343  */
1344  nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
1345 
1346  freeze_plans.nplans = nplans;
1347  XLogRegisterBufData(0, (char *) &freeze_plans,
1348  offsetof(xlhp_freeze_plans, plans));
1349  XLogRegisterBufData(0, (char *) plans,
1350  sizeof(xlhp_freeze_plan) * nplans);
1351  }
1352  if (nredirected > 0)
1353  {
1354  xlrec.flags |= XLHP_HAS_REDIRECTIONS;
1355 
1356  redirect_items.ntargets = nredirected;
1357  XLogRegisterBufData(0, (char *) &redirect_items,
1358  offsetof(xlhp_prune_items, data));
1359  XLogRegisterBufData(0, (char *) redirected,
1360  sizeof(OffsetNumber[2]) * nredirected);
1361  }
1362  if (ndead > 0)
1363  {
1364  xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
1365 
1366  dead_items.ntargets = ndead;
1367  XLogRegisterBufData(0, (char *) &dead_items,
1368  offsetof(xlhp_prune_items, data));
1369  XLogRegisterBufData(0, (char *) dead,
1370  sizeof(OffsetNumber) * ndead);
1371  }
1372  if (nunused > 0)
1373  {
1374  xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
1375 
1376  unused_items.ntargets = nunused;
1377  XLogRegisterBufData(0, (char *) &unused_items,
1378  offsetof(xlhp_prune_items, data));
1379  XLogRegisterBufData(0, (char *) unused,
1380  sizeof(OffsetNumber) * nunused);
1381  }
1382  if (nfrozen > 0)
1383  XLogRegisterBufData(0, (char *) frz_offsets,
1384  sizeof(OffsetNumber) * nfrozen);
1385 
1386  /*
1387  * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
1388  * flags above.
1389  */
1390  if (RelationIsAccessibleInLogicalDecoding(relation))
1391  xlrec.flags |= XLHP_IS_CATALOG_REL;
1392  if (TransactionIdIsValid(conflict_xid))
1393  xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
1394  if (cleanup_lock)
1395  xlrec.flags |= XLHP_CLEANUP_LOCK;
1396  else
1397  {
1398  Assert(nredirected == 0 && ndead == 0);
1399  /* also, any items in 'unused' must've been LP_DEAD previously */
1400  }
1401  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
1402  if (TransactionIdIsValid(conflict_xid))
1403  XLogRegisterData((char *) &conflict_xid, sizeof(TransactionId));
1404 
1405  switch (reason)
1406  {
1407  case PRUNE_ON_ACCESS:
1408  info = XLOG_HEAP2_PRUNE_ON_ACCESS;
1409  break;
1410  case PRUNE_VACUUM_SCAN:
1411  info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
1412  break;
1413  case PRUNE_VACUUM_CLEANUP:
1414  info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
1415  break;
1416  default:
1417  elog(ERROR, "unrecognized prune reason: %d", (int) reason);
1418  break;
1419  }
1420  recptr = XLogInsert(RM_HEAP2_ID, info);
1421 
1422  PageSetLSN(BufferGetPage(buffer), recptr);
1423 }
#define XLHP_HAS_CONFLICT_HORIZON
Definition: heapam_xlog.h:316
#define XLHP_HAS_FREEZE_PLANS
Definition: heapam_xlog.h:322
#define SizeOfHeapPrune
Definition: heapam_xlog.h:295
#define XLHP_HAS_NOW_UNUSED_ITEMS
Definition: heapam_xlog.h:331
#define XLHP_HAS_REDIRECTIONS
Definition: heapam_xlog.h:329
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition: heapam_xlog.h:60
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition: heapam_xlog.h:59
#define XLHP_CLEANUP_LOCK
Definition: heapam_xlog.h:308
#define XLHP_HAS_DEAD_ITEMS
Definition: heapam_xlog.h:330
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition: heapam_xlog.h:61
#define XLHP_IS_CATALOG_REL
Definition: heapam_xlog.h:298
const void * data
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: pruneheap.c:1229

References Assert(), BufferGetPage(), data, elog, ERROR, xl_heap_prune::flags, heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_freeze_plans::nplans, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_freeze_execute_prepared(), heap_page_prune(), and lazy_vacuum_heap_page().
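
A sketch of the caller-side pattern, loosely modeled on vacuum's second heap pass: change the page inside a critical section, mark the buffer dirty, and emit the record only if the relation is WAL-logged. The fourth argument (shown as lp_truncate_only in the prototype above but used as cleanup_lock in the body) reports whether a cleanup lock was held; passing false is only valid when there are no redirects or newly dead items, matching the Assert above. The function name and the elided page edit are assumptions of the example.

#include "postgres.h"

#include "access/heapam.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/*
 * Illustrative caller: mark previously-LP_DEAD items LP_UNUSED, then log
 * that change.  The page modification itself is elided.
 */
static void
mark_unused_and_log(Relation relation, Buffer buffer,
                    OffsetNumber *unused, int nunused)
{
    START_CRIT_SECTION();

    /* ... set the 'unused' line pointers to LP_UNUSED on the page here ... */

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(relation))
        log_heap_prune_and_freeze(relation, buffer,
                                  InvalidTransactionId,   /* no conflict horizon */
                                  false,                  /* no cleanup lock held */
                                  PRUNE_VACUUM_CLEANUP,
                                  NULL, 0,                /* no freeze plans */
                                  NULL, 0,                /* no redirects */
                                  NULL, 0,                /* no new dead items */
                                  unused, nunused);

    END_CRIT_SECTION();
}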

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)

Definition at line 1783 of file heapam.c.

1784 {
1785  if (bistate->current_buf != InvalidBuffer)
1786  ReleaseBuffer(bistate->current_buf);
1787  bistate->current_buf = InvalidBuffer;
1788 
1789  /*
1790  * Despite the name, we also reset bulk relation extension state.
1791  * Otherwise we can end up erroring out due to looking for free space in
1792  * ->next_free of one partition, even though ->next_free was set when
1793  * extending another partition. It could obviously also be bad for
1794  * efficiency to look at existing blocks at offsets from another
1795  * partition, even if we don't error out.
1796  */
1797  bistate->next_free = InvalidBlockNumber;
1798  bistate->last_free = InvalidBlockNumber;
1799 }

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
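
A sketch of the pattern this function supports (names and the partition-routing details are illustrative): a bulk load into a partitioned table keeps one BulkInsertState for the whole run but resets it whenever the target partition changes, for the reasons given in the comment above.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"

/*
 * Illustrative bulk load: one BulkInsertState is reused, but its pin and
 * free-space hints are reset on every partition switch.
 */
static void
bulk_insert_rows(Relation *parts, HeapTuple *tuples, int *part_of, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    int         prev_part = -1;

    for (int i = 0; i < ntuples; i++)
    {
        if (part_of[i] != prev_part)
        {
            /* target changed: drop the pin and relation-extension state */
            ReleaseBulkInsertStatePin(bistate);
            prev_part = part_of[i];
        }

        heap_insert(parts[part_of[i]], tuples[i],
                    GetCurrentCommandId(true), 0, bistate);
    }

    FreeBulkInsertState(bistate);
}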

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)

Definition at line 5207 of file reorderbuffer.c.

5211 {
5212  ReorderBufferTupleCidKey key;
5213  ReorderBufferTupleCidEnt *ent;
5214  ForkNumber forkno;
5215  BlockNumber blockno;
5216  bool updated_mapping = false;
5217 
5218  /*
5219  * Return unresolved if tuplecid_data is not valid. That's because when
5220  * streaming in-progress transactions we may run into tuples with the CID
5221  * before actually decoding them. Think e.g. about INSERT followed by
5222  * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5223  * INSERT. So in such cases, we assume the CID is from the future
5224  * command.
5225  */
5226  if (tuplecid_data == NULL)
5227  return false;
5228 
5229  /* be careful about padding */
5230  memset(&key, 0, sizeof(key));
5231 
5232  Assert(!BufferIsLocal(buffer));
5233 
5234  /*
5235  * get relfilelocator from the buffer, no convenient way to access it
5236  * other than that.
5237  */
5238  BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5239 
5240  /* tuples can only be in the main fork */
5241  Assert(forkno == MAIN_FORKNUM);
5242  Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5243 
5244  ItemPointerCopy(&htup->t_self,
5245  &key.tid);
5246 
5247 restart:
5248  ent = (ReorderBufferTupleCidEnt *)
5249  hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5250 
5251  /*
5252  * failed to find a mapping, check whether the table was rewritten and
5253  * apply mapping if so, but only do that once - there can be no new
5254  * mappings while we are in here since we have to hold a lock on the
5255  * relation.
5256  */
5257  if (ent == NULL && !updated_mapping)
5258  {
5259  UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5260  /* now check but don't update for a mapping again */
5261  updated_mapping = true;
5262  goto restart;
5263  }
5264  else if (ent == NULL)
5265  return false;
5266 
5267  if (cmin)
5268  *cmin = ent->cmin;
5269  if (cmax)
5270  *cmax = ent->cmax;
5271  return true;
5272 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:3398
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_FIND
Definition: hsearch.h:113
ForkNumber
Definition: relpath.h:48
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition: snapmgr.c:102

References Assert(), BufferGetTag(), BufferIsLocal, ReorderBufferTupleCidEnt::cmax, ReorderBufferTupleCidEnt::cmin, HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), sort-test::key, MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().
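
A heavily simplified sketch of the call shape used by historic-MVCC visibility checks during logical decoding, assuming the per-transaction CID hash is available via HistoricSnapshotGetTupleCids(); the real caller treats a failed lookup differently for cmin and cmax, and the helper name here is illustrative.

#include "postgres.h"

#include "access/heapam.h"
#include "utils/snapmgr.h"

/*
 * Illustrative check: was the catalog tuple inserted before the command
 * currently being decoded?  A missing mapping is treated as "not visible".
 */
static bool
inserted_before_current_change(Snapshot snapshot, HeapTuple htup, Buffer buffer)
{
    CommandId   cmin;
    CommandId   cmax;

    if (!ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
                                       htup, buffer, &cmin, &cmax))
        return false;

    return cmin < snapshot->curcid;
}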

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
ItemPointer  tid 
)

Definition at line 2972 of file heapam.c.

2973 {
2974  TM_Result result;
2975  TM_FailureData tmfd;
2976 
2977  result = heap_delete(relation, tid,
2978  GetCurrentCommandId(true), InvalidSnapshot,
2979  TABLE_MODIFY_WAIT /* wait for commit */ ,
2980  &tmfd, false /* changingPart */ , NULL);
2981  switch (result)
2982  {
2983  case TM_SelfModified:
2984  /* Tuple was already updated in current command? */
2985  elog(ERROR, "tuple already updated by self");
2986  break;
2987 
2988  case TM_Ok:
2989  /* done successfully */
2990  break;
2991 
2992  case TM_Updated:
2993  elog(ERROR, "tuple concurrently updated");
2994  break;
2995 
2996  case TM_Deleted:
2997  elog(ERROR, "tuple concurrently deleted");
2998  break;
2999 
3000  default:
3001  elog(ERROR, "unrecognized heap_delete status: %u", result);
3002  break;
3003  }
3004 }
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, int options, TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot)
Definition: heapam.c:2511
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:826

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TABLE_MODIFY_WAIT, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
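
A sketch of typical usage, assuming the caller already holds a suitable lock on the relation: locate tuples with a scan and hand each TID to simple_heap_delete(), relying on its error-on-conflict behavior being acceptable (as it is for catalogs and TOAST). The helper name is illustrative; real catalog code normally goes through systable_* scans and CatalogTupleDelete() instead.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"

/* Illustrative: delete every row a catalog-style scan returns. */
static void
delete_matching_rows(Relation rel, ScanKey keys, int nkeys)
{
    TableScanDesc scan;
    HeapTuple   tup;

    scan = table_beginscan_catalog(rel, nkeys, keys);
    while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
        simple_heap_delete(rel, &tup->t_self);
    table_endscan(scan);
}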

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)

Definition at line 2452 of file heapam.c.

2453 {
2454  heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2455 }
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:1821

References GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().
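
A minimal sketch of direct usage (helper name illustrative): form the tuple, insert it, free it. Catalog callers use CatalogTupleInsert(), which additionally maintains the catalog indexes; simple_heap_insert() itself touches no indexes.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "utils/rel.h"

/* Illustrative: form and insert one row, leaving index maintenance to the caller. */
static void
insert_one_row(Relation rel, Datum *values, bool *nulls)
{
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

    simple_heap_insert(rel, tup);
    heap_freetuple(tup);
}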

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)

Definition at line 4132 of file heapam.c.

4134 {
4135  TM_Result result;
4136  TM_FailureData tmfd;
4137  LockTupleMode lockmode;
4138 
4139  result = heap_update(relation, otid, tup,
4140  GetCurrentCommandId(true), InvalidSnapshot,
4141  TABLE_MODIFY_WAIT /* wait for commit */ ,
4142  &tmfd, &lockmode, update_indexes, NULL);
4143  switch (result)
4144  {
4145  case TM_SelfModified:
4146  /* Tuple was already updated in current command? */
4147  elog(ERROR, "tuple already updated by self");
4148  break;
4149 
4150  case TM_Ok:
4151  /* done successfully */
4152  break;
4153 
4154  case TM_Updated:
4155  elog(ERROR, "tuple concurrently updated");
4156  break;
4157 
4158  case TM_Deleted:
4159  elog(ERROR, "tuple concurrently deleted");
4160  break;
4161 
4162  default:
4163  elog(ERROR, "unrecognized heap_update status: %u", result);
4164  break;
4165  }
4166 }
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot)
Definition: heapam.c:3019
LockTupleMode
Definition: lockoptions.h:50

References elog, ERROR, GetCurrentCommandId(), heap_update(), InvalidSnapshot, TABLE_MODIFY_WAIT, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
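
A sketch of the calling pattern (helper name illustrative), mirroring what CatalogTupleUpdate() does: perform the update, then act on the returned TU_UpdateIndexes value to decide whether new index entries are needed.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"

/* Illustrative: replace a previously fetched tuple in place. */
static void
update_located_tuple(Relation rel, HeapTuple oldtup, HeapTuple newtup)
{
    TU_UpdateIndexes update_indexes;

    simple_heap_update(rel, &oldtup->t_self, newtup, &update_indexes);

    if (update_indexes != TU_None)
    {
        /* insert index entries for newtup here, as CatalogTupleUpdate() does */
    }
}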