PostgreSQL Source Code  git master
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 39 of file heapam.h.
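
The HEAP_INSERT_* values are independent flag bits that callers OR together into the options argument of heap_insert() or heap_multi_insert(). A minimal sketch, assuming a backend context and a caller-built tuple (the wrapper function below is hypothetical):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical helper: insert one pre-built tuple, skipping FSM lookups
 * (useful when the caller always appends to the end of the relation). */
static void
insert_skipping_fsm(Relation rel, HeapTuple tup)
{
    int         options = HEAP_INSERT_SKIP_FSM;

    heap_insert(rel, tup, GetCurrentCommandId(true), options, NULL);
}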

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 43 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 42 of file heapam.h.

◆ HeapScanIsValid

#define HeapScanIsValid (   scan)    PointerIsValid(scan)

Definition at line 288 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 49 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

Definition at line 45 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 110 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

125 {
126  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
127  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
128  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
129  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
130  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
131 } HTSV_Result;
HTSV_Result
Definition: heapam.h:125
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_LIVE
Definition: heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:130
@ HEAPTUPLE_DEAD
Definition: heapam.h:126
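
A minimal sketch of how a caller typically dispatches on this result. It assumes the tuple's t_self and t_tableOid are set, that the containing buffer is locked, and that an OldestXmin horizon has already been computed; the helper itself is hypothetical:

#include "postgres.h"
#include "access/heapam.h"

/* Hypothetical helper: report whether a tuple could be removed right now. */
static bool
tuple_is_removable(HeapTuple tuple, TransactionId OldestXmin, Buffer buf)
{
    switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
    {
        case HEAPTUPLE_DEAD:
            return true;    /* dead and past every snapshot's horizon */
        case HEAPTUPLE_RECENTLY_DEAD:
            return false;   /* dead, but possibly still visible somewhere */
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            return false;   /* must be kept */
    }
    return false;           /* unreachable; keeps the compiler quiet */
}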

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 268 of file heapam.h.

269 {
270  PRUNE_ON_ACCESS, /* on-access pruning */
271  PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
272  PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
273 } PruneReason;
PruneReason
Definition: heapam.h:269
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:272
@ PRUNE_ON_ACCESS
Definition: heapam.h:270
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:271

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1935 of file heapam.c.

1936 {
1937  if (bistate->current_buf != InvalidBuffer)
1938  ReleaseBuffer(bistate->current_buf);
1939  FreeAccessStrategy(bistate->strategy);
1940  pfree(bistate);
1941 }
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4924
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:681
void pfree(void *pointer)
Definition: mcxt.c:1521
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1918 of file heapam.c.

1919 {
1920  BulkInsertState bistate;
1921 
1922  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1923  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1924  bistate->current_buf = InvalidBuffer;
1925  bistate->next_free = InvalidBlockNumber;
1926  bistate->last_free = InvalidBlockNumber;
1927  bistate->already_extended_by = 0;
1928  return bistate;
1929 }
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:38
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:45
void * palloc(Size size)
Definition: mcxt.c:1317
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
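
GetBulkInsertState() and FreeBulkInsertState() bracket a bulk-loading loop, as the callers listed above do. A minimal sketch of that pattern, assuming the tuples have already been formed (the loader function is hypothetical):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical bulk loader: reuse one BulkInsertState across many inserts so
 * the buffer ring and target-block hint carry over between calls. */
static void
bulk_insert_tuples(Relation rel, HeapTuple *tuples, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, 0, bistate);

    FreeBulkInsertState(bistate);
}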

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 6023 of file heapam.c.

6024 {
6026  ItemId lp;
6027  HeapTupleData tp;
6028  Page page;
6029  BlockNumber block;
6030  Buffer buffer;
6031 
6032  Assert(ItemPointerIsValid(tid));
6033 
6034  block = ItemPointerGetBlockNumber(tid);
6035  buffer = ReadBuffer(relation, block);
6036  page = BufferGetPage(buffer);
6037 
6039 
6040  /*
6041  * Page can't be all visible, we just inserted into it, and are still
6042  * running.
6043  */
6044  Assert(!PageIsAllVisible(page));
6045 
6046  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
6047  Assert(ItemIdIsNormal(lp));
6048 
6049  tp.t_tableOid = RelationGetRelid(relation);
6050  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6051  tp.t_len = ItemIdGetLength(lp);
6052  tp.t_self = *tid;
6053 
6054  /*
6055  * Sanity check that the tuple really is a speculatively inserted tuple,
6056  * inserted by us.
6057  */
6058  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6059  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6060  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6061  elog(ERROR, "attempted to kill a non-speculative tuple");
6063 
6064  /*
6065  * No need to check for serializable conflicts here. There is never a
6066  * need for a combo CID, either. No need to extract replica identity, or
6067  * do anything special with infomask bits.
6068  */
6069 
6071 
6072  /*
6073  * The tuple will become DEAD immediately. Flag that this page is a
6074  * candidate for pruning by setting xmin to TransactionXmin. While not
6075  * immediately prunable, it is the oldest xid we can cheaply determine
6076  * that's safe against wraparound / being older than the table's
6077  * relfrozenxid. To defend against the unlikely case of a new relation
6078  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6079  * if so (vacuum can't subsequently move relfrozenxid to beyond
6080  * TransactionXmin, so there's no race here).
6081  */
6083  {
6084  TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6085  TransactionId prune_xid;
6086 
6087  if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6088  prune_xid = relfrozenxid;
6089  else
6090  prune_xid = TransactionXmin;
6091  PageSetPrunable(page, prune_xid);
6092  }
6093 
6094  /* store transaction information of xact deleting the tuple */
6097 
6098  /*
6099  * Set the tuple header xmin to InvalidTransactionId. This makes the
6100  * tuple immediately invisible to everyone. (In particular, to any
6101  * transactions waiting on the speculative token, woken up later.)
6102  */
6104 
6105  /* Clear the speculative insertion token too */
6106  tp.t_data->t_ctid = tp.t_self;
6107 
6108  MarkBufferDirty(buffer);
6109 
6110  /*
6111  * XLOG stuff
6112  *
6113  * The WAL records generated here match heap_delete(). The same recovery
6114  * routines are used.
6115  */
6116  if (RelationNeedsWAL(relation))
6117  {
6118  xl_heap_delete xlrec;
6119  XLogRecPtr recptr;
6120 
6121  xlrec.flags = XLH_DELETE_IS_SUPER;
6123  tp.t_data->t_infomask2);
6125  xlrec.xmax = xid;
6126 
6127  XLogBeginInsert();
6128  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
6129  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6130 
6131  /* No replica identity & replication origin logged */
6132 
6133  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6134 
6135  PageSetLSN(page, recptr);
6136  }
6137 
6138  END_CRIT_SECTION();
6139 
6140  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6141 
6142  if (HeapTupleHasExternal(&tp))
6143  {
6144  Assert(!IsToastRelation(relation));
6145  heap_toast_delete(relation, &tp, true);
6146  }
6147 
6148  /*
6149  * Never need to mark tuple for invalidation, since catalogs don't support
6150  * speculative insertion
6151  */
6152 
6153  /* Now we can release the buffer */
6154  ReleaseBuffer(buffer);
6155 
6156  /* count deletion, as we counted the insertion too */
6157  pgstat_count_heap_delete(relation);
6158 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2532
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5158
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
#define PageSetPrunable(page, xid)
Definition: bufpage.h:447
#define Assert(condition)
Definition: c.h:812
uint32 TransactionId
Definition: c.h:606
bool IsToastRelation(Relation relation)
Definition: catalog.c:175
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2629
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:34
#define SizeOfHeapDelete
Definition: heapam_xlog.h:121
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:105
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:275
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:499
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:315
#define HEAP_XMAX_BITS
Definition: htup_details.h:267
#define HeapTupleHasExternal(tuple)
Definition: htup_details.h:671
#define HEAP_MOVED
Definition: htup_details.h:213
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:505
#define RelationNeedsWAL(relation)
Definition: rel.h:628
TransactionId TransactionXmin
Definition: snapmgr.c:99
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
union HeapTupleHeaderData::@46 t_choice
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:115
OffsetNumber offnum
Definition: heapam_xlog.h:116
uint8 infobits_set
Definition: heapam_xlog.h:117
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:453
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 1029 of file heapam.c.

1033 {
1034  HeapScanDesc scan;
1035 
1036  /*
1037  * increment relation ref count while scanning relation
1038  *
1039  * This is just to make really sure the relcache entry won't go away while
1040  * the scan has a pointer to it. Caller should be holding the rel open
1041  * anyway, so this is redundant in all normal scenarios...
1042  */
1044 
1045  /*
1046  * allocate and initialize scan descriptor
1047  */
1048  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1049 
1050  scan->rs_base.rs_rd = relation;
1051  scan->rs_base.rs_snapshot = snapshot;
1052  scan->rs_base.rs_nkeys = nkeys;
1053  scan->rs_base.rs_flags = flags;
1054  scan->rs_base.rs_parallel = parallel_scan;
1055  scan->rs_strategy = NULL; /* set in initscan */
1056  scan->rs_vmbuffer = InvalidBuffer;
1057  scan->rs_empty_tuples_pending = 0;
1058 
1059  /*
1060  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1061  */
1062  if (!(snapshot && IsMVCCSnapshot(snapshot)))
1064 
1065  /*
1066  * For seqscan and sample scans in a serializable transaction, acquire a
1067  * predicate lock on the entire relation. This is required not only to
1068  * lock all the matching tuples, but also to conflict with new insertions
1069  * into the table. In an indexscan, we take page locks on the index pages
1070  * covering the range specified in the scan qual, but in a heap scan there
1071  * is nothing more fine-grained to lock. A bitmap scan is a different
1072  * story, there we have already scanned the index and locked the index
1073  * pages covering the predicate. But in that case we still have to lock
1074  * any matching heap tuples. For sample scan we could optimize the locking
1075  * to be at least page-level granularity, but we'd need to add per-tuple
1076  * locking for that.
1077  */
1079  {
1080  /*
1081  * Ensure a missing snapshot is noticed reliably, even if the
1082  * isolation mode means predicate locking isn't performed (and
1083  * therefore the snapshot isn't used here).
1084  */
1085  Assert(snapshot);
1086  PredicateLockRelation(relation, snapshot);
1087  }
1088 
1089  /* we only need to set this up once */
1090  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1091 
1092  /*
1093  * Allocate memory to keep track of page allocation for parallel workers
1094  * when doing a parallel scan.
1095  */
1096  if (parallel_scan != NULL)
1098  else
1099  scan->rs_parallelworkerdata = NULL;
1100 
1101  /*
1102  * we do this here instead of in initscan() because heap_rescan also calls
1103  * initscan() and we don't want to allocate memory again
1104  */
1105  if (nkeys > 0)
1106  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1107  else
1108  scan->rs_base.rs_key = NULL;
1109 
1110  initscan(scan, key, false);
1111 
1112  scan->rs_read_stream = NULL;
1113 
1114  /*
1115  * Set up a read stream for sequential scans and TID range scans. This
1116  * should be done after initscan() because initscan() allocates the
1117  * BufferAccessStrategy object passed to the read stream API.
1118  */
1119  if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1121  {
1123 
1124  if (scan->rs_base.rs_parallel)
1126  else
1128 
1130  scan->rs_strategy,
1131  scan->rs_base.rs_rd,
1132  MAIN_FORKNUM,
1133  cb,
1134  scan,
1135  0);
1136  }
1137 
1138 
1139  return (TableScanDesc) scan;
1140 }
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:223
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:261
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:285
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:110
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2566
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:551
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.h:56
#define READ_STREAM_SEQUENTIAL
Definition: read_stream.h:36
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2150
@ MAIN_FORKNUM
Definition: relpath.h:58
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:56
Buffer rs_vmbuffer
Definition: heapam.h:102
BufferAccessStrategy rs_strategy
Definition: heapam.h:71
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:93
HeapTupleData rs_ctup
Definition: heapam.h:73
ReadStream * rs_read_stream
Definition: heapam.h:76
int rs_empty_tuples_pending
Definition: heapam.h:103
TableScanDescData rs_base
Definition: heapam.h:56
Relation rs_rd
Definition: relscan.h:38
uint32 rs_flags
Definition: relscan.h:70
struct ScanKeyData * rs_key
Definition: relscan.h:41
struct SnapshotData * rs_snapshot
Definition: relscan.h:39
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:72
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:52
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:50
@ SO_TYPE_SEQSCAN
Definition: tableam.h:48

References Assert, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), if(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_SEQUENTIAL, RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
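
A minimal sketch of a direct sequential scan over a heap relation, counting visible tuples. Most backend code would go through the table AM layer (table_beginscan() and friends) instead, so calling the heap routines directly here is purely illustrative, and the function is hypothetical:

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

/* Hypothetical full-table walk: count the tuples in rel that are visible to
 * the transaction snapshot. */
static uint64
count_visible_tuples(Relation rel)
{
    Snapshot    snapshot = GetTransactionSnapshot();
    TableScanDesc scan;
    HeapTuple   tuple;
    uint64      ntuples = 0;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        ntuples++;

    heap_endscan(scan);

    return ntuples;
}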

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2674 of file heapam.c.

2677 {
2678  TM_Result result;
2680  ItemId lp;
2681  HeapTupleData tp;
2682  Page page;
2683  BlockNumber block;
2684  Buffer buffer;
2685  Buffer vmbuffer = InvalidBuffer;
2686  TransactionId new_xmax;
2687  uint16 new_infomask,
2688  new_infomask2;
2689  bool have_tuple_lock = false;
2690  bool iscombo;
2691  bool all_visible_cleared = false;
2692  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2693  bool old_key_copied = false;
2694 
2695  Assert(ItemPointerIsValid(tid));
2696 
2697  /*
2698  * Forbid this during a parallel operation, lest it allocate a combo CID.
2699  * Other workers might need that combo CID for visibility checks, and we
2700  * have no provision for broadcasting it to them.
2701  */
2702  if (IsInParallelMode())
2703  ereport(ERROR,
2704  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2705  errmsg("cannot delete tuples during a parallel operation")));
2706 
2707  block = ItemPointerGetBlockNumber(tid);
2708  buffer = ReadBuffer(relation, block);
2709  page = BufferGetPage(buffer);
2710 
2711  /*
2712  * Before locking the buffer, pin the visibility map page if it appears to
2713  * be necessary. Since we haven't got the lock yet, someone else might be
2714  * in the middle of changing this, so we'll need to recheck after we have
2715  * the lock.
2716  */
2717  if (PageIsAllVisible(page))
2718  visibilitymap_pin(relation, block, &vmbuffer);
2719 
2721 
2722  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2723  Assert(ItemIdIsNormal(lp));
2724 
2725  tp.t_tableOid = RelationGetRelid(relation);
2726  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2727  tp.t_len = ItemIdGetLength(lp);
2728  tp.t_self = *tid;
2729 
2730 l1:
2731 
2732  /*
2733  * If we didn't pin the visibility map page and the page has become all
2734  * visible while we were busy locking the buffer, we'll have to unlock and
2735  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2736  * unfortunate, but hopefully shouldn't happen often.
2737  */
2738  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2739  {
2740  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2741  visibilitymap_pin(relation, block, &vmbuffer);
2743  }
2744 
2745  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2746 
2747  if (result == TM_Invisible)
2748  {
2749  UnlockReleaseBuffer(buffer);
2750  ereport(ERROR,
2751  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2752  errmsg("attempted to delete invisible tuple")));
2753  }
2754  else if (result == TM_BeingModified && wait)
2755  {
2756  TransactionId xwait;
2757  uint16 infomask;
2758 
2759  /* must copy state data before unlocking buffer */
2760  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2761  infomask = tp.t_data->t_infomask;
2762 
2763  /*
2764  * Sleep until concurrent transaction ends -- except when there's a
2765  * single locker and it's our own transaction. Note we don't care
2766  * which lock mode the locker has, because we need the strongest one.
2767  *
2768  * Before sleeping, we need to acquire tuple lock to establish our
2769  * priority for the tuple (see heap_lock_tuple). LockTuple will
2770  * release us when we are next-in-line for the tuple.
2771  *
2772  * If we are forced to "start over" below, we keep the tuple lock;
2773  * this arranges that we stay at the head of the line while rechecking
2774  * tuple state.
2775  */
2776  if (infomask & HEAP_XMAX_IS_MULTI)
2777  {
2778  bool current_is_member = false;
2779 
2780  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2781  LockTupleExclusive, &current_is_member))
2782  {
2783  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2784 
2785  /*
2786  * Acquire the lock, if necessary (but skip it when we're
2787  * requesting a lock and already have one; avoids deadlock).
2788  */
2789  if (!current_is_member)
2791  LockWaitBlock, &have_tuple_lock);
2792 
2793  /* wait for multixact */
2795  relation, &(tp.t_self), XLTW_Delete,
2796  NULL);
2798 
2799  /*
2800  * If xwait had just locked the tuple then some other xact
2801  * could update this tuple before we get to this point. Check
2802  * for xmax change, and start over if so.
2803  *
2804  * We also must start over if we didn't pin the VM page, and
2805  * the page has become all visible.
2806  */
2807  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2808  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2810  xwait))
2811  goto l1;
2812  }
2813 
2814  /*
2815  * You might think the multixact is necessarily done here, but not
2816  * so: it could have surviving members, namely our own xact or
2817  * other subxacts of this backend. It is legal for us to delete
2818  * the tuple in either case, however (the latter case is
2819  * essentially a situation of upgrading our former shared lock to
2820  * exclusive). We don't bother changing the on-disk hint bits
2821  * since we are about to overwrite the xmax altogether.
2822  */
2823  }
2824  else if (!TransactionIdIsCurrentTransactionId(xwait))
2825  {
2826  /*
2827  * Wait for regular transaction to end; but first, acquire tuple
2828  * lock.
2829  */
2830  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2832  LockWaitBlock, &have_tuple_lock);
2833  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2835 
2836  /*
2837  * xwait is done, but if xwait had just locked the tuple then some
2838  * other xact could update this tuple before we get to this point.
2839  * Check for xmax change, and start over if so.
2840  *
2841  * We also must start over if we didn't pin the VM page, and the
2842  * page has become all visible.
2843  */
2844  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2845  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2847  xwait))
2848  goto l1;
2849 
2850  /* Otherwise check if it committed or aborted */
2851  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2852  }
2853 
2854  /*
2855  * We may overwrite if previous xmax aborted, or if it committed but
2856  * only locked the tuple without updating it.
2857  */
2858  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2861  result = TM_Ok;
2862  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2863  result = TM_Updated;
2864  else
2865  result = TM_Deleted;
2866  }
2867 
2868  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2869  if (result != TM_Ok)
2870  {
2871  Assert(result == TM_SelfModified ||
2872  result == TM_Updated ||
2873  result == TM_Deleted ||
2874  result == TM_BeingModified);
2876  Assert(result != TM_Updated ||
2877  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2878  }
2879 
2880  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2881  {
2882  /* Perform additional check for transaction-snapshot mode RI updates */
2883  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2884  result = TM_Updated;
2885  }
2886 
2887  if (result != TM_Ok)
2888  {
2889  tmfd->ctid = tp.t_data->t_ctid;
2891  if (result == TM_SelfModified)
2892  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2893  else
2894  tmfd->cmax = InvalidCommandId;
2895  UnlockReleaseBuffer(buffer);
2896  if (have_tuple_lock)
2897  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2898  if (vmbuffer != InvalidBuffer)
2899  ReleaseBuffer(vmbuffer);
2900  return result;
2901  }
2902 
2903  /*
2904  * We're about to do the actual delete -- check for conflict first, to
2905  * avoid possibly having to roll back work we've just done.
2906  *
2907  * This is safe without a recheck as long as there is no possibility of
2908  * another process scanning the page between this check and the delete
2909  * being visible to the scan (i.e., an exclusive buffer content lock is
2910  * continuously held from this point until the tuple delete is visible).
2911  */
2912  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2913 
2914  /* replace cid with a combo CID if necessary */
2915  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2916 
2917  /*
2918  * Compute replica identity tuple before entering the critical section so
2919  * we don't PANIC upon a memory allocation failure.
2920  */
2921  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2922 
2923  /*
2924  * If this is the first possibly-multixact-able operation in the current
2925  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2926  * certain that the transaction will never become a member of any older
2927  * MultiXactIds than that. (We have to do this even if we end up just
2928  * using our own TransactionId below, since some other backend could
2929  * incorporate our XID into a MultiXact immediately afterwards.)
2930  */
2932 
2935  xid, LockTupleExclusive, true,
2936  &new_xmax, &new_infomask, &new_infomask2);
2937 
2939 
2940  /*
2941  * If this transaction commits, the tuple will become DEAD sooner or
2942  * later. Set flag that this page is a candidate for pruning once our xid
2943  * falls below the OldestXmin horizon. If the transaction finally aborts,
2944  * the subsequent page pruning will be a no-op and the hint will be
2945  * cleared.
2946  */
2947  PageSetPrunable(page, xid);
2948 
2949  if (PageIsAllVisible(page))
2950  {
2951  all_visible_cleared = true;
2952  PageClearAllVisible(page);
2953  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2954  vmbuffer, VISIBILITYMAP_VALID_BITS);
2955  }
2956 
2957  /* store transaction information of xact deleting the tuple */
2960  tp.t_data->t_infomask |= new_infomask;
2961  tp.t_data->t_infomask2 |= new_infomask2;
2963  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2964  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2965  /* Make sure there is no forward chain link in t_ctid */
2966  tp.t_data->t_ctid = tp.t_self;
2967 
2968  /* Signal that this is actually a move into another partition */
2969  if (changingPart)
2971 
2972  MarkBufferDirty(buffer);
2973 
2974  /*
2975  * XLOG stuff
2976  *
2977  * NB: heap_abort_speculative() uses the same xlog record and replay
2978  * routines.
2979  */
2980  if (RelationNeedsWAL(relation))
2981  {
2982  xl_heap_delete xlrec;
2983  xl_heap_header xlhdr;
2984  XLogRecPtr recptr;
2985 
2986  /*
2987  * For logical decode we need combo CIDs to properly decode the
2988  * catalog
2989  */
2991  log_heap_new_cid(relation, &tp);
2992 
2993  xlrec.flags = 0;
2994  if (all_visible_cleared)
2996  if (changingPart)
2999  tp.t_data->t_infomask2);
3001  xlrec.xmax = new_xmax;
3002 
3003  if (old_key_tuple != NULL)
3004  {
3005  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3007  else
3009  }
3010 
3011  XLogBeginInsert();
3012  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
3013 
3014  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3015 
3016  /*
3017  * Log replica identity of the deleted tuple if there is one
3018  */
3019  if (old_key_tuple != NULL)
3020  {
3021  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3022  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3023  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3024 
3025  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
3026  XLogRegisterData((char *) old_key_tuple->t_data
3028  old_key_tuple->t_len
3030  }
3031 
3032  /* filtering by origin on a row level is much more efficient */
3034 
3035  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3036 
3037  PageSetLSN(page, recptr);
3038  }
3039 
3040  END_CRIT_SECTION();
3041 
3042  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3043 
3044  if (vmbuffer != InvalidBuffer)
3045  ReleaseBuffer(vmbuffer);
3046 
3047  /*
3048  * If the tuple has toasted out-of-line attributes, we need to delete
3049  * those items too. We have to do this before releasing the buffer
3050  * because we need to look at the contents of the tuple, but it's OK to
3051  * release the content lock on the buffer first.
3052  */
3053  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3054  relation->rd_rel->relkind != RELKIND_MATVIEW)
3055  {
3056  /* toast table entries should never be recursively toasted */
3058  }
3059  else if (HeapTupleHasExternal(&tp))
3060  heap_toast_delete(relation, &tp, false);
3061 
3062  /*
3063  * Mark tuple for invalidation from system caches at next command
3064  * boundary. We have to do this before releasing the buffer because we
3065  * need to look at the contents of the tuple.
3066  */
3067  CacheInvalidateHeapTuple(relation, &tp, NULL);
3068 
3069  /* Now we can release the buffer */
3070  ReleaseBuffer(buffer);
3071 
3072  /*
3073  * Release the lmgr tuple lock, if we had it.
3074  */
3075  if (have_tuple_lock)
3076  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
3077 
3078  pgstat_count_heap_delete(relation);
3079 
3080  if (old_key_tuple != NULL && old_key_copied)
3081  heap_freetuple(old_key_tuple);
3082 
3083  return TM_Ok;
3084 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3724
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4941
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
#define InvalidCommandId
Definition: c.h:623
TransactionId MultiXactId
Definition: c.h:616
uint16_t uint16
Definition: c.h:484
void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup)
Definition: combocid.c:118
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereport(elevel,...)
Definition: elog.h:149
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7435
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:8898
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:5174
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:8979
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:5125
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7612
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2651
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:161
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:1896
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:104
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:102
#define SizeOfHeapHeader
Definition: heapam_xlog.h:157
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:106
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:103
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1434
#define HEAP_XMAX_IS_LOCKED_ONLY(infomask)
Definition: htup_details.h:227
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:376
#define HeapTupleHeaderClearHotUpdated(tup)
Definition: htup_details.h:494
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define HeapTupleHeaderSetMovedPartitions(tup)
Definition: htup_details.h:447
#define HeapTupleHeaderGetRawXmax(tup)
Definition: htup_details.h:371
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderSetCmax(tup, cid, iscombo)
Definition: htup_details.h:401
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1493
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:656
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:673
@ MultiXactStatusUpdate
Definition: multixact.h:46
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4326
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:684
#define InvalidSnapshot
Definition: snapshot.h:119
TransactionId xmax
Definition: tableam.h:150
CommandId cmax
Definition: tableam.h:151
ItemPointerData ctid
Definition: tableam.h:149
uint16 t_infomask
Definition: heapam_xlog.h:153
uint16 t_infomask2
Definition: heapam_xlog.h:152
TM_Result
Definition: tableam.h:79
@ TM_Ok
Definition: tableam.h:84
@ TM_BeingModified
Definition: tableam.h:106
@ TM_Deleted
Definition: tableam.h:99
@ TM_Updated
Definition: tableam.h:96
@ TM_SelfModified
Definition: tableam.h:90
@ TM_Invisible
Definition: tableam.h:87
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:940
bool IsInParallelMode(void)
Definition: xact.c:1088
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:154
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
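
A minimal sketch of a direct call, assuming the caller already holds an appropriate lock on the relation and knows the target TID. Real callers normally use simple_heap_delete() or go through the table AM, which add error handling and index cleanup; the helper below is hypothetical:

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "utils/snapshot.h"

/* Hypothetical helper: try to delete the tuple at tid, reporting success. */
static bool
try_delete_tuple(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid,
                         GetCurrentCommandId(true),
                         InvalidSnapshot,   /* no crosscheck snapshot */
                         true,              /* wait for concurrent updaters */
                         &tmfd,
                         false);            /* not a partition move */

    switch (result)
    {
        case TM_Ok:
            return true;
        case TM_SelfModified:
        case TM_Updated:
        case TM_Deleted:
            /* tmfd.ctid / tmfd.xmax describe the conflicting version */
            return false;
        default:
            return false;
    }
}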

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1201 of file heapam.c.

1202 {
1203  HeapScanDesc scan = (HeapScanDesc) sscan;
1204 
1205  /* Note: no locking manipulations needed */
1206 
1207  /*
1208  * unpin scan buffers
1209  */
1210  if (BufferIsValid(scan->rs_cbuf))
1211  ReleaseBuffer(scan->rs_cbuf);
1212 
1213  if (BufferIsValid(scan->rs_vmbuffer))
1214  ReleaseBuffer(scan->rs_vmbuffer);
1215 
1216  /*
1217  * Must free the read stream before freeing the BufferAccessStrategy.
1218  */
1219  if (scan->rs_read_stream)
1221 
1222  /*
1223  * decrement relation reference count and free scan descriptor storage
1224  */
1226 
1227  if (scan->rs_base.rs_key)
1228  pfree(scan->rs_base.rs_key);
1229 
1230  if (scan->rs_strategy != NULL)
1232 
1233  if (scan->rs_parallelworkerdata != NULL)
1235 
1236  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1238 
1239  pfree(scan);
1240 }
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:846
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2163
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:798
Buffer rs_cbuf
Definition: heapam.h:68
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:64

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze frz 
)
inlinestatic

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1502 of file heapam.c.

1507 {
1508  ItemPointer tid = &(tuple->t_self);
1509  ItemId lp;
1510  Buffer buffer;
1511  Page page;
1512  OffsetNumber offnum;
1513  bool valid;
1514 
1515  /*
1516  * Fetch and pin the appropriate page of the relation.
1517  */
1518  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1519 
1520  /*
1521  * Need share lock on buffer to examine tuple commit status.
1522  */
1523  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1524  page = BufferGetPage(buffer);
1525 
1526  /*
1527  * We'd better check for out-of-range offnum in case of VACUUM since the
1528  * TID was obtained.
1529  */
1530  offnum = ItemPointerGetOffsetNumber(tid);
1531  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1532  {
1533  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1534  ReleaseBuffer(buffer);
1535  *userbuf = InvalidBuffer;
1536  tuple->t_data = NULL;
1537  return false;
1538  }
1539 
1540  /*
1541  * get the item line pointer corresponding to the requested tid
1542  */
1543  lp = PageGetItemId(page, offnum);
1544 
1545  /*
1546  * Must check for deleted tuple.
1547  */
1548  if (!ItemIdIsNormal(lp))
1549  {
1550  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1551  ReleaseBuffer(buffer);
1552  *userbuf = InvalidBuffer;
1553  tuple->t_data = NULL;
1554  return false;
1555  }
1556 
1557  /*
1558  * fill in *tuple fields
1559  */
1560  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1561  tuple->t_len = ItemIdGetLength(lp);
1562  tuple->t_tableOid = RelationGetRelid(relation);
1563 
1564  /*
1565  * check tuple visibility, then release lock
1566  */
1567  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1568 
1569  if (valid)
1570  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1571  HeapTupleHeaderGetXmin(tuple->t_data));
1572 
1573  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1574 
1575  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1576 
1577  if (valid)
1578  {
1579  /*
1580  * All checks passed, so return the tuple as valid. Caller is now
1581  * responsible for releasing the buffer.
1582  */
1583  *userbuf = buffer;
1584 
1585  return true;
1586  }
1587 
1588  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1589  if (keep_buf)
1590  *userbuf = buffer;
1591  else
1592  {
1593  ReleaseBuffer(buffer);
1594  *userbuf = InvalidBuffer;
1595  tuple->t_data = NULL;
1596  }
1597 
1598  return false;
1599 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:9083
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2611

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
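
A minimal sketch of a point lookup by TID. On success the caller owns a pin on the returned buffer and must release it once done with the tuple data; the helper below is hypothetical:

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Hypothetical helper: fetch the tuple at tid if it is visible to snapshot. */
static bool
fetch_tuple_at_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buf;

    tuple.t_self = *tid;
    if (!heap_fetch(rel, snapshot, &tuple, &buf, false))
        return false;           /* missing or not visible; no buffer kept */

    /* ... inspect tuple.t_data while the buffer pin is held ... */

    ReleaseBuffer(buf);
    return true;
}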

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5936 of file heapam.c.

5937 {
5938  Buffer buffer;
5939  Page page;
5940  OffsetNumber offnum;
5941  ItemId lp = NULL;
5942  HeapTupleHeader htup;
5943 
5944  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5946  page = (Page) BufferGetPage(buffer);
5947 
5948  offnum = ItemPointerGetOffsetNumber(tid);
5949  if (PageGetMaxOffsetNumber(page) >= offnum)
5950  lp = PageGetItemId(page, offnum);
5951 
5952  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5953  elog(ERROR, "invalid lp");
5954 
5955  htup = (HeapTupleHeader) PageGetItem(page, lp);
5956 
5957  /* NO EREPORT(ERROR) from here till changes are logged */
5959 
5961 
5962  MarkBufferDirty(buffer);
5963 
5964  /*
5965  * Replace the speculative insertion token with a real t_ctid, pointing to
5966  * itself like it does on regular tuples.
5967  */
5968  htup->t_ctid = *tid;
5969 
5970  /* XLOG stuff */
5971  if (RelationNeedsWAL(relation))
5972  {
5973  xl_heap_confirm xlrec;
5974  XLogRecPtr recptr;
5975 
5976  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5977 
5978  XLogBeginInsert();
5979 
5980  /* We want the same filtering on this as on a plain insert */
5982 
5983  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5984  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5985 
5986  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5987 
5988  PageSetLSN(page, recptr);
5989  }
5990 
5991  END_CRIT_SECTION();
5992 
5993  UnlockReleaseBuffer(buffer);
5994 }
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:423
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:38
OffsetNumber offnum
Definition: heapam_xlog.h:420

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
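
A minimal sketch of the surrounding speculative-insertion protocol, as driven by the table AM callbacks (heapam_tuple_insert_speculative() and heapam_tuple_complete_speculative()); the variables rel, slot, specToken and succeeded stand in for caller state and are assumptions, not part of heapam.c:

  bool        shouldFree;
  HeapTuple   tup = ExecFetchSlotHeapTuple(slot, true, &shouldFree);

  /* stamp the tuple with the speculative token, then insert it */
  HeapTupleHeaderSetSpeculativeToken(tup->t_data, specToken);
  heap_insert(rel, tup, GetCurrentCommandId(true), HEAP_INSERT_SPECULATIVE, NULL);

  if (succeeded)
      heap_finish_speculative(rel, &tup->t_self);   /* replace token with a real t_ctid */
  else
      heap_abort_speculative(rel, &tup->t_self);    /* "super-delete" the tuple instead */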

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7220 of file heapam.c.

7221 {
7222  Page page = BufferGetPage(buffer);
7223 
7224  for (int i = 0; i < ntuples; i++)
7225  {
7226  HeapTupleFreeze *frz = tuples + i;
7227  ItemId itemid = PageGetItemId(page, frz->offset);
7228  HeapTupleHeader htup;
7229 
7230  htup = (HeapTupleHeader) PageGetItem(page, itemid);
7231  heap_execute_freeze_tuple(htup, frz);
7232  }
7233 }
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.h:443
int i
Definition: isn.c:72
OffsetNumber offset
Definition: heapam.h:152

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().
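
A usage sketch in the style of the pruning/vacuum caller (simplified and assumed: the real caller also enters a critical section and WAL-logs the freeze plans; the VacuumCutoffs "cutoffs" and HeapPageFreeze "pagefrz" are taken as already initialized):

  HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
  int          nfrozen = 0;
  OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

  for (OffsetNumber offnum = FirstOffsetNumber; offnum <= maxoff;
       offnum = OffsetNumberNext(offnum))
  {
      ItemId      itemid = PageGetItemId(page, offnum);
      HeapTupleHeader htup;
      bool        totally_frozen;

      if (!ItemIdIsNormal(itemid))
          continue;
      htup = (HeapTupleHeader) PageGetItem(page, itemid);
      if (heap_prepare_freeze_tuple(htup, &cutoffs, &pagefrz,
                                    &frozen[nfrozen], &totally_frozen))
          frozen[nfrozen++].offset = offnum;     /* remember which item to freeze */
  }

  if (nfrozen > 0)
      heap_freeze_prepared_tuples(buffer, frozen, nfrozen);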

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 7242 of file heapam.c.

7245 {
7246  HeapTupleFreeze frz;
7247  bool do_freeze;
7248  bool totally_frozen;
7249  struct VacuumCutoffs cutoffs;
7250  HeapPageFreeze pagefrz;
7251 
7252  cutoffs.relfrozenxid = relfrozenxid;
7253  cutoffs.relminmxid = relminmxid;
7254  cutoffs.OldestXmin = FreezeLimit;
7255  cutoffs.OldestMxact = MultiXactCutoff;
7256  cutoffs.FreezeLimit = FreezeLimit;
7257  cutoffs.MultiXactCutoff = MultiXactCutoff;
7258 
7259  pagefrz.freeze_required = true;
7260  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7261  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7262  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7263  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7264 
7265  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7266  &pagefrz, &frz, &totally_frozen);
7267 
7268  /*
7269  * Note that because this is not a WAL-logged operation, we don't need to
7270  * fill in the offset in the freeze record.
7271  */
7272 
7273  if (do_freeze)
7274  heap_execute_freeze_tuple(tuple, &frz);
7275  return do_freeze;
7276 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6894
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
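
A usage sketch for this standalone path (as in CLUSTER's tuple rewriter): no WAL is written, so it is only appropriate where the target data is logged separately or not yet visible. freeze_xid and freeze_mxid are assumed caller-computed cutoffs:

  if (heap_freeze_tuple(newtup->t_data,
                        rel->rd_rel->relfrozenxid, rel->rd_rel->relminmxid,
                        freeze_xid, freeze_mxid))
  {
      /* header of newtup was rewritten in place with frozen xmin/xmax */
  }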

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1774 of file heapam.c.

1776 {
1777  Relation relation = sscan->rs_rd;
1778  Snapshot snapshot = sscan->rs_snapshot;
1779  ItemPointerData ctid;
1780  TransactionId priorXmax;
1781 
1782  /*
1783  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1784  * Assume that t_ctid links are valid however - there shouldn't be invalid
1785  * ones in the table.
1786  */
1787  Assert(ItemPointerIsValid(tid));
1788 
1789  /*
1790  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1791  * need to examine, and *tid is the TID we will return if ctid turns out
1792  * to be bogus.
1793  *
1794  * Note that we will loop until we reach the end of the t_ctid chain.
1795  * Depending on the snapshot passed, there might be at most one visible
1796  * version of the row, but we don't try to optimize for that.
1797  */
1798  ctid = *tid;
1799  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1800  for (;;)
1801  {
1802  Buffer buffer;
1803  Page page;
1804  OffsetNumber offnum;
1805  ItemId lp;
1806  HeapTupleData tp;
1807  bool valid;
1808 
1809  /*
1810  * Read, pin, and lock the page.
1811  */
1812  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1813  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1814  page = BufferGetPage(buffer);
1815 
1816  /*
1817  * Check for bogus item number. This is not treated as an error
1818  * condition because it can happen while following a t_ctid link. We
1819  * just assume that the prior tid is OK and return it unchanged.
1820  */
1821  offnum = ItemPointerGetOffsetNumber(&ctid);
1822  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1823  {
1824  UnlockReleaseBuffer(buffer);
1825  break;
1826  }
1827  lp = PageGetItemId(page, offnum);
1828  if (!ItemIdIsNormal(lp))
1829  {
1830  UnlockReleaseBuffer(buffer);
1831  break;
1832  }
1833 
1834  /* OK to access the tuple */
1835  tp.t_self = ctid;
1836  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1837  tp.t_len = ItemIdGetLength(lp);
1838  tp.t_tableOid = RelationGetRelid(relation);
1839 
1840  /*
1841  * After following a t_ctid link, we might arrive at an unrelated
1842  * tuple. Check for XMIN match.
1843  */
1844  if (TransactionIdIsValid(priorXmax) &&
1845  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1846  {
1847  UnlockReleaseBuffer(buffer);
1848  break;
1849  }
1850 
1851  /*
1852  * Check tuple visibility; if visible, set it as the new result
1853  * candidate.
1854  */
1855  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1856  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1857  if (valid)
1858  *tid = ctid;
1859 
1860  /*
1861  * If there's a valid t_ctid link, follow it, else we're done.
1862  */
1863  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1864  HeapTupleHeaderIsOnlyLocked(&tp) ||
1865  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1866  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1867  {
1868  UnlockReleaseBuffer(buffer);
1869  break;
1870  }
1871 
1872  ctid = tp.t_data->t_ctid;
1873  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1874  UnlockReleaseBuffer(buffer);
1875  } /* end of loop */
1876 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
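
A usage sketch along the lines of the currtid() code path; rel and tid are assumed inputs, and the visibility snapshot comes from the scan descriptor:

  Snapshot      snapshot = RegisterSnapshot(GetLatestSnapshot());
  TableScanDesc scan = table_beginscan_tid(rel, snapshot);

  heap_get_latest_tid(scan, &tid);    /* tid is overwritten with the newest visible version */

  table_endscan(scan);
  UnregisterSnapshot(snapshot);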

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1785 of file pruneheap.c.

1786 {
1787  OffsetNumber offnum,
1788  maxoff;
1789 
1790  MemSet(root_offsets, InvalidOffsetNumber,
1791  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1792 
1793  maxoff = PageGetMaxOffsetNumber(page);
1794  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1795  {
1796  ItemId lp = PageGetItemId(page, offnum);
1797  HeapTupleHeader htup;
1798  OffsetNumber nextoffnum;
1799  TransactionId priorXmax;
1800 
1801  /* skip unused and dead items */
1802  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1803  continue;
1804 
1805  if (ItemIdIsNormal(lp))
1806  {
1807  htup = (HeapTupleHeader) PageGetItem(page, lp);
1808 
1809  /*
1810  * Check if this tuple is part of a HOT-chain rooted at some other
1811  * tuple. If so, skip it for now; we'll process it when we find
1812  * its root.
1813  */
1814  if (HeapTupleHeaderIsHeapOnly(htup))
1815  continue;
1816 
1817  /*
1818  * This is either a plain tuple or the root of a HOT-chain.
1819  * Remember it in the mapping.
1820  */
1821  root_offsets[offnum - 1] = offnum;
1822 
1823  /* If it's not the start of a HOT-chain, we're done with it */
1824  if (!HeapTupleHeaderIsHotUpdated(htup))
1825  continue;
1826 
1827  /* Set up to scan the HOT-chain */
1828  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1829  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1830  }
1831  else
1832  {
1833  /* Must be a redirect item. We do not set its root_offsets entry */
1834  Assert(ItemIdIsRedirected(lp));
1835  /* Set up to scan the HOT-chain */
1836  nextoffnum = ItemIdGetRedirect(lp);
1837  priorXmax = InvalidTransactionId;
1838  }
1839 
1840  /*
1841  * Now follow the HOT-chain and collect other tuples in the chain.
1842  *
1843  * Note: Even though this is a nested loop, the complexity of the
1844  * function is O(N) because a tuple in the page should be visited not
1845  * more than twice, once in the outer loop and once in HOT-chain
1846  * chases.
1847  */
1848  for (;;)
1849  {
1850  /* Sanity check (pure paranoia) */
1851  if (offnum < FirstOffsetNumber)
1852  break;
1853 
1854  /*
1855  * An offset past the end of page's line pointer array is possible
1856  * when the array was truncated
1857  */
1858  if (offnum > maxoff)
1859  break;
1860 
1861  lp = PageGetItemId(page, nextoffnum);
1862 
1863  /* Check for broken chains */
1864  if (!ItemIdIsNormal(lp))
1865  break;
1866 
1867  htup = (HeapTupleHeader) PageGetItem(page, lp);
1868 
1869  if (TransactionIdIsValid(priorXmax) &&
1870  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1871  break;
1872 
1873  /* Remember the root line pointer for this item */
1874  root_offsets[nextoffnum - 1] = offnum;
1875 
1876  /* Advance to next chain member, if any */
1877  if (!HeapTupleHeaderIsHotUpdated(htup))
1878  break;
1879 
1880  /* HOT implies it can't have moved to different partition */
1881  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1882 
1883  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1884  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1885  }
1886  }
1887 }
#define MemSet(start, val, len)
Definition: c.h:974
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
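
A consumer-side sketch modeled on the index-build callers: for a heap-only tuple the index entry must point at the root of its HOT chain, which this mapping provides. page, blkno, offnum and itup are assumed caller state:

  OffsetNumber root_offsets[MaxHeapTuplesPerPage];
  OffsetNumber root;

  heap_get_root_tuples(page, root_offsets);

  root = root_offsets[offnum - 1];
  if (!OffsetNumberIsValid(root))
      elog(ERROR, "failed to find parent tuple for heap-only tuple");
  ItemPointerSet(&itup->t_tid, blkno, root);    /* index the chain root, not the HOT member */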

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1243 of file heapam.c.

1244 {
1245  HeapScanDesc scan = (HeapScanDesc) sscan;
1246 
1247  /*
1248  * This is still widely used directly, without going through table AM, so
1249  * add a safety check. It's possible we should, at a later point,
1250  * downgrade this to an assert. The reason for checking the AM routine,
1251  * rather than the AM oid, is that this allows to write regression tests
1252  * that create another AM reusing the heap handler.
1253  */
1255  ereport(ERROR,
1256  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1257  errmsg_internal("only heap AM is supported")));
1258 
1259  /*
1260  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1261  * for catalog or regular tables. See detailed comments in xact.c where
1262  * these variables are declared. Normally we have such a check at tableam
1263  * level API but this is called from many places so we need to ensure it
1264  * here.
1265  */
1266  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1267  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1268 
1269  /* Note: no locking manipulations needed */
1270 
1271  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1272  heapgettup_pagemode(scan, direction,
1273  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1274  else
1275  heapgettup(scan, direction,
1276  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1277 
1278  if (scan->rs_ctup.t_data == NULL)
1279  return NULL;
1280 
1281  /*
1282  * if we get here it means we have a new current scan tuple, so point to
1283  * the proper return buffer and return the tuple.
1284  */
1285 
1286  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1287 
1288  return &scan->rs_ctup;
1289 }
#define unlikely(x)
Definition: c.h:330
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:829
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:939
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:654
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool bsysscan
Definition: xact.c:99
TransactionId CheckXidAlive
Definition: xact.c:98

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
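
A minimal sequential-scan sketch; most of the callers above go through the table AM or systable wrappers, but the direct form looks like this (rel is assumed to be an already-opened, suitably locked heap relation with an active snapshot set up):

  TableScanDesc scan;
  HeapTuple   tuple;

  scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                        SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
  while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
  {
      /* tuple points into the scan's current buffer; valid until the next call */
  }
  heap_endscan(scan);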

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1292 of file heapam.c.

1293 {
1294  HeapScanDesc scan = (HeapScanDesc) sscan;
1295 
1296  /* Note: no locking manipulations needed */
1297 
1298  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1299  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1300  else
1301  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1302 
1303  if (scan->rs_ctup.t_data == NULL)
1304  {
1305  ExecClearTuple(slot);
1306  return false;
1307  }
1308 
1309  /*
1310  * if we get here it means we have a new current scan tuple, so point to
1311  * the proper return buffer and return the tuple.
1312  */
1313 
1314  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1315 
1316  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1317  scan->rs_cbuf);
1318  return true;
1319 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
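
The slot-based variant used on the executor-facing path; a sketch assuming slot was created beforehand with table_slot_create(rel, NULL):

  TableScanDesc scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                                      SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);

  while (heap_getnextslot(scan, ForwardScanDirection, slot))
  {
      /* slot holds a buffer heap tuple; it is replaced on the next call */
  }
  heap_endscan(scan);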

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1395 of file heapam.c.

1397 {
1398  HeapScanDesc scan = (HeapScanDesc) sscan;
1399  ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1400  ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1401 
1402  /* Note: no locking manipulations needed */
1403  for (;;)
1404  {
1405  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1406  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1407  else
1408  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1409 
1410  if (scan->rs_ctup.t_data == NULL)
1411  {
1412  ExecClearTuple(slot);
1413  return false;
1414  }
1415 
1416  /*
1417  * heap_set_tidrange will have used heap_setscanlimits to limit the
1418  * range of pages we scan to only ones that can contain the TID range
1419  * we're scanning for. Here we must filter out any tuples from these
1420  * pages that are outside of that range.
1421  */
1422  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1423  {
1424  ExecClearTuple(slot);
1425 
1426  /*
1427  * When scanning backwards, the TIDs will be in descending order.
1428  * Future tuples in this direction will be lower still, so we can
1429  * just return false to indicate there will be no more tuples.
1430  */
1431  if (ScanDirectionIsBackward(direction))
1432  return false;
1433 
1434  continue;
1435  }
1436 
1437  /*
1438  * Likewise for the final page, we must filter out TIDs greater than
1439  * maxtid.
1440  */
1441  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1442  {
1443  ExecClearTuple(slot);
1444 
1445  /*
1446  * When scanning forward, the TIDs will be in ascending order.
1447  * Future tuples in this direction will be higher still, so we can
1448  * just return false to indicate there will be no more tuples.
1449  */
1450  if (ScanDirectionIsForward(direction))
1451  return false;
1452  continue;
1453  }
1454 
1455  break;
1456  }
1457 
1458  /*
1459  * if we get here it means we have a new current scan tuple, so point to
1460  * the proper return buffer and return the tuple.
1461  */
1462  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1463 
1464  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1465  return true;
1466 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
union TableScanDescData::@48 st
struct TableScanDescData::@48::@50 tidrange

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, TableScanDescData::st, HeapTupleData::t_data, HeapTupleData::t_self, and TableScanDescData::tidrange.
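
A TID range scan sketch: heap_set_tidrange() restricts the pages scanned, and this function filters out boundary-page tuples outside [mintid, maxtid]. mintid, maxtid and slot are assumed caller state:

  TableScanDesc scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                                      SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);

  heap_set_tidrange(scan, &mintid, &maxtid);
  while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
  {
      /* slot holds the next tuple whose TID lies within the range */
  }
  heap_endscan(scan);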

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool *  all_dead,
bool  first_call 
)

Definition at line 1622 of file heapam.c.

1625 {
1626  Page page = BufferGetPage(buffer);
1627  TransactionId prev_xmax = InvalidTransactionId;
1628  BlockNumber blkno;
1629  OffsetNumber offnum;
1630  bool at_chain_start;
1631  bool valid;
1632  bool skip;
1633  GlobalVisState *vistest = NULL;
1634 
1635  /* If this is not the first call, previous call returned a (live!) tuple */
1636  if (all_dead)
1637  *all_dead = first_call;
1638 
1639  blkno = ItemPointerGetBlockNumber(tid);
1640  offnum = ItemPointerGetOffsetNumber(tid);
1641  at_chain_start = first_call;
1642  skip = !first_call;
1643 
1644  /* XXX: we should assert that a snapshot is pushed or registered */
1645  Assert(TransactionIdIsValid(RecentXmin));
1646  Assert(BufferGetBlockNumber(buffer) == blkno);
1647 
1648  /* Scan through possible multiple members of HOT-chain */
1649  for (;;)
1650  {
1651  ItemId lp;
1652 
1653  /* check for bogus TID */
1654  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1655  break;
1656 
1657  lp = PageGetItemId(page, offnum);
1658 
1659  /* check for unused, dead, or redirected items */
1660  if (!ItemIdIsNormal(lp))
1661  {
1662  /* We should only see a redirect at start of chain */
1663  if (ItemIdIsRedirected(lp) && at_chain_start)
1664  {
1665  /* Follow the redirect */
1666  offnum = ItemIdGetRedirect(lp);
1667  at_chain_start = false;
1668  continue;
1669  }
1670  /* else must be end of chain */
1671  break;
1672  }
1673 
1674  /*
1675  * Update heapTuple to point to the element of the HOT chain we're
1676  * currently investigating. Having t_self set correctly is important
1677  * because the SSI checks and the *Satisfies routine for historical
1678  * MVCC snapshots need the correct tid to decide about the visibility.
1679  */
1680  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1681  heapTuple->t_len = ItemIdGetLength(lp);
1682  heapTuple->t_tableOid = RelationGetRelid(relation);
1683  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1684 
1685  /*
1686  * Shouldn't see a HEAP_ONLY tuple at chain start.
1687  */
1688  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1689  break;
1690 
1691  /*
1692  * The xmin should match the previous xmax value, else chain is
1693  * broken.
1694  */
1695  if (TransactionIdIsValid(prev_xmax) &&
1696  !TransactionIdEquals(prev_xmax,
1697  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1698  break;
1699 
1700  /*
1701  * When first_call is true (and thus, skip is initially false) we'll
1702  * return the first tuple we find. But on later passes, heapTuple
1703  * will initially be pointing to the tuple we returned last time.
1704  * Returning it again would be incorrect (and would loop forever), so
1705  * we skip it and return the next match we find.
1706  */
1707  if (!skip)
1708  {
1709  /* If it's visible per the snapshot, we must return it */
1710  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1711  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1712  buffer, snapshot);
1713 
1714  if (valid)
1715  {
1716  ItemPointerSetOffsetNumber(tid, offnum);
1717  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1718  HeapTupleHeaderGetXmin(heapTuple->t_data));
1719  if (all_dead)
1720  *all_dead = false;
1721  return true;
1722  }
1723  }
1724  skip = false;
1725 
1726  /*
1727  * If we can't see it, maybe no one else can either. At caller
1728  * request, check whether all chain members are dead to all
1729  * transactions.
1730  *
1731  * Note: if you change the criterion here for what is "dead", fix the
1732  * planner's get_actual_variable_range() function to match.
1733  */
1734  if (all_dead && *all_dead)
1735  {
1736  if (!vistest)
1737  vistest = GlobalVisTestFor(relation);
1738 
1739  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1740  *all_dead = false;
1741  }
1742 
1743  /*
1744  * Check to see if HOT chain continues past this tuple; if so fetch
1745  * the next offnum and loop around.
1746  */
1747  if (HeapTupleIsHotUpdated(heapTuple))
1748  {
1749  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1750  blkno);
1751  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1752  at_chain_start = false;
1753  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1754  }
1755  else
1756  break; /* end of chain */
1757  }
1758 
1759  return false;
1760 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:107
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4107
TransactionId RecentXmin
Definition: snapmgr.c:100

References Assert, BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
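
A single-probe sketch in the spirit of heapam_index_fetch_tuple(): the caller pins and share-locks the buffer, and tid/heapTuple are updated to the chain member visible to snapshot. rel, tid and snapshot are assumed inputs:

  HeapTupleData heapTuple;
  bool        all_dead = false;
  bool        found;
  Buffer      buffer = ReadBuffer(rel, ItemPointerGetBlockNumber(&tid));

  LockBuffer(buffer, BUFFER_LOCK_SHARE);
  found = heap_hot_search_buffer(&tid, rel, buffer, snapshot,
                                 &heapTuple, &all_dead, true);
  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

  if (found)
  {
      /* heapTuple and tid now identify the visible member of the HOT chain */
  }
  else if (all_dead)
  {
      /* every chain member is dead to all transactions; the index entry may be killed */
  }

  ReleaseBuffer(buffer);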

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7956 of file heapam.c.

7957 {
7958  /* Initial assumption is that earlier pruning took care of conflict */
7959  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7960  BlockNumber blkno = InvalidBlockNumber;
7961  Buffer buf = InvalidBuffer;
7962  Page page = NULL;
7963  OffsetNumber maxoff = InvalidOffsetNumber;
7964  TransactionId priorXmax;
7965 #ifdef USE_PREFETCH
7966  IndexDeletePrefetchState prefetch_state;
7967  int prefetch_distance;
7968 #endif
7969  SnapshotData SnapshotNonVacuumable;
7970  int finalndeltids = 0,
7971  nblocksaccessed = 0;
7972 
7973  /* State that's only used in bottom-up index deletion case */
7974  int nblocksfavorable = 0;
7975  int curtargetfreespace = delstate->bottomupfreespace,
7976  lastfreespace = 0,
7977  actualfreespace = 0;
7978  bool bottomup_final_block = false;
7979 
7980  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7981 
7982  /* Sort caller's deltids array by TID for further processing */
7983  index_delete_sort(delstate);
7984 
7985  /*
7986  * Bottom-up case: resort deltids array in an order attuned to where the
7987  * greatest number of promising TIDs are to be found, and determine how
7988  * many blocks from the start of sorted array should be considered
7989  * favorable. This will also shrink the deltids array in order to
7990  * eliminate completely unfavorable blocks up front.
7991  */
7992  if (delstate->bottomup)
7993  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7994 
7995 #ifdef USE_PREFETCH
7996  /* Initialize prefetch state. */
7997  prefetch_state.cur_hblkno = InvalidBlockNumber;
7998  prefetch_state.next_item = 0;
7999  prefetch_state.ndeltids = delstate->ndeltids;
8000  prefetch_state.deltids = delstate->deltids;
8001 
8002  /*
8003  * Determine the prefetch distance that we will attempt to maintain.
8004  *
8005  * Since the caller holds a buffer lock somewhere in rel, we'd better make
8006  * sure that isn't a catalog relation before we call code that does
8007  * syscache lookups, to avoid risk of deadlock.
8008  */
8009  if (IsCatalogRelation(rel))
8010  prefetch_distance = maintenance_io_concurrency;
8011  else
8012  prefetch_distance =
8013  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
8014 
8015  /* Cap initial prefetch distance for bottom-up deletion caller */
8016  if (delstate->bottomup)
8017  {
8018  Assert(nblocksfavorable >= 1);
8019  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
8020  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
8021  }
8022 
8023  /* Start prefetching. */
8024  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
8025 #endif
8026 
8027  /* Iterate over deltids, determine which to delete, check their horizon */
8028  Assert(delstate->ndeltids > 0);
8029  for (int i = 0; i < delstate->ndeltids; i++)
8030  {
8031  TM_IndexDelete *ideltid = &delstate->deltids[i];
8032  TM_IndexStatus *istatus = delstate->status + ideltid->id;
8033  ItemPointer htid = &ideltid->tid;
8034  OffsetNumber offnum;
8035 
8036  /*
8037  * Read buffer, and perform required extra steps each time a new block
8038  * is encountered. Avoid refetching if it's the same block as the one
8039  * from the last htid.
8040  */
8041  if (blkno == InvalidBlockNumber ||
8042  ItemPointerGetBlockNumber(htid) != blkno)
8043  {
8044  /*
8045  * Consider giving up early for bottom-up index deletion caller
8046  * first. (Only prefetch next-next block afterwards, when it
8047  * becomes clear that we're at least going to access the next
8048  * block in line.)
8049  *
8050  * Sometimes the first block frees so much space for bottom-up
8051  * caller that the deletion process can end without accessing any
8052  * more blocks. It is usually necessary to access 2 or 3 blocks
8053  * per bottom-up deletion operation, though.
8054  */
8055  if (delstate->bottomup)
8056  {
8057  /*
8058  * We often allow caller to delete a few additional items
8059  * whose entries we reached after the point that space target
8060  * from caller was satisfied. The cost of accessing the page
8061  * was already paid at that point, so it made sense to finish
8062  * it off. When that happened, we finalize everything here
8063  * (by finishing off the whole bottom-up deletion operation
8064  * without needlessly paying the cost of accessing any more
8065  * blocks).
8066  */
8067  if (bottomup_final_block)
8068  break;
8069 
8070  /*
8071  * Give up when we didn't enable our caller to free any
8072  * additional space as a result of processing the page that we
8073  * just finished up with. This rule is the main way in which
8074  * we keep the cost of bottom-up deletion under control.
8075  */
8076  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
8077  break;
8078  lastfreespace = actualfreespace; /* for next time */
8079 
8080  /*
8081  * Deletion operation (which is bottom-up) will definitely
8082  * access the next block in line. Prepare for that now.
8083  *
8084  * Decay target free space so that we don't hang on for too
8085  * long with a marginal case. (Space target is only truly
8086  * helpful when it allows us to recognize that we don't need
8087  * to access more than 1 or 2 blocks to satisfy caller due to
8088  * agreeable workload characteristics.)
8089  *
8090  * We are a bit more patient when we encounter contiguous
8091  * blocks, though: these are treated as favorable blocks. The
8092  * decay process is only applied when the next block in line
8093  * is not a favorable/contiguous block. This is not an
8094  * exception to the general rule; we still insist on finding
8095  * at least one deletable item per block accessed. See
8096  * bottomup_nblocksfavorable() for full details of the theory
8097  * behind favorable blocks and heap block locality in general.
8098  *
8099  * Note: The first block in line is always treated as a
8100  * favorable block, so the earliest possible point that the
8101  * decay can be applied is just before we access the second
8102  * block in line. The Assert() verifies this for us.
8103  */
8104  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
8105  if (nblocksfavorable > 0)
8106  nblocksfavorable--;
8107  else
8108  curtargetfreespace /= 2;
8109  }
8110 
8111  /* release old buffer */
8112  if (BufferIsValid(buf))
8113  UnlockReleaseBuffer(buf);
8114 
8115  blkno = ItemPointerGetBlockNumber(htid);
8116  buf = ReadBuffer(rel, blkno);
8117  nblocksaccessed++;
8118  Assert(!delstate->bottomup ||
8119  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
8120 
8121 #ifdef USE_PREFETCH
8122 
8123  /*
8124  * To maintain the prefetch distance, prefetch one more page for
8125  * each page we read.
8126  */
8127  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
8128 #endif
8129 
8130  LockBuffer(buf, BUFFER_LOCK_SHARE);
8131 
8132  page = BufferGetPage(buf);
8133  maxoff = PageGetMaxOffsetNumber(page);
8134  }
8135 
8136  /*
8137  * In passing, detect index corruption involving an index page with a
8138  * TID that points to a location in the heap that couldn't possibly be
8139  * correct. We only do this with actual TIDs from caller's index page
8140  * (not items reached by traversing through a HOT chain).
8141  */
8142  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
8143 
8144  if (istatus->knowndeletable)
8145  Assert(!delstate->bottomup && !istatus->promising);
8146  else
8147  {
8148  ItemPointerData tmp = *htid;
8149  HeapTupleData heapTuple;
8150 
8151  /* Are any tuples from this HOT chain non-vacuumable? */
8152  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
8153  &heapTuple, NULL, true))
8154  continue; /* can't delete entry */
8155 
8156  /* Caller will delete, since whole HOT chain is vacuumable */
8157  istatus->knowndeletable = true;
8158 
8159  /* Maintain index free space info for bottom-up deletion case */
8160  if (delstate->bottomup)
8161  {
8162  Assert(istatus->freespace > 0);
8163  actualfreespace += istatus->freespace;
8164  if (actualfreespace >= curtargetfreespace)
8165  bottomup_final_block = true;
8166  }
8167  }
8168 
8169  /*
8170  * Maintain snapshotConflictHorizon value for deletion operation as a
8171  * whole by advancing current value using heap tuple headers. This is
8172  * loosely based on the logic for pruning a HOT chain.
8173  */
8174  offnum = ItemPointerGetOffsetNumber(htid);
8175  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8176  for (;;)
8177  {
8178  ItemId lp;
8179  HeapTupleHeader htup;
8180 
8181  /* Sanity check (pure paranoia) */
8182  if (offnum < FirstOffsetNumber)
8183  break;
8184 
8185  /*
8186  * An offset past the end of page's line pointer array is possible
8187  * when the array was truncated
8188  */
8189  if (offnum > maxoff)
8190  break;
8191 
8192  lp = PageGetItemId(page, offnum);
8193  if (ItemIdIsRedirected(lp))
8194  {
8195  offnum = ItemIdGetRedirect(lp);
8196  continue;
8197  }
8198 
8199  /*
8200  * We'll often encounter LP_DEAD line pointers (especially with an
8201  * entry marked knowndeletable by our caller up front). No heap
8202  * tuple headers get examined for an htid that leads us to an
8203  * LP_DEAD item. This is okay because the earlier pruning
8204  * operation that made the line pointer LP_DEAD in the first place
8205  * must have considered the original tuple header as part of
8206  * generating its own snapshotConflictHorizon value.
8207  *
8208  * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8209  * the same strategy that index vacuuming uses in all cases. Index
8210  * VACUUM WAL records don't even have a snapshotConflictHorizon
8211  * field of their own for this reason.
8212  */
8213  if (!ItemIdIsNormal(lp))
8214  break;
8215 
8216  htup = (HeapTupleHeader) PageGetItem(page, lp);
8217 
8218  /*
8219  * Check the tuple XMIN against prior XMAX, if any
8220  */
8221  if (TransactionIdIsValid(priorXmax) &&
8222  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
8223  break;
8224 
8225  HeapTupleHeaderAdvanceConflictHorizon(htup,
8226  &snapshotConflictHorizon);
8227 
8228  /*
8229  * If the tuple is not HOT-updated, then we are at the end of this
8230  * HOT-chain. No need to visit later tuples from the same update
8231  * chain (they get their own index entries) -- just move on to
8232  * next htid from index AM caller.
8233  */
8234  if (!HeapTupleHeaderIsHotUpdated(htup))
8235  break;
8236 
8237  /* Advance to next HOT chain member */
8238  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8239  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8240  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
8241  }
8242 
8243  /* Enable further/final shrinking of deltids for caller */
8244  finalndeltids = i + 1;
8245  }
8246 
8247  UnlockReleaseBuffer(buf);
8248 
8249  /*
8250  * Shrink deltids array to exclude non-deletable entries at the end. This
8251  * is not just a minor optimization. Final deltids array size might be
8252  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8253  * ndeltids being zero in all cases with zero total deletable entries.
8254  */
8255  Assert(finalndeltids > 0 || delstate->bottomup);
8256  delstate->ndeltids = finalndeltids;
8257 
8258  return snapshotConflictHorizon;
8259 }
int maintenance_io_concurrency
Definition: bufmgr.c:158
#define Min(x, y)
Definition: c.h:958
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8513
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7811
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7896
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:181
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1622
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8301
static char * buf
Definition: pg_test_fsync.c:72
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:51
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:254
int bottomupfreespace
Definition: tableam.h:249
TM_IndexDelete * deltids
Definition: tableam.h:253
ItemPointerData tid
Definition: tableam.h:212
bool knowndeletable
Definition: tableam.h:219
bool promising
Definition: tableam.h:222
int16 freespace
Definition: tableam.h:223

References Assert, TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
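
An index-AM-side sketch (simplified from the nbtree deletion path): fill in a TM_IndexDeleteOp, let the heap decide which TIDs are deletable, and use the returned horizon in the index deletion WAL record. irel, ibuf, deltids, status and ndeltids are assumed to have been prepared by the index AM:

  TM_IndexDeleteOp delstate;
  TransactionId conflict_horizon;

  delstate.irel = irel;
  delstate.iblknum = BufferGetBlockNumber(ibuf);
  delstate.bottomup = false;                /* simple deletion, not bottom-up */
  delstate.bottomupfreespace = 0;
  delstate.ndeltids = ndeltids;
  delstate.deltids = deltids;
  delstate.status = status;

  conflict_horizon = heap_index_delete_tuples(heapRel, &delstate);
  /* only entries now flagged knowndeletable may actually be removed */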

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void *  arg 
)

Definition at line 6202 of file heapam.c.

6205 {
6206  HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6207  TM_Result result;
6208  bool ret;
6209 
6210 #ifdef USE_ASSERT_CHECKING
6211  if (RelationGetRelid(relation) == RelationRelationId)
6212  check_inplace_rel_lock(oldtup_ptr);
6213 #endif
6214 
6215  Assert(BufferIsValid(buffer));
6216 
6217  /*
6218  * Construct shared cache inval if necessary. Because we pass a tuple
6219  * version without our own inplace changes or inplace changes other
6220  * sessions complete while we wait for locks, inplace update mustn't
6221  * change catcache lookup keys. But we aren't bothering with index
6222  * updates either, so that's true a fortiori. After LockBuffer(), it
6223  * would be too late, because this might reach a
6224  * CatalogCacheInitializeCache() that locks "buffer".
6225  */
6226  CacheInvalidateHeapTupleInplace(relation, oldtup_ptr, NULL);
6227 
6228  LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6229  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6230 
6231  /*----------
6232  * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6233  *
6234  * - wait unconditionally
6235  * - already locked tuple above, since inplace needs that unconditionally
6236  * - don't recheck header after wait: simpler to defer to next iteration
6237  * - don't try to continue even if the updater aborts: likewise
6238  * - no crosscheck
6239  */
6240  result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
6241  buffer);
6242 
6243  if (result == TM_Invisible)
6244  {
6245  /* no known way this can happen */
6246  ereport(ERROR,
6247  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6248  errmsg_internal("attempted to overwrite invisible tuple")));
6249  }
6250  else if (result == TM_SelfModified)
6251  {
6252  /*
6253  * CREATE INDEX might reach this if an expression is silly enough to
6254  * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6255  * statements might get here after a heap_update() of the same row, in
6256  * the absence of an intervening CommandCounterIncrement().
6257  */
6258  ereport(ERROR,
6259  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6260  errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6261  }
6262  else if (result == TM_BeingModified)
6263  {
6264  TransactionId xwait;
6265  uint16 infomask;
6266 
6267  xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
6268  infomask = oldtup.t_data->t_infomask;
6269 
6270  if (infomask & HEAP_XMAX_IS_MULTI)
6271  {
6272  LockTupleMode lockmode = LockTupleNoKeyExclusive;
6273  MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
6274  int remain;
6275 
6276  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
6277  lockmode, NULL))
6278  {
6279  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6280  release_callback(arg);
6281  ret = false;
6282  MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
6283  relation, &oldtup.t_self, XLTW_Update,
6284  &remain);
6285  }
6286  else
6287  ret = true;
6288  }
6289  else if (TransactionIdIsCurrentTransactionId(xwait))
6290  ret = true;
6291  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
6292  ret = true;
6293  else
6294  {
6295  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6296  release_callback(arg);
6297  ret = false;
6298  XactLockTableWait(xwait, relation, &oldtup.t_self,
6299  XLTW_Update);
6300  }
6301  }
6302  else
6303  {
6304  ret = (result == TM_Ok);
6305  if (!ret)
6306  {
6307  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6308  release_callback(arg);
6309  }
6310  }
6311 
6312  /*
6313  * GetCatalogSnapshot() relies on invalidation messages to know when to
6314  * take a new snapshot. COMMIT of xwait is responsible for sending the
6315  * invalidation. We're not acquiring heavyweight locks sufficient to
6316  * block if not yet sent, so we must take a new snapshot to ensure a later
6317  * attempt has a fair chance. While we don't need this if xwait aborted,
6318  * don't bother optimizing that.
6319  */
6320  if (!ret)
6321  {
6322  UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6323  ForgetInplace_Inval();
6324  InvalidateCatalogSnapshot();
6325  }
6326  return ret;
6327 }
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1510
void ForgetInplace_Inval(void)
Definition: inval.c:1211
void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
Definition: lmgr.c:557
void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
Definition: lmgr.c:594
@ XLTW_Update
Definition: lmgr.h:27
#define InplaceUpdateTupleLock
Definition: lockdefs.h:48
LockTupleMode
Definition: lockoptions.h:50
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
void * arg
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:388
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:828

References arg, Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().
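
A sketch of the full in-place update protocol built on this function, modeled on systable_inplace_update_begin()/_finish(); ordinary callers use those wrappers rather than calling the heap_inplace_* routines directly. release_callback, arg, newtup and dirty are assumed caller state:

  if (heap_inplace_lock(relation, oldtup, buffer, release_callback, arg))
  {
      /* ... modify a palloc'd copy newtup without changing its length ... */

      if (dirty)
          heap_inplace_update_and_unlock(relation, oldtup, newtup, buffer);
      else
          heap_inplace_unlock(relation, oldtup, buffer);
  }
  else
  {
      /* tuple changed under us; caller restarts its scan and retries */
  }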

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)

Definition at line 6338 of file heapam.c.

6341 {
6342  HeapTupleHeader htup = oldtup->t_data;
6343  uint32 oldlen;
6344  uint32 newlen;
6345  char *dst;
6346  char *src;
6347  int nmsgs = 0;
6348  SharedInvalidationMessage *invalMessages = NULL;
6349  bool RelcacheInitFileInval = false;
6350 
6351  Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6352  oldlen = oldtup->t_len - htup->t_hoff;
6353  newlen = tuple->t_len - tuple->t_data->t_hoff;
6354  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6355  elog(ERROR, "wrong tuple length");
6356 
6357  dst = (char *) htup + htup->t_hoff;
6358  src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6359 
6360  /* Like RecordTransactionCommit(), log only if needed */
6361  if (XLogStandbyInfoActive())
6362  nmsgs = inplaceGetInvalidationMessages(&invalMessages,
6363  &RelcacheInitFileInval);
6364 
6365  /*
6366  * Unlink relcache init files as needed. If unlinking, acquire
6367  * RelCacheInitLock until after associated invalidations. By doing this
6368  * in advance, if we checkpoint and then crash between inplace
6369  * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6370  * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6371  * neglect to PANIC on EIO.
6372  */
6373  PreInplace_Inval();
6374 
6375  /*----------
6376  * NO EREPORT(ERROR) from here till changes are complete
6377  *
6378  * Our buffer lock won't stop a reader having already pinned and checked
6379  * visibility for this tuple. Hence, we write WAL first, then mutate the
6380  * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6381  * checkpoint delay makes that acceptable. With the usual order of
6382  * changes, a crash after memcpy() and before XLogInsert() could allow
6383  * datfrozenxid to overtake relfrozenxid:
6384  *
6385  * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6386  * ["R" is a VACUUM tbl]
6387  * D: vac_update_datfrozenid() -> systable_beginscan(pg_class)
6388  * D: systable_getnext() returns pg_class tuple of tbl
6389  * R: memcpy() into pg_class tuple of tbl
6390  * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6391  * [crash]
6392  * [recovery restores datfrozenxid w/o relfrozenxid]
6393  *
6394  * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6395  * the buffer to the stack before logging. Here, that facilitates a FPI
6396  * of the post-mutation block before we accept other sessions seeing it.
6397  */
6398  Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
6399  START_CRIT_SECTION();
6400  MyProc->delayChkptFlags |= DELAY_CHKPT_START;
6401 
6402  /* XLOG stuff */
6403  if (RelationNeedsWAL(relation))
6404  {
6405  xl_heap_inplace xlrec;
6406  PGAlignedBlock copied_buffer;
6407  char *origdata = (char *) BufferGetBlock(buffer);
6408  Page page = BufferGetPage(buffer);
6409  uint16 lower = ((PageHeader) page)->pd_lower;
6410  uint16 upper = ((PageHeader) page)->pd_upper;
6411  uintptr_t dst_offset_in_block;
6412  RelFileLocator rlocator;
6413  ForkNumber forkno;
6414  BlockNumber blkno;
6415  XLogRecPtr recptr;
6416 
6417  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6418  xlrec.dbId = MyDatabaseId;
6419  xlrec.tsId = MyDatabaseTableSpace;
6420  xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6421  xlrec.nmsgs = nmsgs;
6422 
6423  XLogBeginInsert();
6424  XLogRegisterData((char *) &xlrec, MinSizeOfHeapInplace);
6425  if (nmsgs != 0)
6426  XLogRegisterData((char *) invalMessages,
6427  nmsgs * sizeof(SharedInvalidationMessage));
6428 
6429  /* register block matching what buffer will look like after changes */
6430  memcpy(copied_buffer.data, origdata, lower);
6431  memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6432  dst_offset_in_block = dst - origdata;
6433  memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6434  BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6435  Assert(forkno == MAIN_FORKNUM);
6436  XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6437  REGBUF_STANDARD);
6438  XLogRegisterBufData(0, src, newlen);
6439 
6440  /* inplace updates aren't decoded atm, don't log the origin */
6441 
6442  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6443 
6444  PageSetLSN(page, recptr);
6445  }
6446 
6447  memcpy(dst, src, newlen);
6448 
6449  MarkBufferDirty(buffer);
6450 
6451  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6452 
6453  /*
6454  * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6455  * do this before UnlockTuple().
6456  *
6457  * If we're mutating a tuple visible only to this transaction, there's an
6458  * equivalent transactional inval from the action that created the tuple,
6459  * and this inval is superfluous.
6460  */
6461  AtInplace_Inval();
6462 
6463  MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
6464  END_CRIT_SECTION();
6465  UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6466 
6467  AcceptInvalidationMessages(); /* local processing of just-sent inval */
6468 
6469  /*
6470  * Queue a transactional inval. The immediate invalidation we just sent
6471  * is the only one known to be necessary. To reduce risk from the
6472  * transition to immediate invalidation, continue sending a transactional
6473  * invalidation like we've long done. Third-party code might rely on it.
6474  */
6475  if (!IsBootstrapProcessingMode())
6476  CacheInvalidateHeapTuple(relation, tuple, NULL);
6477 }
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:3745
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:367
PageHeaderData * PageHeader
Definition: bufpage.h:173
uint32_t uint32
Definition: c.h:485
Oid MyDatabaseTableSpace
Definition: globals.c:95
Oid MyDatabaseId
Definition: globals.c:93
#define MinSizeOfHeapInplace
Definition: heapam_xlog.h:436
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition: inval.c:863
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition: inval.c:1015
void PreInplace_Inval(void)
Definition: inval.c:1175
void AtInplace_Inval(void)
Definition: inval.c:1188
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
#define DELAY_CHKPT_START
Definition: proc.h:119
ForkNumber
Definition: relpath.h:56
PGPROC * MyProc
Definition: proc.c:66
int delayChkptFlags
Definition: proc.h:240
OffsetNumber offnum
Definition: heapam_xlog.h:428
bool relcacheInitFileInval
Definition: heapam_xlog.h:431
char data[BLCKSZ]
Definition: c.h:1073
#define XLogStandbyInfoActive()
Definition: xlog.h:123
void XLogRegisterBufData(uint8 block_id, const char *data, uint32 len)
Definition: xloginsert.c:405
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const char *page, uint8 flags)
Definition: xloginsert.c:309

References AcceptInvalidationMessages(), Assert, AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), PGAlignedBlock::data, xl_heap_inplace::dbId, DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, xl_heap_inplace::nmsgs, xl_heap_inplace::offnum, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, xl_heap_inplace::relcacheInitFileInval, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, xl_heap_inplace::tsId, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1985 of file heapam.c.

1987 {
1988  TransactionId xid = GetCurrentTransactionId();
1989  HeapTuple heaptup;
1990  Buffer buffer;
1991  Buffer vmbuffer = InvalidBuffer;
1992  bool all_visible_cleared = false;
1993 
1994  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
1995  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
1996  RelationGetNumberOfAttributes(relation));
1997 
1998  /*
1999  * Fill in tuple header fields and toast the tuple if necessary.
2000  *
2001  * Note: below this point, heaptup is the data we actually intend to store
2002  * into the relation; tup is the caller's original untoasted data.
2003  */
2004  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2005 
2006  /*
2007  * Find buffer to insert this tuple into. If the page is all visible,
2008  * this will also pin the requisite visibility map page.
2009  */
2010  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2011  InvalidBuffer, options, bistate,
2012  &vmbuffer, NULL,
2013  0);
2014 
2015  /*
2016  * We're about to do the actual insert -- but check for conflict first, to
2017  * avoid possibly having to roll back work we've just done.
2018  *
2019  * This is safe without a recheck as long as there is no possibility of
2020  * another process scanning the page between this check and the insert
2021  * being visible to the scan (i.e., an exclusive buffer content lock is
2022  * continuously held from this point until the tuple insert is visible).
2023  *
2024  * For a heap insert, we only need to check for table-level SSI locks. Our
2025  * new tuple can't possibly conflict with existing tuple locks, and heap
2026  * page locks are only consolidated versions of tuple locks; they do not
2027  * lock "gaps" as index page locks do. So we don't need to specify a
2028  * buffer when making the call, which makes for a faster check.
2029  */
2030  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2031 
2032  /* NO EREPORT(ERROR) from here till changes are logged */
2033  START_CRIT_SECTION();
2034 
2035  RelationPutHeapTuple(relation, buffer, heaptup,
2036  (options & HEAP_INSERT_SPECULATIVE) != 0);
2037 
2038  if (PageIsAllVisible(BufferGetPage(buffer)))
2039  {
2040  all_visible_cleared = true;
2042  visibilitymap_clear(relation,
2043  ItemPointerGetBlockNumber(&(heaptup->t_self)),
2044  vmbuffer, VISIBILITYMAP_VALID_BITS);
2045  }
2046 
2047  /*
2048  * XXX Should we set PageSetPrunable on this page ?
2049  *
2050  * The inserting transaction may eventually abort thus making this tuple
2051  * DEAD and hence available for pruning. Though we don't want to optimize
2052  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2053  * aborted tuple will never be pruned until next vacuum is triggered.
2054  *
2055  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2056  */
2057 
2058  MarkBufferDirty(buffer);
2059 
2060  /* XLOG stuff */
2061  if (RelationNeedsWAL(relation))
2062  {
2063  xl_heap_insert xlrec;
2064  xl_heap_header xlhdr;
2065  XLogRecPtr recptr;
2066  Page page = BufferGetPage(buffer);
2067  uint8 info = XLOG_HEAP_INSERT;
2068  int bufflags = 0;
2069 
2070  /*
2071  * If this is a catalog, we need to transmit combo CIDs to properly
2072  * decode, so log that as well.
2073  */
2074  if (RelationIsAccessibleInLogicalDecoding(relation))
2075  log_heap_new_cid(relation, heaptup);
2076 
2077  /*
2078  * If this is the single and first tuple on page, we can reinit the
2079  * page instead of restoring the whole thing. Set flag, and hide
2080  * buffer references from XLogInsert.
2081  */
2082  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
2083  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2084  {
2085  info |= XLOG_HEAP_INIT_PAGE;
2086  bufflags |= REGBUF_WILL_INIT;
2087  }
2088 
2089  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2090  xlrec.flags = 0;
2091  if (all_visible_cleared)
2092  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2093  if (options & HEAP_INSERT_SPECULATIVE)
2094  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
2095  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
2096 
2097  /*
2098  * For logical decoding, we need the tuple even if we're doing a full
2099  * page write, so make sure it's included even if we take a full-page
2100  * image. (XXX We could alternatively store a pointer into the FPW).
2101  */
2102  if (RelationIsLogicallyLogged(relation) &&
2103  !(options & HEAP_INSERT_NO_LOGICAL))
2104  {
2105  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2106  bufflags |= REGBUF_KEEP_DATA;
2107 
2108  if (IsToastRelation(relation))
2109  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
2110  }
2111 
2112  XLogBeginInsert();
2113  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
2114 
2115  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2116  xlhdr.t_infomask = heaptup->t_data->t_infomask;
2117  xlhdr.t_hoff = heaptup->t_data->t_hoff;
2118 
2119  /*
2120  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2121  * write the whole page to the xlog, we don't need to store
2122  * xl_heap_header in the xlog.
2123  */
2124  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2125  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
2126  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2127  XLogRegisterBufData(0,
2128  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2129  heaptup->t_len - SizeofHeapTupleHeader);
2130 
2131  /* filtering by origin on a row level is much more efficient */
2132  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2133 
2134  recptr = XLogInsert(RM_HEAP_ID, info);
2135 
2136  PageSetLSN(page, recptr);
2137  }
2138 
2139  END_CRIT_SECTION();
2140 
2141  UnlockReleaseBuffer(buffer);
2142  if (vmbuffer != InvalidBuffer)
2143  ReleaseBuffer(vmbuffer);
2144 
2145  /*
2146  * If tuple is cachable, mark it for invalidation from the caches in case
2147  * we abort. Note it is OK to do this after releasing the buffer, because
2148  * the heaptup data structure is all in local memory, not in the shared
2149  * buffer.
2150  */
2151  CacheInvalidateHeapTuple(relation, heaptup, NULL);
2152 
2153  /* Note: speculative insertions are counted too, even if aborted later */
2154  pgstat_count_heap_insert(relation, 1);
2155 
2156  /*
2157  * If heaptup is a private copy, release it. Don't forget to copy t_self
2158  * back to the caller's image, too.
2159  */
2160  if (heaptup != tup)
2161  {
2162  tup->t_self = heaptup->t_self;
2163  heap_freetuple(heaptup);
2164  }
2165 }
uint8_t uint8
Definition: c.h:483
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2174
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:39
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:38
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:33
#define SizeOfHeapInsert
Definition: heapam_xlog.h:168
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:701
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:511
OffsetNumber offnum
Definition: heapam_xlog.h:162
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
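
A minimal caller sketch (hypothetical, not part of heapam.c) may help illustrate the calling convention shown above. It assumes "rel" is an already-opened and suitably locked relation and "tup" a palloc'd tuple, roughly mirroring what simple_heap_insert() does:

    void
    insert_one_tuple(Relation rel, HeapTuple tup)
    {
        /* current command ID, default options, no bulk-insert state */
        heap_insert(rel, tup, GetCurrentCommandId(true), 0, NULL);

        /* heap_insert() has filled in tup->t_self with the new tuple's TID */
    }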

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer *  buffer,
struct TM_FailureData *  tmfd 
)

Definition at line 4427 of file heapam.c.

4431 {
4432  TM_Result result;
4433  ItemPointer tid = &(tuple->t_self);
4434  ItemId lp;
4435  Page page;
4436  Buffer vmbuffer = InvalidBuffer;
4437  BlockNumber block;
4438  TransactionId xid,
4439  xmax;
4440  uint16 old_infomask,
4441  new_infomask,
4442  new_infomask2;
4443  bool first_time = true;
4444  bool skip_tuple_lock = false;
4445  bool have_tuple_lock = false;
4446  bool cleared_all_frozen = false;
4447 
4448  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4449  block = ItemPointerGetBlockNumber(tid);
4450 
4451  /*
4452  * Before locking the buffer, pin the visibility map page if it appears to
4453  * be necessary. Since we haven't got the lock yet, someone else might be
4454  * in the middle of changing this, so we'll need to recheck after we have
4455  * the lock.
4456  */
4457  if (PageIsAllVisible(BufferGetPage(*buffer)))
4458  visibilitymap_pin(relation, block, &vmbuffer);
4459 
4461 
4462  page = BufferGetPage(*buffer);
4463  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4464  Assert(ItemIdIsNormal(lp));
4465 
4466  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4467  tuple->t_len = ItemIdGetLength(lp);
4468  tuple->t_tableOid = RelationGetRelid(relation);
4469 
4470 l3:
4471  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4472 
4473  if (result == TM_Invisible)
4474  {
4475  /*
4476  * This is possible, but only when locking a tuple for ON CONFLICT
4477  * UPDATE. We return this value here rather than throwing an error in
4478  * order to give that case the opportunity to throw a more specific
4479  * error.
4480  */
4481  result = TM_Invisible;
4482  goto out_locked;
4483  }
4484  else if (result == TM_BeingModified ||
4485  result == TM_Updated ||
4486  result == TM_Deleted)
4487  {
4488  TransactionId xwait;
4489  uint16 infomask;
4490  uint16 infomask2;
4491  bool require_sleep;
4492  ItemPointerData t_ctid;
4493 
4494  /* must copy state data before unlocking buffer */
4495  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4496  infomask = tuple->t_data->t_infomask;
4497  infomask2 = tuple->t_data->t_infomask2;
4498  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4499 
4500  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4501 
4502  /*
4503  * If any subtransaction of the current top transaction already holds
4504  * a lock as strong as or stronger than what we're requesting, we
4505  * effectively hold the desired lock already. We *must* succeed
4506  * without trying to take the tuple lock, else we will deadlock
4507  * against anyone wanting to acquire a stronger lock.
4508  *
4509  * Note we only do this the first time we loop on the HTSU result;
4510  * there is no point in testing in subsequent passes, because
4511  * evidently our own transaction cannot have acquired a new lock after
4512  * the first time we checked.
4513  */
4514  if (first_time)
4515  {
4516  first_time = false;
4517 
4518  if (infomask & HEAP_XMAX_IS_MULTI)
4519  {
4520  int i;
4521  int nmembers;
4522  MultiXactMember *members;
4523 
4524  /*
4525  * We don't need to allow old multixacts here; if that had
4526  * been the case, HeapTupleSatisfiesUpdate would have returned
4527  * MayBeUpdated and we wouldn't be here.
4528  */
4529  nmembers =
4530  GetMultiXactIdMembers(xwait, &members, false,
4531  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4532 
4533  for (i = 0; i < nmembers; i++)
4534  {
4535  /* only consider members of our own transaction */
4536  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4537  continue;
4538 
4539  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4540  {
4541  pfree(members);
4542  result = TM_Ok;
4543  goto out_unlocked;
4544  }
4545  else
4546  {
4547  /*
4548  * Disable acquisition of the heavyweight tuple lock.
4549  * Otherwise, when promoting a weaker lock, we might
4550  * deadlock with another locker that has acquired the
4551  * heavyweight tuple lock and is waiting for our
4552  * transaction to finish.
4553  *
4554  * Note that in this case we still need to wait for
4555  * the multixact if required, to avoid acquiring
4556  * conflicting locks.
4557  */
4558  skip_tuple_lock = true;
4559  }
4560  }
4561 
4562  if (members)
4563  pfree(members);
4564  }
4565  else if (TransactionIdIsCurrentTransactionId(xwait))
4566  {
4567  switch (mode)
4568  {
4569  case LockTupleKeyShare:
4570  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4571  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4572  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4573  result = TM_Ok;
4574  goto out_unlocked;
4575  case LockTupleShare:
4576  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4577  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4578  {
4579  result = TM_Ok;
4580  goto out_unlocked;
4581  }
4582  break;
4584  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4585  {
4586  result = TM_Ok;
4587  goto out_unlocked;
4588  }
4589  break;
4590  case LockTupleExclusive:
4591  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4592  infomask2 & HEAP_KEYS_UPDATED)
4593  {
4594  result = TM_Ok;
4595  goto out_unlocked;
4596  }
4597  break;
4598  }
4599  }
4600  }
4601 
4602  /*
4603  * Initially assume that we will have to wait for the locking
4604  * transaction(s) to finish. We check various cases below in which
4605  * this can be turned off.
4606  */
4607  require_sleep = true;
4608  if (mode == LockTupleKeyShare)
4609  {
4610  /*
4611  * If we're requesting KeyShare, and there's no update present, we
4612  * don't need to wait. Even if there is an update, we can still
4613  * continue if the key hasn't been modified.
4614  *
4615  * However, if there are updates, we need to walk the update chain
4616  * to mark future versions of the row as locked, too. That way,
4617  * if somebody deletes that future version, we're protected
4618  * against the key going away. This locking of future versions
4619  * could block momentarily, if a concurrent transaction is
4620  * deleting a key; or it could return a value to the effect that
4621  * the transaction deleting the key has already committed. So we
4622  * do this before re-locking the buffer; otherwise this would be
4623  * prone to deadlocks.
4624  *
4625  * Note that the TID we're locking was grabbed before we unlocked
4626  * the buffer. For it to change while we're not looking, the
4627  * other properties we're testing for below after re-locking the
4628  * buffer would also change, in which case we would restart this
4629  * loop above.
4630  */
4631  if (!(infomask2 & HEAP_KEYS_UPDATED))
4632  {
4633  bool updated;
4634 
4635  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4636 
4637  /*
4638  * If there are updates, follow the update chain; bail out if
4639  * that cannot be done.
4640  */
4641  if (follow_updates && updated)
4642  {
4643  TM_Result res;
4644 
4645  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4647  mode);
4648  if (res != TM_Ok)
4649  {
4650  result = res;
4651  /* recovery code expects to have buffer lock held */
4653  goto failed;
4654  }
4655  }
4656 
4658 
4659  /*
4660  * Make sure it's still an appropriate lock, else start over.
4661  * Also, if it wasn't updated before we released the lock, but
4662  * is updated now, we start over too; the reason is that we
4663  * now need to follow the update chain to lock the new
4664  * versions.
4665  */
4666  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4667  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4668  !updated))
4669  goto l3;
4670 
4671  /* Things look okay, so we can skip sleeping */
4672  require_sleep = false;
4673 
4674  /*
4675  * Note we allow Xmax to change here; other updaters/lockers
4676  * could have modified it before we grabbed the buffer lock.
4677  * However, this is not a problem, because with the recheck we
4678  * just did we ensure that they still don't conflict with the
4679  * lock we want.
4680  */
4681  }
4682  }
4683  else if (mode == LockTupleShare)
4684  {
4685  /*
4686  * If we're requesting Share, we can similarly avoid sleeping if
4687  * there's no update and no exclusive lock present.
4688  */
4689  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4690  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4691  {
4693 
4694  /*
4695  * Make sure it's still an appropriate lock, else start over.
4696  * See above about allowing xmax to change.
4697  */
4698  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4700  goto l3;
4701  require_sleep = false;
4702  }
4703  }
4704  else if (mode == LockTupleNoKeyExclusive)
4705  {
4706  /*
4707  * If we're requesting NoKeyExclusive, we might also be able to
4708  * avoid sleeping; just ensure that there is no conflicting lock
4709  * already acquired.
4710  */
4711  if (infomask & HEAP_XMAX_IS_MULTI)
4712  {
4713  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4714  mode, NULL))
4715  {
4716  /*
4717  * No conflict, but if the xmax changed under us in the
4718  * meantime, start over.
4719  */
4721  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4723  xwait))
4724  goto l3;
4725 
4726  /* otherwise, we're good */
4727  require_sleep = false;
4728  }
4729  }
4730  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4731  {
4733 
4734  /* if the xmax changed in the meantime, start over */
4735  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4737  xwait))
4738  goto l3;
4739  /* otherwise, we're good */
4740  require_sleep = false;
4741  }
4742  }
4743 
4744  /*
4745  * As a check independent from those above, we can also avoid sleeping
4746  * if the current transaction is the sole locker of the tuple. Note
4747  * that the strength of the lock already held is irrelevant; this is
4748  * not about recording the lock in Xmax (which will be done regardless
4749  * of this optimization, below). Also, note that the cases where we
4750  * hold a lock stronger than we are requesting are already handled
4751  * above by not doing anything.
4752  *
4753  * Note we only deal with the non-multixact case here; MultiXactIdWait
4754  * is well equipped to deal with this situation on its own.
4755  */
4756  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4758  {
4759  /* ... but if the xmax changed in the meantime, start over */
4761  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4763  xwait))
4764  goto l3;
4766  require_sleep = false;
4767  }
4768 
4769  /*
4770  * Time to sleep on the other transaction/multixact, if necessary.
4771  *
4772  * If the other transaction is an update/delete that's already
4773  * committed, then sleeping cannot possibly do any good: if we're
4774  * required to sleep, get out to raise an error instead.
4775  *
4776  * By here, we either have already acquired the buffer exclusive lock,
4777  * or we must wait for the locking transaction or multixact; so below
4778  * we ensure that we grab buffer lock after the sleep.
4779  */
4780  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4781  {
4783  goto failed;
4784  }
4785  else if (require_sleep)
4786  {
4787  /*
4788  * Acquire tuple lock to establish our priority for the tuple, or
4789  * die trying. LockTuple will release us when we are next-in-line
4790  * for the tuple. We must do this even if we are share-locking,
4791  * but not if we already have a weaker lock on the tuple.
4792  *
4793  * If we are forced to "start over" below, we keep the tuple lock;
4794  * this arranges that we stay at the head of the line while
4795  * rechecking tuple state.
4796  */
4797  if (!skip_tuple_lock &&
4798  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4799  &have_tuple_lock))
4800  {
4801  /*
4802  * This can only happen if wait_policy is Skip and the lock
4803  * couldn't be obtained.
4804  */
4805  result = TM_WouldBlock;
4806  /* recovery code expects to have buffer lock held */
4808  goto failed;
4809  }
4810 
4811  if (infomask & HEAP_XMAX_IS_MULTI)
4812  {
4814 
4815  /* We only ever lock tuples, never update them */
4816  if (status >= MultiXactStatusNoKeyUpdate)
4817  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4818 
4819  /* wait for multixact to end, or die trying */
4820  switch (wait_policy)
4821  {
4822  case LockWaitBlock:
4823  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4824  relation, &tuple->t_self, XLTW_Lock, NULL);
4825  break;
4826  case LockWaitSkip:
4828  status, infomask, relation,
4829  NULL))
4830  {
4831  result = TM_WouldBlock;
4832  /* recovery code expects to have buffer lock held */
4834  goto failed;
4835  }
4836  break;
4837  case LockWaitError:
4839  status, infomask, relation,
4840  NULL))
4841  ereport(ERROR,
4842  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4843  errmsg("could not obtain lock on row in relation \"%s\"",
4844  RelationGetRelationName(relation))));
4845 
4846  break;
4847  }
4848 
4849  /*
4850  * Of course, the multixact might not be done here: if we're
4851  * requesting a light lock mode, other transactions with light
4852  * locks could still be alive, as well as locks owned by our
4853  * own xact or other subxacts of this backend. We need to
4854  * preserve the surviving MultiXact members. Note that it
4855  * isn't absolutely necessary in the latter case, but doing so
4856  * is simpler.
4857  */
4858  }
4859  else
4860  {
4861  /* wait for regular transaction to end, or die trying */
4862  switch (wait_policy)
4863  {
4864  case LockWaitBlock:
4865  XactLockTableWait(xwait, relation, &tuple->t_self,
4866  XLTW_Lock);
4867  break;
4868  case LockWaitSkip:
4869  if (!ConditionalXactLockTableWait(xwait))
4870  {
4871  result = TM_WouldBlock;
4872  /* recovery code expects to have buffer lock held */
4874  goto failed;
4875  }
4876  break;
4877  case LockWaitError:
4878  if (!ConditionalXactLockTableWait(xwait))
4879  ereport(ERROR,
4880  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4881  errmsg("could not obtain lock on row in relation \"%s\"",
4882  RelationGetRelationName(relation))));
4883  break;
4884  }
4885  }
4886 
4887  /* if there are updates, follow the update chain */
4888  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4889  {
4890  TM_Result res;
4891 
4892  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4894  mode);
4895  if (res != TM_Ok)
4896  {
4897  result = res;
4898  /* recovery code expects to have buffer lock held */
4900  goto failed;
4901  }
4902  }
4903 
4905 
4906  /*
4907  * xwait is done, but if xwait had just locked the tuple then some
4908  * other xact could update this tuple before we get to this point.
4909  * Check for xmax change, and start over if so.
4910  */
4911  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4913  xwait))
4914  goto l3;
4915 
4916  if (!(infomask & HEAP_XMAX_IS_MULTI))
4917  {
4918  /*
4919  * Otherwise check if it committed or aborted. Note we cannot
4920  * be here if the tuple was only locked by somebody who didn't
4921  * conflict with us; that would have been handled above. So
4922  * that transaction must necessarily be gone by now. But
4923  * don't check for this in the multixact case, because some
4924  * locker transactions might still be running.
4925  */
4926  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4927  }
4928  }
4929 
4930  /* By here, we're certain that we hold buffer exclusive lock again */
4931 
4932  /*
4933  * We may lock if previous xmax aborted, or if it committed but only
4934  * locked the tuple without updating it; or if we didn't have to wait
4935  * at all for whatever reason.
4936  */
4937  if (!require_sleep ||
4938  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4941  result = TM_Ok;
4942  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4943  result = TM_Updated;
4944  else
4945  result = TM_Deleted;
4946  }
4947 
4948 failed:
4949  if (result != TM_Ok)
4950  {
4951  Assert(result == TM_SelfModified || result == TM_Updated ||
4952  result == TM_Deleted || result == TM_WouldBlock);
4953 
4954  /*
4955  * When locking a tuple under LockWaitSkip semantics and we fail with
4956  * TM_WouldBlock above, it's possible for concurrent transactions to
4957  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4958  * this assert is slightly different from the equivalent one in
4959  * heap_delete and heap_update.
4960  */
4961  Assert((result == TM_WouldBlock) ||
4962  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4963  Assert(result != TM_Updated ||
4964  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4965  tmfd->ctid = tuple->t_data->t_ctid;
4966  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4967  if (result == TM_SelfModified)
4968  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4969  else
4970  tmfd->cmax = InvalidCommandId;
4971  goto out_locked;
4972  }
4973 
4974  /*
4975  * If we didn't pin the visibility map page and the page has become all
4976  * visible while we were busy locking the buffer, or during some
4977  * subsequent window during which we had it unlocked, we'll have to unlock
4978  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4979  * unfortunate, especially since we'll now have to recheck whether the
4980  * tuple has been locked or updated under us, but hopefully it won't
4981  * happen very often.
4982  */
4983  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4984  {
4985  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4986  visibilitymap_pin(relation, block, &vmbuffer);
4988  goto l3;
4989  }
4990 
4991  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4992  old_infomask = tuple->t_data->t_infomask;
4993 
4994  /*
4995  * If this is the first possibly-multixact-able operation in the current
4996  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4997  * certain that the transaction will never become a member of any older
4998  * MultiXactIds than that. (We have to do this even if we end up just
4999  * using our own TransactionId below, since some other backend could
5000  * incorporate our XID into a MultiXact immediately afterwards.)
5001  */
5003 
5004  /*
5005  * Compute the new xmax and infomask to store into the tuple. Note we do
5006  * not modify the tuple just yet, because that would leave it in the wrong
5007  * state if multixact.c elogs.
5008  */
5009  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
5010  GetCurrentTransactionId(), mode, false,
5011  &xid, &new_infomask, &new_infomask2);
5012 
5014 
5015  /*
5016  * Store transaction information of xact locking the tuple.
5017  *
5018  * Note: Cmax is meaningless in this context, so don't set it; this avoids
5019  * possibly generating a useless combo CID. Moreover, if we're locking a
5020  * previously updated tuple, it's important to preserve the Cmax.
5021  *
5022  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5023  * we would break the HOT chain.
5024  */
5025  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
5026  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
5027  tuple->t_data->t_infomask |= new_infomask;
5028  tuple->t_data->t_infomask2 |= new_infomask2;
5029  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5031  HeapTupleHeaderSetXmax(tuple->t_data, xid);
5032 
5033  /*
5034  * Make sure there is no forward chain link in t_ctid. Note that in the
5035  * cases where the tuple has been updated, we must not overwrite t_ctid,
5036  * because it was set by the updater. Moreover, if the tuple has been
5037  * updated, we need to follow the update chain to lock the new versions of
5038  * the tuple as well.
5039  */
5040  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5041  tuple->t_data->t_ctid = *tid;
5042 
5043  /* Clear only the all-frozen bit on visibility map if needed */
5044  if (PageIsAllVisible(page) &&
5045  visibilitymap_clear(relation, block, vmbuffer,
5047  cleared_all_frozen = true;
5048 
5049 
5050  MarkBufferDirty(*buffer);
5051 
5052  /*
5053  * XLOG stuff. You might think that we don't need an XLOG record because
5054  * there is no state change worth restoring after a crash. You would be
5055  * wrong however: we have just written either a TransactionId or a
5056  * MultiXactId that may never have been seen on disk before, and we need
5057  * to make sure that there are XLOG entries covering those ID numbers.
5058  * Else the same IDs might be re-used after a crash, which would be
5059  * disastrous if this page made it to disk before the crash. Essentially
5060  * we have to enforce the WAL log-before-data rule even in this case.
5061  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5062  * entries for everything anyway.)
5063  */
5064  if (RelationNeedsWAL(relation))
5065  {
5066  xl_heap_lock xlrec;
5067  XLogRecPtr recptr;
5068 
5069  XLogBeginInsert();
5070  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
5071 
5072  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5073  xlrec.xmax = xid;
5074  xlrec.infobits_set = compute_infobits(new_infomask,
5075  tuple->t_data->t_infomask2);
5076  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
5077  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
5078 
5079  /* we don't decode row locks atm, so no need to log the origin */
5080 
5081  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
5082 
5083  PageSetLSN(page, recptr);
5084  }
5085 
5086  END_CRIT_SECTION();
5087 
5088  result = TM_Ok;
5089 
5090 out_locked:
5091  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
5092 
5093 out_unlocked:
5094  if (BufferIsValid(vmbuffer))
5095  ReleaseBuffer(vmbuffer);
5096 
5097  /*
5098  * Don't update the visibility map here. Locking a tuple doesn't change
5099  * visibility info.
5100  */
5101 
5102  /*
5103  * Now that we have successfully marked the tuple as locked, we can
5104  * release the lmgr tuple lock, if we had it.
5105  */
5106  if (have_tuple_lock)
5107  UnlockTupleTuplock(relation, tid, mode);
5108 
5109  return result;
5110 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:210
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5891
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7634
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4379
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:393
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:39
#define SizeOfHeapLock
Definition: heapam_xlog.h:404
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:729
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1299
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define RelationGetRelationName(relation)
Definition: rel.h:539
uint8 infobits_set
Definition: heapam_xlog.h:400
OffsetNumber offnum
Definition: heapam_xlog.h:399
TransactionId xmax
Definition: heapam_xlog.h:398
@ TM_WouldBlock
Definition: tableam.h:109
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
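
A hedged usage sketch (hypothetical, not from heapam.c): lock a single tuple identified by "tid" in the strongest mode, waiting out any conflicting lockers, along the lines of what heapam_tuple_lock() does. Only t_self needs to be set on entry; heap_lock_tuple() fills in the rest of the HeapTupleData and returns the tuple's buffer pinned, so the caller must release that pin:

    static bool
    lock_tuple_for_update(Relation rel, ItemPointer tid)
    {
        HeapTupleData tuple;
        Buffer      buffer;
        TM_FailureData tmfd;
        TM_Result   result;

        tuple.t_self = *tid;
        result = heap_lock_tuple(rel, &tuple,
                                 GetCurrentCommandId(true),
                                 LockTupleExclusive,
                                 LockWaitBlock,
                                 false,     /* don't follow the update chain */
                                 &buffer, &tmfd);

        ReleaseBuffer(buffer);      /* buffer is returned pinned */
        return result == TM_Ok;
    }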

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2254 of file heapam.c.

2256 {
2258  HeapTuple *heaptuples;
2259  int i;
2260  int ndone;
2261  PGAlignedBlock scratch;
2262  Page page;
2263  Buffer vmbuffer = InvalidBuffer;
2264  bool needwal;
2265  Size saveFreeSpace;
2266  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2267  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2268  bool starting_with_empty_page = false;
2269  int npages = 0;
2270  int npages_used = 0;
2271 
2272  /* currently not needed (thus unsupported) for heap_multi_insert() */
2274 
2275  needwal = RelationNeedsWAL(relation);
2276  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2278 
2279  /* Toast and set header data in all the slots */
2280  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2281  for (i = 0; i < ntuples; i++)
2282  {
2283  HeapTuple tuple;
2284 
2285  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2286  slots[i]->tts_tableOid = RelationGetRelid(relation);
2287  tuple->t_tableOid = slots[i]->tts_tableOid;
2288  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2289  options);
2290  }
2291 
2292  /*
2293  * We're about to do the actual inserts -- but check for conflict first,
2294  * to minimize the possibility of having to roll back work we've just
2295  * done.
2296  *
2297  * A check here does not definitively prevent a serialization anomaly;
2298  * that check MUST be done at least past the point of acquiring an
2299  * exclusive buffer content lock on every buffer that will be affected,
2300  * and MAY be done after all inserts are reflected in the buffers and
2301  * those locks are released; otherwise there is a race condition. Since
2302  * multiple buffers can be locked and unlocked in the loop below, and it
2303  * would not be feasible to identify and lock all of those buffers before
2304  * the loop, we must do a final check at the end.
2305  *
2306  * The check here could be omitted with no loss of correctness; it is
2307  * present strictly as an optimization.
2308  *
2309  * For heap inserts, we only need to check for table-level SSI locks. Our
2310  * new tuples can't possibly conflict with existing tuple locks, and heap
2311  * page locks are only consolidated versions of tuple locks; they do not
2312  * lock "gaps" as index page locks do. So we don't need to specify a
2313  * buffer when making the call, which makes for a faster check.
2314  */
2316 
2317  ndone = 0;
2318  while (ndone < ntuples)
2319  {
2320  Buffer buffer;
2321  bool all_visible_cleared = false;
2322  bool all_frozen_set = false;
2323  int nthispage;
2324 
2326 
2327  /*
2328  * Compute number of pages needed to fit the to-be-inserted tuples in
2329  * the worst case. This will be used to determine how much to extend
2330  * the relation by in RelationGetBufferForTuple(), if needed. If we
2331  * filled a prior page from scratch, we can just update our last
2332  * computation, but if we started with a partially filled page,
2333  * recompute from scratch, since the number of potentially required
2334  * pages can vary due to tuples needing to fit onto the page, page
2335  * headers, etc.
2336  */
2337  if (ndone == 0 || !starting_with_empty_page)
2338  {
2339  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2340  saveFreeSpace);
2341  npages_used = 0;
2342  }
2343  else
2344  npages_used++;
2345 
2346  /*
2347  * Find buffer where at least the next tuple will fit. If the page is
2348  * all-visible, this will also pin the requisite visibility map page.
2349  *
2350  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2351  * empty page. See all_frozen_set below.
2352  */
2353  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2354  InvalidBuffer, options, bistate,
2355  &vmbuffer, NULL,
2356  npages - npages_used);
2357  page = BufferGetPage(buffer);
2358 
2359  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2360 
2361  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2362  all_frozen_set = true;
2363 
2364  /* NO EREPORT(ERROR) from here till changes are logged */
2366 
2367  /*
2368  * RelationGetBufferForTuple has ensured that the first tuple fits.
2369  * Put that on the page, and then as many other tuples as fit.
2370  */
2371  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2372 
2373  /*
2374  * For logical decoding we need combo CIDs to properly decode the
2375  * catalog.
2376  */
2377  if (needwal && need_cids)
2378  log_heap_new_cid(relation, heaptuples[ndone]);
2379 
2380  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2381  {
2382  HeapTuple heaptup = heaptuples[ndone + nthispage];
2383 
2384  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2385  break;
2386 
2387  RelationPutHeapTuple(relation, buffer, heaptup, false);
2388 
2389  /*
2390  * For logical decoding we need combo CIDs to properly decode the
2391  * catalog.
2392  */
2393  if (needwal && need_cids)
2394  log_heap_new_cid(relation, heaptup);
2395  }
2396 
2397  /*
2398  * If the page is all visible, need to clear that, unless we're only
2399  * going to add further frozen rows to it.
2400  *
2401  * If we're only adding already frozen rows to a previously empty
2402  * page, mark it as all-visible.
2403  */
2404  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2405  {
2406  all_visible_cleared = true;
2407  PageClearAllVisible(page);
2408  visibilitymap_clear(relation,
2409  BufferGetBlockNumber(buffer),
2410  vmbuffer, VISIBILITYMAP_VALID_BITS);
2411  }
2412  else if (all_frozen_set)
2413  PageSetAllVisible(page);
2414 
2415  /*
2416  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2417  */
2418 
2419  MarkBufferDirty(buffer);
2420 
2421  /* XLOG stuff */
2422  if (needwal)
2423  {
2424  XLogRecPtr recptr;
2425  xl_heap_multi_insert *xlrec;
2427  char *tupledata;
2428  int totaldatalen;
2429  char *scratchptr = scratch.data;
2430  bool init;
2431  int bufflags = 0;
2432 
2433  /*
2434  * If the page was previously empty, we can reinit the page
2435  * instead of restoring the whole thing.
2436  */
2437  init = starting_with_empty_page;
2438 
2439  /* allocate xl_heap_multi_insert struct from the scratch area */
2440  xlrec = (xl_heap_multi_insert *) scratchptr;
2441  scratchptr += SizeOfHeapMultiInsert;
2442 
2443  /*
2444  * Allocate the offsets array, unless we're reinitializing the page;
2445  * in that case the tuples are stored in order starting at
2446  * FirstOffsetNumber and we don't need to store the offsets
2447  * explicitly.
2448  */
2449  if (!init)
2450  scratchptr += nthispage * sizeof(OffsetNumber);
2451 
2452  /* the rest of the scratch space is used for tuple data */
2453  tupledata = scratchptr;
2454 
2455  /* check that the mutually exclusive flags are not both set */
2456  Assert(!(all_visible_cleared && all_frozen_set));
2457 
2458  xlrec->flags = 0;
2459  if (all_visible_cleared)
2461  if (all_frozen_set)
2463 
2464  xlrec->ntuples = nthispage;
2465 
2466  /*
2467  * Write out an xl_multi_insert_tuple and the tuple data itself
2468  * for each tuple.
2469  */
2470  for (i = 0; i < nthispage; i++)
2471  {
2472  HeapTuple heaptup = heaptuples[ndone + i];
2473  xl_multi_insert_tuple *tuphdr;
2474  int datalen;
2475 
2476  if (!init)
2477  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2478  /* xl_multi_insert_tuple needs two-byte alignment. */
2479  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2480  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2481 
2482  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2483  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2484  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2485 
2486  /* write bitmap [+ padding] [+ oid] + data */
2487  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2488  memcpy(scratchptr,
2489  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2490  datalen);
2491  tuphdr->datalen = datalen;
2492  scratchptr += datalen;
2493  }
2494  totaldatalen = scratchptr - tupledata;
2495  Assert((scratchptr - scratch.data) < BLCKSZ);
2496 
2497  if (need_tuple_data)
2499 
2500  /*
2501  * Signal that this is the last xl_heap_multi_insert record
2502  * emitted by this call to heap_multi_insert(). Needed for logical
2503  * decoding so it knows when to clean up temporary data.
2504  */
2505  if (ndone + nthispage == ntuples)
2506  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2507 
2508  if (init)
2509  {
2510  info |= XLOG_HEAP_INIT_PAGE;
2511  bufflags |= REGBUF_WILL_INIT;
2512  }
2513 
2514  /*
2515  * If we're doing logical decoding, include the new tuple data
2516  * even if we take a full-page image of the page.
2517  */
2518  if (need_tuple_data)
2519  bufflags |= REGBUF_KEEP_DATA;
2520 
2521  XLogBeginInsert();
2522  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2523  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2524 
2525  XLogRegisterBufData(0, tupledata, totaldatalen);
2526 
2527  /* filtering by origin on a row level is much more efficient */
2529 
2530  recptr = XLogInsert(RM_HEAP2_ID, info);
2531 
2532  PageSetLSN(page, recptr);
2533  }
2534 
2535  END_CRIT_SECTION();
2536 
2537  /*
2538  * If we've frozen everything on the page, update the visibilitymap.
2539  * We're already holding pin on the vmbuffer.
2540  */
2541  if (all_frozen_set)
2542  {
2543  Assert(PageIsAllVisible(page));
2544  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2545 
2546  /*
2547  * It's fine to use InvalidTransactionId here - this is only used
2548  * when HEAP_INSERT_FROZEN is specified, which intentionally
2549  * violates visibility rules.
2550  */
2551  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2552  InvalidXLogRecPtr, vmbuffer,
2555  }
2556 
2557  UnlockReleaseBuffer(buffer);
2558  ndone += nthispage;
2559 
2560  /*
2561  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2562  * likely that we'll insert into subsequent heap pages that are likely
2563  * to use the same vm page.
2564  */
2565  }
2566 
2567  /* We're done with inserting all tuples, so release the last vmbuffer. */
2568  if (vmbuffer != InvalidBuffer)
2569  ReleaseBuffer(vmbuffer);
2570 
2571  /*
2572  * We're done with the actual inserts. Check for conflicts again, to
2573  * ensure that all rw-conflicts in to these inserts are detected. Without
2574  * this final check, a sequential scan of the heap may have locked the
2575  * table after the "before" check, missing one opportunity to detect the
2576  * conflict, and then scanned the table before the new tuples were there,
2577  * missing the other chance to detect the conflict.
2578  *
2579  * For heap inserts, we only need to check for table-level SSI locks. Our
2580  * new tuples can't possibly conflict with existing tuple locks, and heap
2581  * page locks are only consolidated versions of tuple locks; they do not
2582  * lock "gaps" as index page locks do. So we don't need to specify a
2583  * buffer when making the call.
2584  */
2586 
2587  /*
2588  * If tuples are cachable, mark them for invalidation from the caches in
2589  * case we abort. Note it is OK to do this after releasing the buffer,
2590  * because the heaptuples data structure is all in local memory, not in
2591  * the shared buffer.
2592  */
2593  if (IsCatalogRelation(relation))
2594  {
2595  for (i = 0; i < ntuples; i++)
2596  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2597  }
2598 
2599  /* copy t_self fields back to the caller's slots */
2600  for (i = 0; i < ntuples; i++)
2601  slots[i]->tts_tid = heaptuples[i]->t_self;
2602 
2603  pgstat_count_heap_insert(relation, ntuples);
2604 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:980
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
#define MAXALIGN(LEN)
Definition: c.h:765
#define SHORTALIGN(LEN)
Definition: c.h:761
size_t Size
Definition: c.h:559
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2222
#define HEAP_INSERT_FROZEN
Definition: heapam.h:37
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:188
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:79
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:199
int init
Definition: isn.c:74
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:185
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
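
A hypothetical sketch (not part of heapam.c) of a bulk-insert caller, in the spirit of CatalogTuplesMultiInsertWithInfo() and COPY: the slots are assumed to already contain the tuples to insert, and a bulk-insert state is used so that the buffer ring and the current insertion target block can be reused across batches:

    static void
    insert_slot_batch(Relation rel, TupleTableSlot **slots, int nslots)
    {
        BulkInsertState bistate = GetBulkInsertState();

        heap_multi_insert(rel, slots, nslots,
                          GetCurrentCommandId(true),
                          HEAP_INSERT_SKIP_FSM,   /* append rather than search the FSM */
                          bistate);

        FreeBulkInsertState(bistate);
    }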

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( Relation  relation,
Buffer  buffer,
struct GlobalVisState *  vistest,
int  options,
struct VacuumCutoffs *  cutoffs,
PruneFreezeResult *  presult,
PruneReason  reason,
OffsetNumber *  off_loc,
TransactionId *  new_relfrozen_xid,
MultiXactId *  new_relmin_mxid 
)

Definition at line 350 of file pruneheap.c.

359 {
360  Page page = BufferGetPage(buffer);
361  BlockNumber blockno = BufferGetBlockNumber(buffer);
362  OffsetNumber offnum,
363  maxoff;
364  PruneState prstate;
365  HeapTupleData tup;
366  bool do_freeze;
367  bool do_prune;
368  bool do_hint;
369  bool hint_bit_fpi;
370  int64 fpi_before = pgWalUsage.wal_fpi;
371 
372  /* Copy parameters to prstate */
373  prstate.vistest = vistest;
375  prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
376  prstate.cutoffs = cutoffs;
377 
378  /*
379  * Our strategy is to scan the page and make lists of items to change,
380  * then apply the changes within a critical section. This keeps as much
381  * logic as possible out of the critical section, and also ensures that
382  * WAL replay will work the same as the normal case.
383  *
384  * First, initialize the new pd_prune_xid value to zero (indicating no
385  * prunable tuples). If we find any tuples which may soon become
386  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
387  * initialize the rest of our working state.
388  */
391  prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
392  prstate.nroot_items = 0;
393  prstate.nheaponly_items = 0;
394 
395  /* initialize page freezing working state */
396  prstate.pagefrz.freeze_required = false;
397  if (prstate.freeze)
398  {
399  Assert(new_relfrozen_xid && new_relmin_mxid);
400  prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
401  prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
402  prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
403  prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
404  }
405  else
406  {
407  Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
412  }
413 
414  prstate.ndeleted = 0;
415  prstate.live_tuples = 0;
416  prstate.recently_dead_tuples = 0;
417  prstate.hastup = false;
418  prstate.lpdead_items = 0;
419  prstate.deadoffsets = presult->deadoffsets;
420 
421  /*
422  * Caller may update the VM after we're done. We can keep track of
423  * whether the page will be all-visible and all-frozen after pruning and
424  * freezing to help the caller to do that.
425  *
426  * Currently, only VACUUM sets the VM bits. To save the effort, only do
427  * the bookkeeping if the caller needs it. Currently, that's tied to
428  * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
429  * to update the VM bits without also freezing or freeze without also
430  * setting the VM bits.
431  *
432  * In addition to telling the caller whether it can set the VM bit, we
433  * also use 'all_visible' and 'all_frozen' for our own decision-making. If
434  * the whole page would become frozen, we consider opportunistically
435  * freezing tuples. We will not be able to freeze the whole page if there
436  * are tuples present that are not visible to everyone or if there are
437  * dead tuples which are not yet removable. However, dead tuples which
438  * will be removed by the end of vacuuming should not preclude us from
439  * opportunistically freezing. Because of that, we do not clear
440  * all_visible when we see LP_DEAD items. We fix that at the end of the
441  * function, when we return the value to the caller, so that the caller
442  * doesn't set the VM bit incorrectly.
443  */
444  if (prstate.freeze)
445  {
446  prstate.all_visible = true;
447  prstate.all_frozen = true;
448  }
449  else
450  {
451  /*
452  * Initializing to false allows skipping the work to update them in
453  * heap_prune_record_unchanged_lp_normal().
454  */
455  prstate.all_visible = false;
456  prstate.all_frozen = false;
457  }
458 
459  /*
460  * The visibility cutoff xid is the newest xmin of live tuples on the
461  * page. In the common case, this will be set as the conflict horizon the
462  * caller can use for updating the VM. If, at the end of freezing and
463  * pruning, the page is all-frozen, there is no possibility that any
464  * running transaction on the standby does not see tuples on the page as
465  * all-visible, so the conflict horizon remains InvalidTransactionId.
466  */
468 
469  maxoff = PageGetMaxOffsetNumber(page);
470  tup.t_tableOid = RelationGetRelid(relation);
471 
472  /*
473  * Determine HTSV for all tuples, and queue them up for processing as HOT
474  * chain roots or as heap-only items.
475  *
476  * Determining HTSV only once for each tuple is required for correctness,
477  * to deal with cases where running HTSV twice could result in different
478  * results. For example, RECENTLY_DEAD can turn to DEAD if another
479  * checked item causes GlobalVisTestIsRemovableFullXid() to update the
480  * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
481  * transaction aborts.
482  *
483  * It's also good for performance. Most commonly tuples within a page are
484  * stored at decreasing offsets (while the items are stored at increasing
485  * offsets). When processing all tuples on a page this leads to reading
486  * memory at decreasing offsets within a page, with a variable stride.
487  * That's hard for CPU prefetchers to deal with. Processing the items in
488  * reverse order (and thus the tuples in increasing order) increases
489  * prefetching efficiency significantly / decreases the number of cache
490  * misses.
491  */
492  for (offnum = maxoff;
493  offnum >= FirstOffsetNumber;
494  offnum = OffsetNumberPrev(offnum))
495  {
496  ItemId itemid = PageGetItemId(page, offnum);
497  HeapTupleHeader htup;
498 
499  /*
500  * Set the offset number so that we can display it along with any
501  * error that occurred while processing this tuple.
502  */
503  *off_loc = offnum;
504 
505  prstate.processed[offnum] = false;
506  prstate.htsv[offnum] = -1;
507 
508  /* Nothing to do if slot doesn't contain a tuple */
509  if (!ItemIdIsUsed(itemid))
510  {
511  heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
512  continue;
513  }
514 
515  if (ItemIdIsDead(itemid))
516  {
517  /*
518  * If the caller set mark_unused_now true, we can set dead line
519  * pointers LP_UNUSED now.
520  */
521  if (unlikely(prstate.mark_unused_now))
522  heap_prune_record_unused(&prstate, offnum, false);
523  else
524  heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
525  continue;
526  }
527 
528  if (ItemIdIsRedirected(itemid))
529  {
530  /* This is the start of a HOT chain */
531  prstate.root_items[prstate.nroot_items++] = offnum;
532  continue;
533  }
534 
535  Assert(ItemIdIsNormal(itemid));
536 
537  /*
538  * Get the tuple's visibility status and queue it up for processing.
539  */
540  htup = (HeapTupleHeader) PageGetItem(page, itemid);
541  tup.t_data = htup;
542  tup.t_len = ItemIdGetLength(itemid);
543  ItemPointerSet(&tup.t_self, blockno, offnum);
544 
545  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
546  buffer);
547 
548  if (!HeapTupleHeaderIsHeapOnly(htup))
549  prstate.root_items[prstate.nroot_items++] = offnum;
550  else
551  prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
552  }
553 
554  /*
555  * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
556  * an FPI to be emitted.
557  */
558  hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
559 
560  /*
561  * Process HOT chains.
562  *
563  * We added the items to the array starting from 'maxoff', so by
564  * processing the array in reverse order, we process the items in
565  * ascending offset number order. The order doesn't matter for
566  * correctness, but some quick micro-benchmarking suggests that this is
567  * faster. (Earlier PostgreSQL versions, which scanned all the items on
568  * the page instead of using the root_items array, also did it in
569  * ascending offset number order.)
570  */
571  for (int i = prstate.nroot_items - 1; i >= 0; i--)
572  {
573  offnum = prstate.root_items[i];
574 
575  /* Ignore items already processed as part of an earlier chain */
576  if (prstate.processed[offnum])
577  continue;
578 
579  /* see preceding loop */
580  *off_loc = offnum;
581 
582  /* Process this item or chain of items */
583  heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
584  }
585 
586  /*
587  * Process any heap-only tuples that were not already processed as part of
588  * a HOT chain.
589  */
590  for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
591  {
592  offnum = prstate.heaponly_items[i];
593 
594  if (prstate.processed[offnum])
595  continue;
596 
597  /* see preceding loop */
598  *off_loc = offnum;
599 
600  /*
601  * If the tuple is DEAD and doesn't chain to anything else, mark it
602  * unused. (If it does chain, we can only remove it as part of
603  * pruning its chain.)
604  *
605  * We need this primarily to handle aborted HOT updates, that is,
606  * XMIN_INVALID heap-only tuples. Those might not be linked to by any
607  * chain, since the parent tuple might be re-updated before any
608  * pruning occurs. So we have to be able to reap them separately from
609  * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
610  * return true for an XMIN_INVALID tuple, so this code will work even
611  * when there were sequential updates within the aborted transaction.)
612  */
613  if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
614  {
615  ItemId itemid = PageGetItemId(page, offnum);
616  HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
617 
619  {
621  &prstate.latest_xid_removed);
622  heap_prune_record_unused(&prstate, offnum, true);
623  }
624  else
625  {
626  /*
627  * This tuple should've been processed and removed as part of
628  * a HOT chain, so something's wrong. To preserve evidence,
629  * we don't dare to remove it. We cannot leave behind a DEAD
630  * tuple either, because that will cause VACUUM to error out.
631  * Throwing an error with a distinct error message seems like
632  * the least bad option.
633  */
634  elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
635  blockno, offnum);
636  }
637  }
638  else
639  heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
640  }
641 
642  /* We should now have processed every tuple exactly once */
643 #ifdef USE_ASSERT_CHECKING
644  for (offnum = FirstOffsetNumber;
645  offnum <= maxoff;
646  offnum = OffsetNumberNext(offnum))
647  {
648  *off_loc = offnum;
649 
650  Assert(prstate.processed[offnum]);
651  }
652 #endif
653 
654  /* Clear the offset information once we have processed the given page. */
655  *off_loc = InvalidOffsetNumber;
656 
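 	/* Prune only if item changes were queued up: redirects, new LP_DEAD items, or new LP_UNUSED items. */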
657  do_prune = prstate.nredirected > 0 ||
658  prstate.ndead > 0 ||
659  prstate.nunused > 0;
660 
661  /*
662  * Even if we don't prune anything, if we found a new value for the
663  * pd_prune_xid field or the page was marked full, we will update the hint
664  * bit.
665  */
666  do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
667  PageIsFull(page);
668 
669  /*
670  * Decide if we want to go ahead with freezing according to the freeze
671  * plans we prepared, or not.
672  */
673  do_freeze = false;
674  if (prstate.freeze)
675  {
676  if (prstate.pagefrz.freeze_required)
677  {
678  /*
679  * heap_prepare_freeze_tuple indicated that at least one XID/MXID
680  * from before FreezeLimit/MultiXactCutoff is present. Must
681  * freeze to advance relfrozenxid/relminmxid.
682  */
683  do_freeze = true;
684  }
685  else
686  {
687  /*
688  * Opportunistically freeze the page if we are generating an FPI
689  * anyway and if doing so means that we can set the page
690  * all-frozen afterwards (might not happen until VACUUM's final
691  * heap pass).
692  *
693  * XXX: Previously, we knew if pruning emitted an FPI by checking
694  * pgWalUsage.wal_fpi before and after pruning. Once the freeze
695  * and prune records were combined, this heuristic couldn't be
696  * used anymore. The opportunistic freeze heuristic must be
697  * improved; however, for now, try to approximate the old logic.
698  */
699  if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
700  {
701  /*
702  * Freezing would make the page all-frozen. Have we already
703  * emitted an FPI, or will we do so anyway?
704  */
705  if (RelationNeedsWAL(relation))
706  {
707  if (hint_bit_fpi)
708  do_freeze = true;
709  else if (do_prune)
710  {
711  if (XLogCheckBufferNeedsBackup(buffer))
712  do_freeze = true;
713  }
714  else if (do_hint)
715  {
716  if (XLogHintBitIsNeeded() && XLogCheckBufferNeedsBackup(buffer))
717  do_freeze = true;
718  }
719  }
720  }
721  }
722  }
723 
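 	/*
 	 * At this point do_freeze is set either because freezing is required to
 	 * advance relfrozenxid/relminmxid, or because the page would become
 	 * all-frozen and a full-page image has been, or is about to be, emitted
 	 * anyway.
 	 */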
724  if (do_freeze)
725  {
726  /*
727  * Validate the tuples we will be freezing before entering the
728  * critical section.
729  */
730  heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
731  }
732  else if (prstate.nfrozen > 0)
733  {
734  /*
735  * The page contained some tuples that were not already frozen, and
736  * we chose not to freeze them now, so the page cannot be all-frozen.
737  */
738  Assert(!prstate.pagefrz.freeze_required);
739 
740  prstate.all_frozen = false;
741  prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
742  }
743  else
744  {
745  /*
746  * We have no freeze plans to execute. The page might already be
747  * all-frozen (perhaps only following pruning), though. Such pages
748  * can be marked all-frozen in the VM by our caller, even though none
749  * of their tuples were newly frozen here.
750  */
751  }
752 
753  /* Any error while applying the changes is critical */
754  START_CRIT_SECTION();
755 
756  if (do_hint)
757  {
758  /*
759  * Update the page's pd_prune_xid field to either zero, or the lowest
760  * XID of any soon-prunable tuple.
761  */
762  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
763 
764  /*
765  * Also clear the "page is full" flag, since there's no point in
766  * repeating the prune/defrag process until something else happens to
767  * the page.
768  */
769  PageClearFull(page);
770 
771  /*
772  * If that's all we had to do to the page, this is a non-WAL-logged
773  * hint. If we are going to freeze or prune the page, we will mark
774  * the buffer dirty below.
775  */
776  if (!do_freeze && !do_prune)
777  MarkBufferDirtyHint(buffer, true);
778  }
779 
780  if (do_prune || do_freeze)
781  {
782  /* Apply the planned item changes and repair page fragmentation. */
783  if (do_prune)
784  {
785  heap_page_prune_execute(buffer, false,
786  prstate.redirected, prstate.nredirected,
787  prstate.nowdead, prstate.ndead,
788  prstate.nowunused, prstate.nunused);
789  }
790 
791  if (do_freeze)
792  heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
793 
794  MarkBufferDirty(buffer);
795 
796  /*
797  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
798  */
799  if (RelationNeedsWAL(relation))
800  {
801  /*
802  * The snapshotConflictHorizon for the whole record should be the
803  * most conservative of all the horizons calculated for any of the
804  * possible modifications. If this record prunes tuples, any
805  * transaction on the standby older than the youngest xmax among
806  * the tuples this record removes will conflict. If this record
807  * freezes tuples, any transaction on the standby whose xid is
808  * older than the youngest xid among the tuples this record
809  * freezes will conflict.
810  */
811  TransactionId frz_conflict_horizon = InvalidTransactionId;
812  TransactionId conflict_xid;
813 
814  /*
815  * We can use the visibility_cutoff_xid as our cutoff for
816  * conflicts when the whole page is eligible to become all-frozen
817  * in the VM once we're done with it. Otherwise we generate a
818  * conservative cutoff by stepping back from OldestXmin.
819  */
820  if (do_freeze)
821  {
822  if (prstate.all_visible && prstate.all_frozen)
823  frz_conflict_horizon = prstate.visibility_cutoff_xid;
824  else
825  {
826  /* Avoids false conflicts when hot_standby_feedback in use */
827  frz_conflict_horizon = prstate.cutoffs->OldestXmin;
828  TransactionIdRetreat(frz_conflict_horizon);
829  }
830  }
831 
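 	/*
 	 * The record's conflict horizon is whichever of the freeze and prune
 	 * horizons is newer.
 	 */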
832  if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
833  conflict_xid = frz_conflict_horizon;
834  else
835  conflict_xid = prstate.latest_xid_removed;
836 
837  log_heap_prune_and_freeze(relation, buffer,
838  conflict_xid,
839  true, reason,
840  prstate.frozen, prstate.nfrozen,
841  prstate.redirected, prstate.nredirected,
842  prstate.nowdead, prstate.ndead,
843  prstate.nowunused, prstate.nunused);
844  }
845  }
846 
847  END_CRIT_SECTION();
848 
849  /* Copy information back for caller */
850  presult->ndeleted = prstate.ndeleted;
851  presult->nnewlpdead = prstate.ndead;
852  presult->nfrozen = prstate.nfrozen;
853  presult->live_tuples = prstate.live_tuples;
854  presult->recently_dead_tuples = prstate.recently_dead_tuples;
855 
856  /*
857  * It was convenient to ignore LP_DEAD items in all_visible earlier on to
858  * make the choice of whether or not to freeze the page unaffected by the
859  * short-term presence of LP_DEAD items. These LP_DEAD items were
860  * effectively assumed to be LP_UNUSED items in the making. It doesn't
861  * matter which vacuum heap pass (initial pass or final pass) ends up
862  * setting the page all-frozen, as long as the ongoing VACUUM does it.
863  *
864  * Now that freezing has been finalized, unset all_visible if there are
865  * any LP_DEAD items on the page. It needs to reflect the present state
866  * of the page, as expected by our caller.
867  */
868  if (prstate.all_visible && prstate.lpdead_items == 0)
869  {
870  presult->all_visible = prstate.all_visible;
871  presult->all_frozen = prstate.all_frozen;
872  }
873  else
874  {
875  presult->all_visible = false;
876  presult->all_frozen = false;
877  }
878 
879  presult->hastup = prstate.hastup;
880 
881  /*
882  * For callers planning to update the visibility map, the conflict horizon
883  * for that record must be the newest xmin on the page. However, if the
884  * page is completely frozen, there can be no conflict and the
885  * vm_conflict_horizon should remain InvalidTransactionId. This includes
886  * the case that we just froze all the tuples; the prune-freeze record
887  * included the conflict XID already so the caller doesn't need it.
888  */
889  if (presult->all_frozen)
890  presult->vm_conflict_horizon = InvalidTransactionId;
891  else
892  presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
893 
894  presult->lpdead_items = prstate.lpdead_items;
895  /* the presult->deadoffsets array was already filled in */
896 
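 	/*
 	 * Hand back the relfrozenxid/relminmxid tracking values: the "freeze"
 	 * variants if tuples on this page were frozen, otherwise the "no freeze"
 	 * variants.
 	 */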
897  if (prstate.freeze)
898  {
899  if (presult->nfrozen > 0)
900  {
901  *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
902  *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
903  }
904  else
905  {
906  *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
907  *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
908  }
909  }
910 }
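For orientation only: the fragment below is a minimal sketch of how a VACUUM-style caller might invoke heap_page_prune_and_freeze() for one pinned, exclusively locked heap page. It is not part of pruneheap.c, and the names relation, buffer, vistest, cutoffs, new_frz_xid and new_min_mxid stand in for state the caller is assumed to already have.

 	/* Sketch only: vistest, cutoffs, new_frz_xid, new_min_mxid are assumed caller state. */
 	PruneFreezeResult presult;
 	OffsetNumber off_loc = InvalidOffsetNumber;
 	int options = HEAP_PAGE_PRUNE_FREEZE;	/* also prepare and execute freeze plans */

 	heap_page_prune_and_freeze(relation, buffer, vistest, options,
 							   cutoffs, &presult, PRUNE_VACUUM_SCAN,
 							   &off_loc, &new_frz_xid, &new_min_mxid);

 	/* presult now describes what happened to the page */
 	if (presult.all_frozen)
 	{
 		/* the caller may set the all-frozen bit in the visibility map here */
 	}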
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4988
static void PageClearFull(Page page)
Definition: bufpage.h:423
static bool PageIsFull(Page page)
Definition: bufpage.h:413
#define likely(x)
Definition: c.h:329
int64_t int64
Definition: c.h:482
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7220
void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7167
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:43
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:42
WalUsage pgWalUsage
Definition: instrument.c:22
#define InvalidMultiXactId
Definition: multixact.h:24
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:999
static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1508
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition: pruneheap.c:1297
static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1330
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2053
static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1319
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:917
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1561
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:220
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:208
bool freeze_required
Definition: heapam.h:182
MultiXactId FreezePageRelminMxid
Definition: heapam.h:209
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:219
int recently_dead_tuples
Definition: heapam.h:235
TransactionId vm_conflict_horizon
Definition: heapam.h:250
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:264
bool all_visible
Definition: heapam.h:248
HeapPageFreeze pagefrz
Definition: pruneheap.c:103
bool all_visible
Definition: pruneheap.c:150
int ndead
Definition: pruneheap.c:55
bool processed[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:86
OffsetNumber heaponly_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:78
TransactionId new_prune_xid
Definition: pruneheap.c:52
bool hastup
Definition: pruneheap.c:122
int recently_dead_tuples
Definition: pruneheap.c:119
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:60
int nroot_items
Definition: pruneheap.c:75
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
int nheaponly_items
Definition: pruneheap.c:77
bool mark_unused_now
Definition: pruneheap.c:43
int live_tuples
Definition: pruneheap.c:118
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:152