PostgreSQL Source Code  git master
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData * BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData * HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 39 of file heapam.h.
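
The HEAP_INSERT_* values are combined into the "options" bitmask passed to heap_insert() and heap_multi_insert(); the first three are aliases of the corresponding TABLE_INSERT_* flags from tableam.h, while HEAP_INSERT_SPECULATIVE is specific to the heap AM. The sketch below shows one plausible combination for loading a relation whose storage was created in the current subtransaction; the helper name and variables are illustrative and are not part of heapam.h.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"

/*
 * Illustrative helper: insert one tuple into a relation whose storage was
 * created in the current subtransaction, skipping the free space map and
 * writing the tuple already frozen.
 */
static void
insert_frozen_tuple(Relation rel, HeapTuple tup)
{
	int			options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;

	heap_insert(rel, tup, GetCurrentCommandId(true), options, NULL);
}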

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 43 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 42 of file heapam.h.

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 288 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 49 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

Definition at line 45 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 110 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

125 {
126  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
127  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
128  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
129  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
130  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
131 } HTSV_Result;
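
HTSV_Result is what the HeapTupleSatisfiesVacuum() family reports to pruning and vacuuming code. A minimal sketch of how a caller might branch on it follows; the helper name is hypothetical, and the caller is assumed to hold at least a share lock on the buffer containing the tuple.

#include "postgres.h"

#include "access/heapam.h"

/*
 * Illustrative helper: decide whether a tuple's storage could be reclaimed,
 * the way VACUUM-style callers classify tuples.  "buf" must be locked by
 * the caller and "tuple" must point at a tuple on that buffer's page.
 */
static bool
tuple_is_removable(HeapTuple tuple, TransactionId OldestXmin, Buffer buf)
{
	switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
	{
		case HEAPTUPLE_DEAD:
			return true;		/* dead to everyone; storage can be reclaimed */
		case HEAPTUPLE_RECENTLY_DEAD:
		case HEAPTUPLE_LIVE:
		case HEAPTUPLE_INSERT_IN_PROGRESS:
		case HEAPTUPLE_DELETE_IN_PROGRESS:
			break;				/* still visible to someone, or in flux */
	}
	return false;
}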

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 268 of file heapam.h.

269 {
270  PRUNE_ON_ACCESS, /* on-access pruning */
271  PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
272  PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
273 } PruneReason;

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1935 of file heapam.c.

1936 {
1937  if (bistate->current_buf != InvalidBuffer)
1938  ReleaseBuffer(bistate->current_buf);
1939  FreeAccessStrategy(bistate->strategy);
1940  pfree(bistate);
1941 }

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1918 of file heapam.c.

1919 {
1920  BulkInsertState bistate;
1921 
 1922  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
 1923  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1924  bistate->current_buf = InvalidBuffer;
1925  bistate->next_free = InvalidBlockNumber;
1926  bistate->last_free = InvalidBlockNumber;
1927  bistate->already_extended_by = 0;
1928  return bistate;
1929 }

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
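
A typical lifecycle, modeled loosely on CopyFrom(), is to allocate one BulkInsertState, reuse it for every insertion in the batch (so the ring-buffer strategy and the current target buffer are kept across calls), and free it at the end. The helper below is an illustrative sketch only; "rel" and the tuple array are assumed to be supplied by the caller.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"

/*
 * Illustrative bulk load: one BulkInsertState is shared by all insertions
 * and released afterwards.
 */
static void
bulk_insert_tuples(Relation rel, HeapTuple *tuples, int ntuples)
{
	BulkInsertState bistate = GetBulkInsertState();
	CommandId	cid = GetCurrentCommandId(true);

	for (int i = 0; i < ntuples; i++)
		heap_insert(rel, tuples[i], cid, 0 /* no special options */, bistate);

	FreeBulkInsertState(bistate);
}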

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 6023 of file heapam.c.

6024 {
6025  TransactionId xid = GetCurrentTransactionId();
6026  ItemId lp;
6027  HeapTupleData tp;
6028  Page page;
6029  BlockNumber block;
6030  Buffer buffer;
6031 
6032  Assert(ItemPointerIsValid(tid));
6033 
6034  block = ItemPointerGetBlockNumber(tid);
6035  buffer = ReadBuffer(relation, block);
6036  page = BufferGetPage(buffer);
6037 
6039 
6040  /*
6041  * Page can't be all visible, we just inserted into it, and are still
6042  * running.
6043  */
6044  Assert(!PageIsAllVisible(page));
6045 
6046  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
6047  Assert(ItemIdIsNormal(lp));
6048 
6049  tp.t_tableOid = RelationGetRelid(relation);
6050  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6051  tp.t_len = ItemIdGetLength(lp);
6052  tp.t_self = *tid;
6053 
6054  /*
6055  * Sanity check that the tuple really is a speculatively inserted tuple,
6056  * inserted by us.
6057  */
6058  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6059  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6060  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6061  elog(ERROR, "attempted to kill a non-speculative tuple");
6063 
6064  /*
6065  * No need to check for serializable conflicts here. There is never a
6066  * need for a combo CID, either. No need to extract replica identity, or
6067  * do anything special with infomask bits.
6068  */
6069 
6071 
6072  /*
6073  * The tuple will become DEAD immediately. Flag that this page is a
6074  * candidate for pruning by setting xmin to TransactionXmin. While not
6075  * immediately prunable, it is the oldest xid we can cheaply determine
6076  * that's safe against wraparound / being older than the table's
6077  * relfrozenxid. To defend against the unlikely case of a new relation
6078  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6079  * if so (vacuum can't subsequently move relfrozenxid to beyond
6080  * TransactionXmin, so there's no race here).
6081  */
6083  {
6084  TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6085  TransactionId prune_xid;
6086 
6087  if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6088  prune_xid = relfrozenxid;
6089  else
6090  prune_xid = TransactionXmin;
6091  PageSetPrunable(page, prune_xid);
6092  }
6093 
6094  /* store transaction information of xact deleting the tuple */
6097 
6098  /*
6099  * Set the tuple header xmin to InvalidTransactionId. This makes the
6100  * tuple immediately invisible everyone. (In particular, to any
6101  * transactions waiting on the speculative token, woken up later.)
6102  */
6104 
6105  /* Clear the speculative insertion token too */
6106  tp.t_data->t_ctid = tp.t_self;
6107 
6108  MarkBufferDirty(buffer);
6109 
6110  /*
6111  * XLOG stuff
6112  *
6113  * The WAL records generated here match heap_delete(). The same recovery
6114  * routines are used.
6115  */
6116  if (RelationNeedsWAL(relation))
6117  {
6118  xl_heap_delete xlrec;
6119  XLogRecPtr recptr;
6120 
6121  xlrec.flags = XLH_DELETE_IS_SUPER;
6123  tp.t_data->t_infomask2);
6125  xlrec.xmax = xid;
6126 
6127  XLogBeginInsert();
6128  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
6129  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6130 
6131  /* No replica identity & replication origin logged */
6132 
6133  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6134 
6135  PageSetLSN(page, recptr);
6136  }
6137 
6138  END_CRIT_SECTION();
6139 
6140  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6141 
6142  if (HeapTupleHasExternal(&tp))
6143  {
6144  Assert(!IsToastRelation(relation));
6145  heap_toast_delete(relation, &tp, true);
6146  }
6147 
6148  /*
6149  * Never need to mark tuple for invalidation, since catalogs don't support
6150  * speculative insertion
6151  */
6152 
6153  /* Now we can release the buffer */
6154  ReleaseBuffer(buffer);
6155 
6156  /* count deletion, as we counted the insertion too */
6157  pgstat_count_heap_delete(relation);
6158 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().
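
heap_abort_speculative() is the failure half of the speculative-insertion protocol used by INSERT ... ON CONFLICT; heap_finish_speculative() is the success half. The outline below compresses steps that are normally split between the executor and the heapam_tuple_insert_speculative()/heapam_tuple_complete_speculative() table-AM callbacks; the function and variable names are hypothetical, and details such as the conflict check, slots, and TOAST handling are omitted.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "storage/lmgr.h"

/*
 * Illustrative outline of a speculative insertion.  "ok" stands in for the
 * result of the caller's conflict check.
 */
static void
speculative_insert_outline(Relation rel, HeapTuple tup, bool ok)
{
	TransactionId xid = GetCurrentTransactionId();
	uint32		token = SpeculativeInsertionLockAcquire(xid);

	/* the token is parked in t_ctid until the insertion is resolved */
	HeapTupleHeaderSetSpeculativeToken(tup->t_data, token);
	heap_insert(rel, tup, GetCurrentCommandId(true),
				HEAP_INSERT_SPECULATIVE, NULL);

	if (ok)
		heap_finish_speculative(rel, &tup->t_self); /* restore a real t_ctid */
	else
		heap_abort_speculative(rel, &tup->t_self);	/* "super-delete" the tuple */

	SpeculativeInsertionLockRelease(xid);
}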

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 1029 of file heapam.c.

1033 {
1034  HeapScanDesc scan;
1035 
1036  /*
1037  * increment relation ref count while scanning relation
1038  *
1039  * This is just to make really sure the relcache entry won't go away while
1040  * the scan has a pointer to it. Caller should be holding the rel open
1041  * anyway, so this is redundant in all normal scenarios...
1042  */
1044 
1045  /*
1046  * allocate and initialize scan descriptor
1047  */
1048  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1049 
1050  scan->rs_base.rs_rd = relation;
1051  scan->rs_base.rs_snapshot = snapshot;
1052  scan->rs_base.rs_nkeys = nkeys;
1053  scan->rs_base.rs_flags = flags;
1054  scan->rs_base.rs_parallel = parallel_scan;
1055  scan->rs_strategy = NULL; /* set in initscan */
1056  scan->rs_vmbuffer = InvalidBuffer;
1057  scan->rs_empty_tuples_pending = 0;
1058 
1059  /*
1060  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1061  */
1062  if (!(snapshot && IsMVCCSnapshot(snapshot)))
1064 
1065  /*
1066  * For seqscan and sample scans in a serializable transaction, acquire a
1067  * predicate lock on the entire relation. This is required not only to
1068  * lock all the matching tuples, but also to conflict with new insertions
1069  * into the table. In an indexscan, we take page locks on the index pages
1070  * covering the range specified in the scan qual, but in a heap scan there
1071  * is nothing more fine-grained to lock. A bitmap scan is a different
1072  * story, there we have already scanned the index and locked the index
1073  * pages covering the predicate. But in that case we still have to lock
1074  * any matching heap tuples. For sample scan we could optimize the locking
1075  * to be at least page-level granularity, but we'd need to add per-tuple
1076  * locking for that.
1077  */
1079  {
1080  /*
1081  * Ensure a missing snapshot is noticed reliably, even if the
1082  * isolation mode means predicate locking isn't performed (and
1083  * therefore the snapshot isn't used here).
1084  */
1085  Assert(snapshot);
1086  PredicateLockRelation(relation, snapshot);
1087  }
1088 
1089  /* we only need to set this up once */
1090  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1091 
1092  /*
1093  * Allocate memory to keep track of page allocation for parallel workers
1094  * when doing a parallel scan.
1095  */
1096  if (parallel_scan != NULL)
1098  else
1099  scan->rs_parallelworkerdata = NULL;
1100 
1101  /*
1102  * we do this here instead of in initscan() because heap_rescan also calls
1103  * initscan() and we don't want to allocate memory again
1104  */
1105  if (nkeys > 0)
1106  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1107  else
1108  scan->rs_base.rs_key = NULL;
1109 
1110  initscan(scan, key, false);
1111 
1112  scan->rs_read_stream = NULL;
1113 
1114  /*
1115  * Set up a read stream for sequential scans and TID range scans. This
1116  * should be done after initscan() because initscan() allocates the
1117  * BufferAccessStrategy object passed to the read stream API.
1118  */
1119  if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1121  {
1123 
1124  if (scan->rs_base.rs_parallel)
1126  else
1128 
1130  scan->rs_strategy,
1131  scan->rs_base.rs_rd,
1132  MAIN_FORKNUM,
1133  cb,
1134  scan,
1135  0);
1136  }
1137 
1138 
1139  return (TableScanDesc) scan;
1140 }

References Assert, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), if(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_SEQUENTIAL, RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
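
In ordinary backend code a sequential scan is started through the tableam wrappers (table_beginscan() and friends), which route here for heap tables. The sketch below drives the heap routines directly just to show the begin/next/end pattern; the helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"

/*
 * Illustrative scan: count the tuples visible to the given snapshot.
 */
static uint64
count_visible_tuples(Relation rel, Snapshot snapshot)
{
	TableScanDesc scan;
	uint64		ntuples = 0;

	scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
						  SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);

	while (heap_getnext(scan, ForwardScanDirection) != NULL)
		ntuples++;

	heap_endscan(scan);

	return ntuples;
}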

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2674 of file heapam.c.

2677 {
2678  TM_Result result;
2679  TransactionId xid = GetCurrentTransactionId();
2680  ItemId lp;
2681  HeapTupleData tp;
2682  Page page;
2683  BlockNumber block;
2684  Buffer buffer;
2685  Buffer vmbuffer = InvalidBuffer;
2686  TransactionId new_xmax;
2687  uint16 new_infomask,
2688  new_infomask2;
2689  bool have_tuple_lock = false;
2690  bool iscombo;
2691  bool all_visible_cleared = false;
2692  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2693  bool old_key_copied = false;
2694 
2695  Assert(ItemPointerIsValid(tid));
2696 
2697  /*
2698  * Forbid this during a parallel operation, lest it allocate a combo CID.
2699  * Other workers might need that combo CID for visibility checks, and we
2700  * have no provision for broadcasting it to them.
2701  */
2702  if (IsInParallelMode())
2703  ereport(ERROR,
2704  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2705  errmsg("cannot delete tuples during a parallel operation")));
2706 
2707  block = ItemPointerGetBlockNumber(tid);
2708  buffer = ReadBuffer(relation, block);
2709  page = BufferGetPage(buffer);
2710 
2711  /*
2712  * Before locking the buffer, pin the visibility map page if it appears to
2713  * be necessary. Since we haven't got the lock yet, someone else might be
2714  * in the middle of changing this, so we'll need to recheck after we have
2715  * the lock.
2716  */
2717  if (PageIsAllVisible(page))
2718  visibilitymap_pin(relation, block, &vmbuffer);
2719 
2721 
2722  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2723  Assert(ItemIdIsNormal(lp));
2724 
2725  tp.t_tableOid = RelationGetRelid(relation);
2726  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2727  tp.t_len = ItemIdGetLength(lp);
2728  tp.t_self = *tid;
2729 
2730 l1:
2731 
2732  /*
2733  * If we didn't pin the visibility map page and the page has become all
2734  * visible while we were busy locking the buffer, we'll have to unlock and
2735  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2736  * unfortunate, but hopefully shouldn't happen often.
2737  */
2738  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2739  {
2740  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2741  visibilitymap_pin(relation, block, &vmbuffer);
2743  }
2744 
2745  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2746 
2747  if (result == TM_Invisible)
2748  {
2749  UnlockReleaseBuffer(buffer);
2750  ereport(ERROR,
2751  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2752  errmsg("attempted to delete invisible tuple")));
2753  }
2754  else if (result == TM_BeingModified && wait)
2755  {
2756  TransactionId xwait;
2757  uint16 infomask;
2758 
2759  /* must copy state data before unlocking buffer */
2760  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2761  infomask = tp.t_data->t_infomask;
2762 
2763  /*
2764  * Sleep until concurrent transaction ends -- except when there's a
2765  * single locker and it's our own transaction. Note we don't care
2766  * which lock mode the locker has, because we need the strongest one.
2767  *
2768  * Before sleeping, we need to acquire tuple lock to establish our
2769  * priority for the tuple (see heap_lock_tuple). LockTuple will
2770  * release us when we are next-in-line for the tuple.
2771  *
2772  * If we are forced to "start over" below, we keep the tuple lock;
2773  * this arranges that we stay at the head of the line while rechecking
2774  * tuple state.
2775  */
2776  if (infomask & HEAP_XMAX_IS_MULTI)
2777  {
2778  bool current_is_member = false;
2779 
2780  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2781  LockTupleExclusive, &current_is_member))
2782  {
2783  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2784 
2785  /*
2786  * Acquire the lock, if necessary (but skip it when we're
2787  * requesting a lock and already have one; avoids deadlock).
2788  */
2789  if (!current_is_member)
2791  LockWaitBlock, &have_tuple_lock);
2792 
2793  /* wait for multixact */
2795  relation, &(tp.t_self), XLTW_Delete,
2796  NULL);
2798 
2799  /*
2800  * If xwait had just locked the tuple then some other xact
2801  * could update this tuple before we get to this point. Check
2802  * for xmax change, and start over if so.
2803  *
2804  * We also must start over if we didn't pin the VM page, and
2805  * the page has become all visible.
2806  */
2807  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2808  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2810  xwait))
2811  goto l1;
2812  }
2813 
2814  /*
2815  * You might think the multixact is necessarily done here, but not
2816  * so: it could have surviving members, namely our own xact or
2817  * other subxacts of this backend. It is legal for us to delete
2818  * the tuple in either case, however (the latter case is
2819  * essentially a situation of upgrading our former shared lock to
2820  * exclusive). We don't bother changing the on-disk hint bits
2821  * since we are about to overwrite the xmax altogether.
2822  */
2823  }
2824  else if (!TransactionIdIsCurrentTransactionId(xwait))
2825  {
2826  /*
2827  * Wait for regular transaction to end; but first, acquire tuple
2828  * lock.
2829  */
2830  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2832  LockWaitBlock, &have_tuple_lock);
2833  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2835 
2836  /*
2837  * xwait is done, but if xwait had just locked the tuple then some
2838  * other xact could update this tuple before we get to this point.
2839  * Check for xmax change, and start over if so.
2840  *
2841  * We also must start over if we didn't pin the VM page, and the
2842  * page has become all visible.
2843  */
2844  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2845  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2847  xwait))
2848  goto l1;
2849 
2850  /* Otherwise check if it committed or aborted */
2851  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2852  }
2853 
2854  /*
2855  * We may overwrite if previous xmax aborted, or if it committed but
2856  * only locked the tuple without updating it.
2857  */
2858  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2861  result = TM_Ok;
2862  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2863  result = TM_Updated;
2864  else
2865  result = TM_Deleted;
2866  }
2867 
2868  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2869  if (result != TM_Ok)
2870  {
2871  Assert(result == TM_SelfModified ||
2872  result == TM_Updated ||
2873  result == TM_Deleted ||
2874  result == TM_BeingModified);
2876  Assert(result != TM_Updated ||
2877  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2878  }
2879 
2880  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2881  {
2882  /* Perform additional check for transaction-snapshot mode RI updates */
2883  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2884  result = TM_Updated;
2885  }
2886 
2887  if (result != TM_Ok)
2888  {
2889  tmfd->ctid = tp.t_data->t_ctid;
2891  if (result == TM_SelfModified)
2892  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2893  else
2894  tmfd->cmax = InvalidCommandId;
2895  UnlockReleaseBuffer(buffer);
2896  if (have_tuple_lock)
2897  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2898  if (vmbuffer != InvalidBuffer)
2899  ReleaseBuffer(vmbuffer);
2900  return result;
2901  }
2902 
2903  /*
2904  * We're about to do the actual delete -- check for conflict first, to
2905  * avoid possibly having to roll back work we've just done.
2906  *
2907  * This is safe without a recheck as long as there is no possibility of
2908  * another process scanning the page between this check and the delete
2909  * being visible to the scan (i.e., an exclusive buffer content lock is
2910  * continuously held from this point until the tuple delete is visible).
2911  */
2912  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2913 
2914  /* replace cid with a combo CID if necessary */
2915  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2916 
2917  /*
2918  * Compute replica identity tuple before entering the critical section so
2919  * we don't PANIC upon a memory allocation failure.
2920  */
2921  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2922 
2923  /*
2924  * If this is the first possibly-multixact-able operation in the current
2925  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2926  * certain that the transaction will never become a member of any older
2927  * MultiXactIds than that. (We have to do this even if we end up just
2928  * using our own TransactionId below, since some other backend could
2929  * incorporate our XID into a MultiXact immediately afterwards.)
2930  */
2932 
2935  xid, LockTupleExclusive, true,
2936  &new_xmax, &new_infomask, &new_infomask2);
2937 
2939 
2940  /*
2941  * If this transaction commits, the tuple will become DEAD sooner or
2942  * later. Set flag that this page is a candidate for pruning once our xid
2943  * falls below the OldestXmin horizon. If the transaction finally aborts,
2944  * the subsequent page pruning will be a no-op and the hint will be
2945  * cleared.
2946  */
2947  PageSetPrunable(page, xid);
2948 
2949  if (PageIsAllVisible(page))
2950  {
2951  all_visible_cleared = true;
2952  PageClearAllVisible(page);
2953  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2954  vmbuffer, VISIBILITYMAP_VALID_BITS);
2955  }
2956 
2957  /* store transaction information of xact deleting the tuple */
2960  tp.t_data->t_infomask |= new_infomask;
2961  tp.t_data->t_infomask2 |= new_infomask2;
2963  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2964  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2965  /* Make sure there is no forward chain link in t_ctid */
2966  tp.t_data->t_ctid = tp.t_self;
2967 
2968  /* Signal that this is actually a move into another partition */
2969  if (changingPart)
2971 
2972  MarkBufferDirty(buffer);
2973 
2974  /*
2975  * XLOG stuff
2976  *
2977  * NB: heap_abort_speculative() uses the same xlog record and replay
2978  * routines.
2979  */
2980  if (RelationNeedsWAL(relation))
2981  {
2982  xl_heap_delete xlrec;
2983  xl_heap_header xlhdr;
2984  XLogRecPtr recptr;
2985 
2986  /*
2987  * For logical decode we need combo CIDs to properly decode the
2988  * catalog
2989  */
2991  log_heap_new_cid(relation, &tp);
2992 
2993  xlrec.flags = 0;
2994  if (all_visible_cleared)
2996  if (changingPart)
2999  tp.t_data->t_infomask2);
3001  xlrec.xmax = new_xmax;
3002 
3003  if (old_key_tuple != NULL)
3004  {
3005  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3007  else
3009  }
3010 
3011  XLogBeginInsert();
3012  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
3013 
3014  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3015 
3016  /*
3017  * Log replica identity of the deleted tuple if there is one
3018  */
3019  if (old_key_tuple != NULL)
3020  {
3021  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3022  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3023  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3024 
3025  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
3026  XLogRegisterData((char *) old_key_tuple->t_data
3028  old_key_tuple->t_len
3030  }
3031 
3032  /* filtering by origin on a row level is much more efficient */
3034 
3035  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3036 
3037  PageSetLSN(page, recptr);
3038  }
3039 
3040  END_CRIT_SECTION();
3041 
3042  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3043 
3044  if (vmbuffer != InvalidBuffer)
3045  ReleaseBuffer(vmbuffer);
3046 
3047  /*
3048  * If the tuple has toasted out-of-line attributes, we need to delete
3049  * those items too. We have to do this before releasing the buffer
3050  * because we need to look at the contents of the tuple, but it's OK to
3051  * release the content lock on the buffer first.
3052  */
3053  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3054  relation->rd_rel->relkind != RELKIND_MATVIEW)
3055  {
3056  /* toast table entries should never be recursively toasted */
3058  }
3059  else if (HeapTupleHasExternal(&tp))
3060  heap_toast_delete(relation, &tp, false);
3061 
3062  /*
3063  * Mark tuple for invalidation from system caches at next command
3064  * boundary. We have to do this before releasing the buffer because we
3065  * need to look at the contents of the tuple.
3066  */
3067  CacheInvalidateHeapTuple(relation, &tp, NULL);
3068 
3069  /* Now we can release the buffer */
3070  ReleaseBuffer(buffer);
3071 
3072  /*
3073  * Release the lmgr tuple lock, if we had it.
3074  */
3075  if (have_tuple_lock)
3076  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
3077 
3078  pgstat_count_heap_delete(relation);
3079 
3080  if (old_key_tuple != NULL && old_key_copied)
3081  heap_freetuple(old_key_tuple);
3082 
3083  return TM_Ok;
3084 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
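
Most in-core callers reach this through the table AM (heapam_tuple_delete()) or through simple_heap_delete(), which wraps essentially the call below and errors out for any result other than TM_Ok. The sketch is illustrative; the helper name is hypothetical, and any non-TM_Ok result is simply reported back to the caller.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"

/*
 * Illustrative deletion by TID.  Returns true on TM_Ok; any other result
 * (concurrent update, self-modification, ...) is left to the caller.
 */
static bool
delete_tuple_by_tid(Relation rel, ItemPointer tid)
{
	TM_FailureData tmfd;
	TM_Result	result;

	result = heap_delete(rel, tid, GetCurrentCommandId(true),
						 InvalidSnapshot,	/* no crosscheck snapshot */
						 true,				/* wait for conflicting xacts */
						 &tmfd, false);		/* not a partition move */

	return (result == TM_Ok);
}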

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1201 of file heapam.c.

1202 {
1203  HeapScanDesc scan = (HeapScanDesc) sscan;
1204 
1205  /* Note: no locking manipulations needed */
1206 
1207  /*
1208  * unpin scan buffers
1209  */
1210  if (BufferIsValid(scan->rs_cbuf))
1211  ReleaseBuffer(scan->rs_cbuf);
1212 
1213  if (BufferIsValid(scan->rs_vmbuffer))
1214  ReleaseBuffer(scan->rs_vmbuffer);
1215 
1216  /*
1217  * Must free the read stream before freeing the BufferAccessStrategy.
1218  */
1219  if (scan->rs_read_stream)
1221 
1222  /*
1223  * decrement relation reference count and free scan descriptor storage
1224  */
1226 
1227  if (scan->rs_base.rs_key)
1228  pfree(scan->rs_base.rs_key);
1229 
1230  if (scan->rs_strategy != NULL)
1232 
1233  if (scan->rs_parallelworkerdata != NULL)
1235 
1236  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1238 
1239  pfree(scan);
1240 }

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze frz 
)
inline static

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1502 of file heapam.c.

1507 {
1508  ItemPointer tid = &(tuple->t_self);
1509  ItemId lp;
1510  Buffer buffer;
1511  Page page;
1512  OffsetNumber offnum;
1513  bool valid;
1514 
1515  /*
1516  * Fetch and pin the appropriate page of the relation.
1517  */
1518  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1519 
1520  /*
1521  * Need share lock on buffer to examine tuple commit status.
1522  */
1523  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1524  page = BufferGetPage(buffer);
1525 
1526  /*
1527  * We'd better check for out-of-range offnum in case of VACUUM since the
1528  * TID was obtained.
1529  */
1530  offnum = ItemPointerGetOffsetNumber(tid);
1531  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1532  {
1533  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1534  ReleaseBuffer(buffer);
1535  *userbuf = InvalidBuffer;
1536  tuple->t_data = NULL;
1537  return false;
1538  }
1539 
1540  /*
1541  * get the item line pointer corresponding to the requested tid
1542  */
1543  lp = PageGetItemId(page, offnum);
1544 
1545  /*
1546  * Must check for deleted tuple.
1547  */
1548  if (!ItemIdIsNormal(lp))
1549  {
1550  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1551  ReleaseBuffer(buffer);
1552  *userbuf = InvalidBuffer;
1553  tuple->t_data = NULL;
1554  return false;
1555  }
1556 
1557  /*
1558  * fill in *tuple fields
1559  */
1560  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1561  tuple->t_len = ItemIdGetLength(lp);
1562  tuple->t_tableOid = RelationGetRelid(relation);
1563 
1564  /*
1565  * check tuple visibility, then release lock
1566  */
1567  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1568 
1569  if (valid)
1570  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1571  HeapTupleHeaderGetXmin(tuple->t_data));
1572 
1573  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1574 
1575  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1576 
1577  if (valid)
1578  {
1579  /*
1580  * All checks passed, so return the tuple as valid. Caller is now
1581  * responsible for releasing the buffer.
1582  */
1583  *userbuf = buffer;
1584 
1585  return true;
1586  }
1587 
1588  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1589  if (keep_buf)
1590  *userbuf = buffer;
1591  else
1592  {
1593  ReleaseBuffer(buffer);
1594  *userbuf = InvalidBuffer;
1595  tuple->t_data = NULL;
1596  }
1597 
1598  return false;
1599 }

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
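
The caller supplies the TID in tuple->t_self; on success heap_fetch() leaves the containing buffer pinned and points t_data into it, so the pin must be held for as long as the tuple data is examined. A short illustrative sketch (hypothetical helper name):

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

/*
 * Illustrative fetch of one tuple version by TID.
 */
static bool
fetch_and_inspect(Relation rel, Snapshot snapshot, ItemPointer tid)
{
	HeapTupleData tuple;
	Buffer		buf;

	tuple.t_self = *tid;		/* heap_fetch() reads the TID from t_self */
	if (!heap_fetch(rel, snapshot, &tuple, &buf, false))
		return false;			/* no such line pointer, or not visible */

	/* ... examine tuple.t_data while the buffer pin is held ... */

	ReleaseBuffer(buf);
	return true;
}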

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5936 of file heapam.c.

5937 {
5938  Buffer buffer;
5939  Page page;
5940  OffsetNumber offnum;
5941  ItemId lp = NULL;
5942  HeapTupleHeader htup;
5943 
5944  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5946  page = (Page) BufferGetPage(buffer);
5947 
5948  offnum = ItemPointerGetOffsetNumber(tid);
5949  if (PageGetMaxOffsetNumber(page) >= offnum)
5950  lp = PageGetItemId(page, offnum);
5951 
5952  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5953  elog(ERROR, "invalid lp");
5954 
5955  htup = (HeapTupleHeader) PageGetItem(page, lp);
5956 
5957  /* NO EREPORT(ERROR) from here till changes are logged */
5959 
5961 
5962  MarkBufferDirty(buffer);
5963 
5964  /*
5965  * Replace the speculative insertion token with a real t_ctid, pointing to
5966  * itself like it does on regular tuples.
5967  */
5968  htup->t_ctid = *tid;
5969 
5970  /* XLOG stuff */
5971  if (RelationNeedsWAL(relation))
5972  {
5973  xl_heap_confirm xlrec;
5974  XLogRecPtr recptr;
5975 
5976  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5977 
5978  XLogBeginInsert();
5979 
5980  /* We want the same filtering on this as on a plain insert */
5982 
5983  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5984  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5985 
5986  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5987 
5988  PageSetLSN(page, recptr);
5989  }
5990 
5991  END_CRIT_SECTION();
5992 
5993  UnlockReleaseBuffer(buffer);
5994 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
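
A hedged sketch of the speculative-insertion protocol this function completes, loosely following heapam_tuple_complete_speculative(); the wrapper below and its arguments are illustrative only:

    /* Hypothetical sketch: resolve a speculative insertion one way or the other.
     * The tuple was inserted earlier with HEAP_INSERT_SPECULATIVE, so its t_ctid
     * still carries the speculative token instead of a real self-pointer. */
    static void
    resolve_speculative(Relation rel, HeapTuple tup, bool succeeded)
    {
        if (succeeded)
            heap_finish_speculative(rel, &tup->t_self);  /* token -> self-pointing t_ctid */
        else
            heap_abort_speculative(rel, &tup->t_self);   /* super-delete the tuple */
    }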

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7220 of file heapam.c.

7221 {
7222  Page page = BufferGetPage(buffer);
7223 
7224  for (int i = 0; i < ntuples; i++)
7225  {
7226  HeapTupleFreeze *frz = tuples + i;
7227  ItemId itemid = PageGetItemId(page, frz->offset);
7228  HeapTupleHeader htup;
7229 
7230  htup = (HeapTupleHeader) PageGetItem(page, itemid);
7231  heap_execute_freeze_tuple(htup, frz);
7232  }
7233 }
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.h:443
int i
Definition: isn.c:72
OffsetNumber offset
Definition: heapam.h:152

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().
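
A sketch of the prepare/execute split that this function implements the second half of, modeled loosely on the heap_page_prune_and_freeze() caller. The page and buffer variables, the cutoffs/pagefrz state, and the omission of WAL logging are assumptions of the sketch:

    HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    int             nfrozen = 0;
    bool            totally_frozen;

    /* Phase 1 (no critical section yet): decide which tuples to freeze. */
    for (OffsetNumber offnum = FirstOffsetNumber;
         offnum <= PageGetMaxOffsetNumber(page);
         offnum = OffsetNumberNext(offnum))
    {
        ItemId          itemid = PageGetItemId(page, offnum);
        HeapTupleHeader htup;

        if (!ItemIdIsNormal(itemid))
            continue;
        htup = (HeapTupleHeader) PageGetItem(page, itemid);

        if (heap_prepare_freeze_tuple(htup, &cutoffs, &pagefrz,
                                      &frozen[nfrozen], &totally_frozen))
            frozen[nfrozen++].offset = offnum;
    }

    /* Phase 2: apply the plans; the real caller also marks the buffer dirty and
     * emits a prune/freeze WAL record inside this critical section. */
    START_CRIT_SECTION();
    heap_freeze_prepared_tuples(buffer, frozen, nfrozen);
    END_CRIT_SECTION();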

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 7242 of file heapam.c.

7245 {
7246  HeapTupleFreeze frz;
7247  bool do_freeze;
7248  bool totally_frozen;
7249  struct VacuumCutoffs cutoffs;
7250  HeapPageFreeze pagefrz;
7251 
7252  cutoffs.relfrozenxid = relfrozenxid;
7253  cutoffs.relminmxid = relminmxid;
7254  cutoffs.OldestXmin = FreezeLimit;
7255  cutoffs.OldestMxact = MultiXactCutoff;
7256  cutoffs.FreezeLimit = FreezeLimit;
7257  cutoffs.MultiXactCutoff = MultiXactCutoff;
7258 
7259  pagefrz.freeze_required = true;
7260  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7261  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7262  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7263  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7264 
7265  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7266  &pagefrz, &frz, &totally_frozen);
7267 
7268  /*
7269  * Note that because this is not a WAL-logged operation, we don't need to
7270  * fill in the offset in the freeze record.
7271  */
7272 
7273  if (do_freeze)
7274  heap_execute_freeze_tuple(tuple, &frz);
7275  return do_freeze;
7276 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6894
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
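
The rewrite_heap_tuple() caller applies this to a tuple copy that is not yet on any WAL-logged page. A minimal call sketch; the copied_tuple, OldHeap, FreezeXid and MultiXactCutoff variables are assumed to come from the surrounding CLUSTER/rewrite state:

    /* Freeze the copied tuple header in place if the cutoffs say it is old
     * enough; no WAL is needed because the destination page is logged later. */
    if (heap_freeze_tuple(copied_tuple->t_data,
                          OldHeap->rd_rel->relfrozenxid,
                          OldHeap->rd_rel->relminmxid,
                          FreezeXid,
                          MultiXactCutoff))
    {
        /* xmin/xmax in the copied header were rewritten to frozen values */
    }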

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1774 of file heapam.c.

1776 {
1777  Relation relation = sscan->rs_rd;
1778  Snapshot snapshot = sscan->rs_snapshot;
1779  ItemPointerData ctid;
1780  TransactionId priorXmax;
1781 
1782  /*
1783  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1784  * Assume that t_ctid links are valid however - there shouldn't be invalid
1785  * ones in the table.
1786  */
1787  Assert(ItemPointerIsValid(tid));
1788 
1789  /*
1790  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1791  * need to examine, and *tid is the TID we will return if ctid turns out
1792  * to be bogus.
1793  *
1794  * Note that we will loop until we reach the end of the t_ctid chain.
1795  * Depending on the snapshot passed, there might be at most one visible
1796  * version of the row, but we don't try to optimize for that.
1797  */
1798  ctid = *tid;
1799  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1800  for (;;)
1801  {
1802  Buffer buffer;
1803  Page page;
1804  OffsetNumber offnum;
1805  ItemId lp;
1806  HeapTupleData tp;
1807  bool valid;
1808 
1809  /*
1810  * Read, pin, and lock the page.
1811  */
1812  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1813  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1814  page = BufferGetPage(buffer);
1815 
1816  /*
1817  * Check for bogus item number. This is not treated as an error
1818  * condition because it can happen while following a t_ctid link. We
1819  * just assume that the prior tid is OK and return it unchanged.
1820  */
1821  offnum = ItemPointerGetOffsetNumber(&ctid);
1822  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1823  {
1824  UnlockReleaseBuffer(buffer);
1825  break;
1826  }
1827  lp = PageGetItemId(page, offnum);
1828  if (!ItemIdIsNormal(lp))
1829  {
1830  UnlockReleaseBuffer(buffer);
1831  break;
1832  }
1833 
1834  /* OK to access the tuple */
1835  tp.t_self = ctid;
1836  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1837  tp.t_len = ItemIdGetLength(lp);
1838  tp.t_tableOid = RelationGetRelid(relation);
1839 
1840  /*
1841  * After following a t_ctid link, we might arrive at an unrelated
1842  * tuple. Check for XMIN match.
1843  */
1844  if (TransactionIdIsValid(priorXmax) &&
1845  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1846  {
1847  UnlockReleaseBuffer(buffer);
1848  break;
1849  }
1850 
1851  /*
1852  * Check tuple visibility; if visible, set it as the new result
1853  * candidate.
1854  */
1855  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1856  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1857  if (valid)
1858  *tid = ctid;
1859 
1860  /*
1861  * If there's a valid t_ctid link, follow it, else we're done.
1862  */
1863  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1864  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1865  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1866  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1867  {
1868  UnlockReleaseBuffer(buffer);
1869  break;
1870  }
1871 
1872  ctid = tp.t_data->t_ctid;
1873  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1874  UnlockReleaseBuffer(buffer);
1875  } /* end of loop */
1876 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
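
A sketch of how this is normally reached through the table AM layer; the helper name is illustrative, and *tid is updated in place to the newest version visible to the scan's snapshot:

    #include "access/tableam.h"

    /* Hypothetical helper: follow the t_ctid update chain from a starting TID. */
    static void
    chase_to_latest(Relation rel, Snapshot snapshot, ItemPointer tid)
    {
        TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

        /* For a heap relation this dispatches to heap_get_latest_tid(). */
        table_tuple_get_latest_tid(scan, tid);

        table_endscan(scan);
    }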

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1785 of file pruneheap.c.

1786 {
1787  OffsetNumber offnum,
1788  maxoff;
1789 
1790  MemSet(root_offsets, InvalidOffsetNumber,
1791  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1792 
1793  maxoff = PageGetMaxOffsetNumber(page);
1794  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1795  {
1796  ItemId lp = PageGetItemId(page, offnum);
1797  HeapTupleHeader htup;
1798  OffsetNumber nextoffnum;
1799  TransactionId priorXmax;
1800 
1801  /* skip unused and dead items */
1802  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1803  continue;
1804 
1805  if (ItemIdIsNormal(lp))
1806  {
1807  htup = (HeapTupleHeader) PageGetItem(page, lp);
1808 
1809  /*
1810  * Check if this tuple is part of a HOT-chain rooted at some other
1811  * tuple. If so, skip it for now; we'll process it when we find
1812  * its root.
1813  */
1814  if (HeapTupleHeaderIsHeapOnly(htup))
1815  continue;
1816 
1817  /*
1818  * This is either a plain tuple or the root of a HOT-chain.
1819  * Remember it in the mapping.
1820  */
1821  root_offsets[offnum - 1] = offnum;
1822 
1823  /* If it's not the start of a HOT-chain, we're done with it */
1824  if (!HeapTupleHeaderIsHotUpdated(htup))
1825  continue;
1826 
1827  /* Set up to scan the HOT-chain */
1828  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1829  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1830  }
1831  else
1832  {
1833  /* Must be a redirect item. We do not set its root_offsets entry */
1834  Assert(ItemIdIsRedirected(lp));
1835  /* Set up to scan the HOT-chain */
1836  nextoffnum = ItemIdGetRedirect(lp);
1837  priorXmax = InvalidTransactionId;
1838  }
1839 
1840  /*
1841  * Now follow the HOT-chain and collect other tuples in the chain.
1842  *
1843  * Note: Even though this is a nested loop, the complexity of the
1844  * function is O(N) because a tuple in the page should be visited not
1845  * more than twice, once in the outer loop and once in HOT-chain
1846  * chases.
1847  */
1848  for (;;)
1849  {
1850  /* Sanity check (pure paranoia) */
1851  if (offnum < FirstOffsetNumber)
1852  break;
1853 
1854  /*
1855  * An offset past the end of page's line pointer array is possible
1856  * when the array was truncated
1857  */
1858  if (offnum > maxoff)
1859  break;
1860 
1861  lp = PageGetItemId(page, nextoffnum);
1862 
1863  /* Check for broken chains */
1864  if (!ItemIdIsNormal(lp))
1865  break;
1866 
1867  htup = (HeapTupleHeader) PageGetItem(page, lp);
1868 
1869  if (TransactionIdIsValid(priorXmax) &&
1870  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1871  break;
1872 
1873  /* Remember the root line pointer for this item */
1874  root_offsets[nextoffnum - 1] = offnum;
1875 
1876  /* Advance to next chain member, if any */
1877  if (!HeapTupleHeaderIsHotUpdated(htup))
1878  break;
1879 
1880  /* HOT implies it can't have moved to different partition */
1881  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1882 
1883  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1884  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1885  }
1886  }
1887 }
#define MemSet(start, val, len)
Definition: c.h:999
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
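
A sketch of the call pattern used by the index-build callers; the page, offnum and root variables, the error message, and the surrounding buffer locking are assumptions of the sketch:

    OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    OffsetNumber root;

    /* Map every offset on this page to the root line pointer of its HOT chain. */
    heap_get_root_tuples(page, root_offsets);

    /* A heap-only tuple has no index entry of its own; its entry lives at the
     * chain root, so look that up instead of using offnum directly. */
    root = root_offsets[offnum - 1];
    if (!OffsetNumberIsValid(root))
        elog(ERROR, "could not find root line pointer for heap-only tuple");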

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1243 of file heapam.c.

1244 {
1245  HeapScanDesc scan = (HeapScanDesc) sscan;
1246 
1247  /*
1248  * This is still widely used directly, without going through table AM, so
1249  * add a safety check. It's possible we should, at a later point,
1250  * downgrade this to an assert. The reason for checking the AM routine,
1251  * rather than the AM oid, is that this allows to write regression tests
1252  * that create another AM reusing the heap handler.
1253  */
1254  if (unlikely(scan->rs_base.rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1255  ereport(ERROR,
1256  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1257  errmsg_internal("only heap AM is supported")));
1258 
1259  /*
1260  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1261  * for catalog or regular tables. See detailed comments in xact.c where
1262  * these variables are declared. Normally we have such a check at tableam
1263  * level API but this is called from many places so we need to ensure it
1264  * here.
1265  */
1266  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1267  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1268 
1269  /* Note: no locking manipulations needed */
1270 
1271  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1272  heapgettup_pagemode(scan, direction,
1273  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1274  else
1275  heapgettup(scan, direction,
1276  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1277 
1278  if (scan->rs_ctup.t_data == NULL)
1279  return NULL;
1280 
1281  /*
1282  * if we get here it means we have a new current scan tuple, so point to
1283  * the proper return buffer and return the tuple.
1284  */
1285 
1286  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1287 
1288  return &scan->rs_ctup;
1289 }
#define unlikely(x)
Definition: c.h:326
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:829
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:939
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:654
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool bsysscan
Definition: xact.c:99
TransactionId CheckXidAlive
Definition: xact.c:98

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
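
Most of the callers above run the same direct scan loop; a minimal sketch (catalog-style, so the snapshot comes from table_beginscan_catalog(); rel is assumed to be an already-opened heap relation):

    TableScanDesc scan;
    HeapTuple     tuple;

    scan = table_beginscan_catalog(rel, 0, NULL);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* tuple points into a buffer pinned by the scan; copy out anything
         * that must survive the next heap_getnext() call */
    }
    table_endscan(scan);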

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1292 of file heapam.c.

1293 {
1294  HeapScanDesc scan = (HeapScanDesc) sscan;
1295 
1296  /* Note: no locking manipulations needed */
1297 
1298  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1299  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1300  else
1301  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1302 
1303  if (scan->rs_ctup.t_data == NULL)
1304  {
1305  ExecClearTuple(slot);
1306  return false;
1307  }
1308 
1309  /*
1310  * if we get here it means we have a new current scan tuple, so point to
1311  * the proper return buffer and return the tuple.
1312  */
1313 
1314  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1315 
1316  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1317  scan->rs_cbuf);
1318  return true;
1319 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
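
The slot-returning variant is normally reached via the table AM's scan_getnextslot callback; a sketch of equivalent direct usage, with rel and snapshot assumed to be supplied by the caller:

    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc   scan = table_beginscan(rel, snapshot, 0, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* the slot holds a buffer heap tuple; fetch columns with slot_getattr() */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);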

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1395 of file heapam.c.

1397 {
1398  HeapScanDesc scan = (HeapScanDesc) sscan;
1399  ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1400  ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1401 
1402  /* Note: no locking manipulations needed */
1403  for (;;)
1404  {
1405  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1406  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1407  else
1408  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1409 
1410  if (scan->rs_ctup.t_data == NULL)
1411  {
1412  ExecClearTuple(slot);
1413  return false;
1414  }
1415 
1416  /*
1417  * heap_set_tidrange will have used heap_setscanlimits to limit the
1418  * range of pages we scan to only ones that can contain the TID range
1419  * we're scanning for. Here we must filter out any tuples from these
1420  * pages that are outside of that range.
1421  */
1422  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1423  {
1424  ExecClearTuple(slot);
1425 
1426  /*
1427  * When scanning backwards, the TIDs will be in descending order.
1428  * Future tuples in this direction will be lower still, so we can
1429  * just return false to indicate there will be no more tuples.
1430  */
1431  if (ScanDirectionIsBackward(direction))
1432  return false;
1433 
1434  continue;
1435  }
1436 
1437  /*
1438  * Likewise for the final page, we must filter out TIDs greater than
1439  * maxtid.
1440  */
1441  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1442  {
1443  ExecClearTuple(slot);
1444 
1445  /*
1446  * When scanning forward, the TIDs will be in ascending order.
1447  * Future tuples in this direction will be higher still, so we can
1448  * just return false to indicate there will be no more tuples.
1449  */
1450  if (ScanDirectionIsForward(direction))
1451  return false;
1452  continue;
1453  }
1454 
1455  break;
1456  }
1457 
1458  /*
1459  * if we get here it means we have a new current scan tuple, so point to
1460  * the proper return buffer and return the tuple.
1461  */
1462  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1463 
1464  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1465  return true;
1466 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
union TableScanDescData::@48 st
struct TableScanDescData::@48::@50 tidrange

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, TableScanDescData::st, HeapTupleData::t_data, HeapTupleData::t_self, and TableScanDescData::tidrange.
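
A sketch of the TID range scan pattern that the executor's TID Range Scan node drives through the table AM wrappers; for a heap table these end up in heap_set_tidrange() and heap_getnextslot_tidrange(). The rel, snapshot, slot, mintid and maxtid variables are assumed:

    TableScanDesc scan = table_beginscan_tidrange(rel, snapshot, &mintid, &maxtid);

    while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* only tuples with mintid <= t_self <= maxtid reach this point */
    }

    table_endscan(scan);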

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool *  all_dead,
bool  first_call 
)

Definition at line 1622 of file heapam.c.

1625 {
1626  Page page = BufferGetPage(buffer);
1627  TransactionId prev_xmax = InvalidTransactionId;
1628  BlockNumber blkno;
1629  OffsetNumber offnum;
1630  bool at_chain_start;
1631  bool valid;
1632  bool skip;
1633  GlobalVisState *vistest = NULL;
1634 
1635  /* If this is not the first call, previous call returned a (live!) tuple */
1636  if (all_dead)
1637  *all_dead = first_call;
1638 
1639  blkno = ItemPointerGetBlockNumber(tid);
1640  offnum = ItemPointerGetOffsetNumber(tid);
1641  at_chain_start = first_call;
1642  skip = !first_call;
1643 
1644  /* XXX: we should assert that a snapshot is pushed or registered */
1645  Assert(TransactionIdIsValid(RecentXmin));
1646  Assert(BufferGetBlockNumber(buffer) == blkno);
1647 
1648  /* Scan through possible multiple members of HOT-chain */
1649  for (;;)
1650  {
1651  ItemId lp;
1652 
1653  /* check for bogus TID */
1654  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1655  break;
1656 
1657  lp = PageGetItemId(page, offnum);
1658 
1659  /* check for unused, dead, or redirected items */
1660  if (!ItemIdIsNormal(lp))
1661  {
1662  /* We should only see a redirect at start of chain */
1663  if (ItemIdIsRedirected(lp) && at_chain_start)
1664  {
1665  /* Follow the redirect */
1666  offnum = ItemIdGetRedirect(lp);
1667  at_chain_start = false;
1668  continue;
1669  }
1670  /* else must be end of chain */
1671  break;
1672  }
1673 
1674  /*
1675  * Update heapTuple to point to the element of the HOT chain we're
1676  * currently investigating. Having t_self set correctly is important
1677  * because the SSI checks and the *Satisfies routine for historical
1678  * MVCC snapshots need the correct tid to decide about the visibility.
1679  */
1680  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1681  heapTuple->t_len = ItemIdGetLength(lp);
1682  heapTuple->t_tableOid = RelationGetRelid(relation);
1683  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1684 
1685  /*
1686  * Shouldn't see a HEAP_ONLY tuple at chain start.
1687  */
1688  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1689  break;
1690 
1691  /*
1692  * The xmin should match the previous xmax value, else chain is
1693  * broken.
1694  */
1695  if (TransactionIdIsValid(prev_xmax) &&
1696  !TransactionIdEquals(prev_xmax,
1697  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1698  break;
1699 
1700  /*
1701  * When first_call is true (and thus, skip is initially false) we'll
1702  * return the first tuple we find. But on later passes, heapTuple
1703  * will initially be pointing to the tuple we returned last time.
1704  * Returning it again would be incorrect (and would loop forever), so
1705  * we skip it and return the next match we find.
1706  */
1707  if (!skip)
1708  {
1709  /* If it's visible per the snapshot, we must return it */
1710  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1711  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1712  buffer, snapshot);
1713 
1714  if (valid)
1715  {
1716  ItemPointerSetOffsetNumber(tid, offnum);
1717  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1718  HeapTupleHeaderGetXmin(heapTuple->t_data));
1719  if (all_dead)
1720  *all_dead = false;
1721  return true;
1722  }
1723  }
1724  skip = false;
1725 
1726  /*
1727  * If we can't see it, maybe no one else can either. At caller
1728  * request, check whether all chain members are dead to all
1729  * transactions.
1730  *
1731  * Note: if you change the criterion here for what is "dead", fix the
1732  * planner's get_actual_variable_range() function to match.
1733  */
1734  if (all_dead && *all_dead)
1735  {
1736  if (!vistest)
1737  vistest = GlobalVisTestFor(relation);
1738 
1739  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1740  *all_dead = false;
1741  }
1742 
1743  /*
1744  * Check to see if HOT chain continues past this tuple; if so fetch
1745  * the next offnum and loop around.
1746  */
1747  if (HeapTupleIsHotUpdated(heapTuple))
1748  {
1749  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1750  blkno);
1751  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1752  at_chain_start = false;
1753  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1754  }
1755  else
1756  break; /* end of chain */
1757  }
1758 
1759  return false;
1760 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:107
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4111
TransactionId RecentXmin
Definition: snapmgr.c:99

References Assert, BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
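
A sketch of the index-fetch usage, loosely following heapam_index_fetch_tuple(): the caller pins and share-locks the heap buffer, passes a copy of the index TID, and checks all_dead afterwards to decide whether the index entry can be killed. Variable names are illustrative:

    ItemPointerData tid = *index_tid;   /* copied: the call may advance it along the chain */
    HeapTupleData   heapTuple;
    bool            all_dead = false;
    bool            found;
    Buffer          buffer;

    buffer = ReadBuffer(rel, ItemPointerGetBlockNumber(&tid));
    LockBuffer(buffer, BUFFER_LOCK_SHARE);

    found = heap_hot_search_buffer(&tid, rel, buffer, snapshot,
                                   &heapTuple, &all_dead,
                                   true /* first_call */);

    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buffer);

    /* if nothing was returned and all_dead is set, every chain member is dead
     * to all transactions, so the index AM may mark its entry LP_DEAD */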

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7956 of file heapam.c.

7957 {
7958  /* Initial assumption is that earlier pruning took care of conflict */
7959  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7960  BlockNumber blkno = InvalidBlockNumber;
7961  Buffer buf = InvalidBuffer;
7962  Page page = NULL;
7963  OffsetNumber maxoff = InvalidOffsetNumber;
7964  TransactionId priorXmax;
7965 #ifdef USE_PREFETCH
7966  IndexDeletePrefetchState prefetch_state;
7967  int prefetch_distance;
7968 #endif
7969  SnapshotData SnapshotNonVacuumable;
7970  int finalndeltids = 0,
7971  nblocksaccessed = 0;
7972 
7973  /* State that's only used in bottom-up index deletion case */
7974  int nblocksfavorable = 0;
7975  int curtargetfreespace = delstate->bottomupfreespace,
7976  lastfreespace = 0,
7977  actualfreespace = 0;
7978  bool bottomup_final_block = false;
7979 
7980  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7981 
7982  /* Sort caller's deltids array by TID for further processing */
7983  index_delete_sort(delstate);
7984 
7985  /*
7986  * Bottom-up case: resort deltids array in an order attuned to where the
7987  * greatest number of promising TIDs are to be found, and determine how
7988  * many blocks from the start of sorted array should be considered
7989  * favorable. This will also shrink the deltids array in order to
7990  * eliminate completely unfavorable blocks up front.
7991  */
7992  if (delstate->bottomup)
7993  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7994 
7995 #ifdef USE_PREFETCH
7996  /* Initialize prefetch state. */
7997  prefetch_state.cur_hblkno = InvalidBlockNumber;
7998  prefetch_state.next_item = 0;
7999  prefetch_state.ndeltids = delstate->ndeltids;
8000  prefetch_state.deltids = delstate->deltids;
8001 
8002  /*
8003  * Determine the prefetch distance that we will attempt to maintain.
8004  *
8005  * Since the caller holds a buffer lock somewhere in rel, we'd better make
8006  * sure that isn't a catalog relation before we call code that does
8007  * syscache lookups, to avoid risk of deadlock.
8008  */
8009  if (IsCatalogRelation(rel))
8010  prefetch_distance = maintenance_io_concurrency;
8011  else
8012  prefetch_distance =
8013  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
8014 
8015  /* Cap initial prefetch distance for bottom-up deletion caller */
8016  if (delstate->bottomup)
8017  {
8018  Assert(nblocksfavorable >= 1);
8019  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
8020  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
8021  }
8022 
8023  /* Start prefetching. */
8024  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
8025 #endif
8026 
8027  /* Iterate over deltids, determine which to delete, check their horizon */
8028  Assert(delstate->ndeltids > 0);
8029  for (int i = 0; i < delstate->ndeltids; i++)
8030  {
8031  TM_IndexDelete *ideltid = &delstate->deltids[i];
8032  TM_IndexStatus *istatus = delstate->status + ideltid->id;
8033  ItemPointer htid = &ideltid->tid;
8034  OffsetNumber offnum;
8035 
8036  /*
8037  * Read buffer, and perform required extra steps each time a new block
8038  * is encountered. Avoid refetching if it's the same block as the one
8039  * from the last htid.
8040  */
8041  if (blkno == InvalidBlockNumber ||
8042  ItemPointerGetBlockNumber(htid) != blkno)
8043  {
8044  /*
8045  * Consider giving up early for bottom-up index deletion caller
8046  * first. (Only prefetch next-next block afterwards, when it
8047  * becomes clear that we're at least going to access the next
8048  * block in line.)
8049  *
8050  * Sometimes the first block frees so much space for bottom-up
8051  * caller that the deletion process can end without accessing any
8052  * more blocks. It is usually necessary to access 2 or 3 blocks
8053  * per bottom-up deletion operation, though.
8054  */
8055  if (delstate->bottomup)
8056  {
8057  /*
8058  * We often allow caller to delete a few additional items
8059  * whose entries we reached after the point that space target
8060  * from caller was satisfied. The cost of accessing the page
8061  * was already paid at that point, so it made sense to finish
8062  * it off. When that happened, we finalize everything here
8063  * (by finishing off the whole bottom-up deletion operation
8064  * without needlessly paying the cost of accessing any more
8065  * blocks).
8066  */
8067  if (bottomup_final_block)
8068  break;
8069 
8070  /*
8071  * Give up when we didn't enable our caller to free any
8072  * additional space as a result of processing the page that we
8073  * just finished up with. This rule is the main way in which
8074  * we keep the cost of bottom-up deletion under control.
8075  */
8076  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
8077  break;
8078  lastfreespace = actualfreespace; /* for next time */
8079 
8080  /*
8081  * Deletion operation (which is bottom-up) will definitely
8082  * access the next block in line. Prepare for that now.
8083  *
8084  * Decay target free space so that we don't hang on for too
8085  * long with a marginal case. (Space target is only truly
8086  * helpful when it allows us to recognize that we don't need
8087  * to access more than 1 or 2 blocks to satisfy caller due to
8088  * agreeable workload characteristics.)
8089  *
8090  * We are a bit more patient when we encounter contiguous
8091  * blocks, though: these are treated as favorable blocks. The
8092  * decay process is only applied when the next block in line
8093  * is not a favorable/contiguous block. This is not an
8094  * exception to the general rule; we still insist on finding
8095  * at least one deletable item per block accessed. See
8096  * bottomup_nblocksfavorable() for full details of the theory
8097  * behind favorable blocks and heap block locality in general.
8098  *
8099  * Note: The first block in line is always treated as a
8100  * favorable block, so the earliest possible point that the
8101  * decay can be applied is just before we access the second
8102  * block in line. The Assert() verifies this for us.
8103  */
8104  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
8105  if (nblocksfavorable > 0)
8106  nblocksfavorable--;
8107  else
8108  curtargetfreespace /= 2;
8109  }
8110 
8111  /* release old buffer */
8112  if (BufferIsValid(buf))
8113  UnlockReleaseBuffer(buf);
8114 
8115  blkno = ItemPointerGetBlockNumber(htid);
8116  buf = ReadBuffer(rel, blkno);
8117  nblocksaccessed++;
8118  Assert(!delstate->bottomup ||
8119  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
8120 
8121 #ifdef USE_PREFETCH
8122 
8123  /*
8124  * To maintain the prefetch distance, prefetch one more page for
8125  * each page we read.
8126  */
8127  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
8128 #endif
8129 
8131 
8132  page = BufferGetPage(buf);
8133  maxoff = PageGetMaxOffsetNumber(page);
8134  }
8135 
8136  /*
8137  * In passing, detect index corruption involving an index page with a
8138  * TID that points to a location in the heap that couldn't possibly be
8139  * correct. We only do this with actual TIDs from caller's index page
8140  * (not items reached by traversing through a HOT chain).
8141  */
8142  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
8143 
8144  if (istatus->knowndeletable)
8145  Assert(!delstate->bottomup && !istatus->promising);
8146  else
8147  {
8148  ItemPointerData tmp = *htid;
8149  HeapTupleData heapTuple;
8150 
8151  /* Are any tuples from this HOT chain non-vacuumable? */
8152  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
8153  &heapTuple, NULL, true))
8154  continue; /* can't delete entry */
8155 
8156  /* Caller will delete, since whole HOT chain is vacuumable */
8157  istatus->knowndeletable = true;
8158 
8159  /* Maintain index free space info for bottom-up deletion case */
8160  if (delstate->bottomup)
8161  {
8162  Assert(istatus->freespace > 0);
8163  actualfreespace += istatus->freespace;
8164  if (actualfreespace >= curtargetfreespace)
8165  bottomup_final_block = true;
8166  }
8167  }
8168 
8169  /*
8170  * Maintain snapshotConflictHorizon value for deletion operation as a
8171  * whole by advancing current value using heap tuple headers. This is
8172  * loosely based on the logic for pruning a HOT chain.
8173  */
8174  offnum = ItemPointerGetOffsetNumber(htid);
8175  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8176  for (;;)
8177  {
8178  ItemId lp;
8179  HeapTupleHeader htup;
8180 
8181  /* Sanity check (pure paranoia) */
8182  if (offnum < FirstOffsetNumber)
8183  break;
8184 
8185  /*
8186  * An offset past the end of page's line pointer array is possible
8187  * when the array was truncated
8188  */
8189  if (offnum > maxoff)
8190  break;
8191 
8192  lp = PageGetItemId(page, offnum);
8193  if (ItemIdIsRedirected(lp))
8194  {
8195  offnum = ItemIdGetRedirect(lp);
8196  continue;
8197  }
8198 
8199  /*
8200  * We'll often encounter LP_DEAD line pointers (especially with an
8201  * entry marked knowndeletable by our caller up front). No heap
8202  * tuple headers get examined for an htid that leads us to an
8203  * LP_DEAD item. This is okay because the earlier pruning
8204  * operation that made the line pointer LP_DEAD in the first place
8205  * must have considered the original tuple header as part of
8206  * generating its own snapshotConflictHorizon value.
8207  *
8208  * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8209  * the same strategy that index vacuuming uses in all cases. Index
8210  * VACUUM WAL records don't even have a snapshotConflictHorizon
8211  * field of their own for this reason.
8212  */
8213  if (!ItemIdIsNormal(lp))
8214  break;
8215 
8216  htup = (HeapTupleHeader) PageGetItem(page, lp);
8217 
8218  /*
8219  * Check the tuple XMIN against prior XMAX, if any
8220  */
8221  if (TransactionIdIsValid(priorXmax) &&
8222  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
8223  break;
8224 
8225  HeapTupleHeaderAdvanceConflictHorizon(htup,
8226  &snapshotConflictHorizon);
8227 
8228  /*
8229  * If the tuple is not HOT-updated, then we are at the end of this
8230  * HOT-chain. No need to visit later tuples from the same update
8231  * chain (they get their own index entries) -- just move on to
8232  * next htid from index AM caller.
8233  */
8234  if (!HeapTupleHeaderIsHotUpdated(htup))
8235  break;
8236 
8237  /* Advance to next HOT chain member */
8238  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8239  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8240  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
8241  }
8242 
8243  /* Enable further/final shrinking of deltids for caller */
8244  finalndeltids = i + 1;
8245  }
8246 
8247  UnlockReleaseBuffer(buf);
8248 
8249  /*
8250  * Shrink deltids array to exclude non-deletable entries at the end. This
8251  * is not just a minor optimization. Final deltids array size might be
8252  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8253  * ndeltids being zero in all cases with zero total deletable entries.
8254  */
8255  Assert(finalndeltids > 0 || delstate->bottomup);
8256  delstate->ndeltids = finalndeltids;
8257 
8258  return snapshotConflictHorizon;
8259 }
int maintenance_io_concurrency
Definition: bufmgr.c:158
#define Min(x, y)
Definition: c.h:983
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8513
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7811
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7896
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:181
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1622
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8301
static char * buf
Definition: pg_test_fsync.c:72
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:254
int bottomupfreespace
Definition: tableam.h:249
TM_IndexDelete * deltids
Definition: tableam.h:253
ItemPointerData tid
Definition: tableam.h:212
bool knowndeletable
Definition: tableam.h:219
bool promising
Definition: tableam.h:222
int16 freespace
Definition: tableam.h:223

References Assert, TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
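
Index AMs reach this through the table AM wrapper rather than directly. A hedged sketch of a simple (non-bottom-up) deletion request; the deltids/status arrays and the other variables are assumed to have been built by the index AM, and the field names follow TM_IndexDeleteOp as declared in tableam.h:

    TM_IndexDeleteOp delstate;
    TransactionId    conflict_horizon;

    delstate.irel = indexRel;           /* index whose entries are being checked */
    delstate.iblknum = indexBlock;      /* index block number, for corruption reports */
    delstate.bottomup = false;          /* simple deletion: no free-space target */
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = ndeltids;
    delstate.deltids = deltids;
    delstate.status = status;

    conflict_horizon = table_index_delete_tuples(heapRel, &delstate);

    /* ndeltids may have been shrunk; entries flagged knowndeletable can now be
     * removed from the index, and conflict_horizon feeds the deletion WAL record */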

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void *  arg 
)

Definition at line 6202 of file heapam.c.

6205 {
6206  HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6207  TM_Result result;
6208  bool ret;
6209 
6210 #ifdef USE_ASSERT_CHECKING
6211  if (RelationGetRelid(relation) == RelationRelationId)
6212  check_inplace_rel_lock(oldtup_ptr);
6213 #endif
6214 
6215  Assert(BufferIsValid(buffer));
6216 
6217  /*
6218  * Construct shared cache inval if necessary. Because we pass a tuple
6219  * version without our own inplace changes or inplace changes other
6220  * sessions complete while we wait for locks, inplace update mustn't
6221  * change catcache lookup keys. But we aren't bothering with index
6222  * updates either, so that's true a fortiori. After LockBuffer(), it
6223  * would be too late, because this might reach a
6224  * CatalogCacheInitializeCache() that locks "buffer".
6225  */
6226  CacheInvalidateHeapTupleInplace(relation, oldtup_ptr, NULL);
6227 
6228  LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6229  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6230 
6231  /*----------
6232  * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6233  *
6234  * - wait unconditionally
6235  * - already locked tuple above, since inplace needs that unconditionally
6236  * - don't recheck header after wait: simpler to defer to next iteration
6237  * - don't try to continue even if the updater aborts: likewise
6238  * - no crosscheck
6239  */
6240  result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
6241  buffer);
6242 
6243  if (result == TM_Invisible)
6244  {
6245  /* no known way this can happen */
6246  ereport(ERROR,
6247  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6248  errmsg_internal("attempted to overwrite invisible tuple")));
6249  }
6250  else if (result == TM_SelfModified)
6251  {
6252  /*
6253  * CREATE INDEX might reach this if an expression is silly enough to
6254  * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6255  * statements might get here after a heap_update() of the same row, in
6256  * the absence of an intervening CommandCounterIncrement().
6257  */
6258  ereport(ERROR,
6259  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6260  errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6261  }
6262  else if (result == TM_BeingModified)
6263  {
6264  TransactionId xwait;
6265  uint16 infomask;
6266 
6267  xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
6268  infomask = oldtup.t_data->t_infomask;
6269 
6270  if (infomask & HEAP_XMAX_IS_MULTI)
6271  {
6272  LockTupleMode lockmode = LockTupleNoKeyExclusive;
6273  MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
6274  int remain;
6275 
6276  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
6277  lockmode, NULL))
6278  {
6279  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6280  release_callback(arg);
6281  ret = false;
6282  MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
6283  relation, &oldtup.t_self, XLTW_Update,
6284  &remain);
6285  }
6286  else
6287  ret = true;
6288  }
6289  else if (TransactionIdIsCurrentTransactionId(xwait))
6290  ret = true;
6291  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
6292  ret = true;
6293  else
6294  {
6295  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6296  release_callback(arg);
6297  ret = false;
6298  XactLockTableWait(xwait, relation, &oldtup.t_self,
6299  XLTW_Update);
6300  }
6301  }
6302  else
6303  {
6304  ret = (result == TM_Ok);
6305  if (!ret)
6306  {
6307  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6308  release_callback(arg);
6309  }
6310  }
6311 
6312  /*
6313  * GetCatalogSnapshot() relies on invalidation messages to know when to
6314  * take a new snapshot. COMMIT of xwait is responsible for sending the
6315  * invalidation. We're not acquiring heavyweight locks sufficient to
6316  * block if not yet sent, so we must take a new snapshot to ensure a later
6317  * attempt has a fair chance. While we don't need this if xwait aborted,
6318  * don't bother optimizing that.
6319  */
6320  if (!ret)
6321  {
6322  UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6323  ForgetInplace_Inval();
6324  InvalidateCatalogSnapshot();
6325  }
6326  return ret;
6327 }
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1510
void ForgetInplace_Inval(void)
Definition: inval.c:1211
void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
Definition: lmgr.c:557
void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
Definition: lmgr.c:594
@ XLTW_Update
Definition: lmgr.h:27
#define InplaceUpdateTupleLock
Definition: lockdefs.h:48
LockTupleMode
Definition: lockoptions.h:50
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
void * arg
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:422
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:828

References arg, Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().
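
A sketch of the lock / update / unlock protocol that systable_inplace_update_begin() and systable_inplace_update_finish() drive; the callback, its argument and the retry policy are illustrative assumptions:

    /* The buffer must already be pinned; on failure heap_inplace_lock() has
     * already called release_callback(cb_arg) before returning false. */
    if (heap_inplace_lock(relation, oldtup, buffer, release_callback, cb_arg))
    {
        /* mutate a palloc'd copy of the tuple, keeping the same length, then: */
        heap_inplace_update_and_unlock(relation, oldtup, newtup, buffer);

        /* alternatively, heap_inplace_unlock() backs out without writing */
    }
    else
    {
        /* not locked: retry from the catalog lookup with a fresh snapshot */
    }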

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)

Definition at line 6338 of file heapam.c.

6341 {
6342  HeapTupleHeader htup = oldtup->t_data;
6343  uint32 oldlen;
6344  uint32 newlen;
6345  char *dst;
6346  char *src;
6347  int nmsgs = 0;
6348  SharedInvalidationMessage *invalMessages = NULL;
6349  bool RelcacheInitFileInval = false;
6350 
6351  Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6352  oldlen = oldtup->t_len - htup->t_hoff;
6353  newlen = tuple->t_len - tuple->t_data->t_hoff;
6354  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6355  elog(ERROR, "wrong tuple length");
6356 
6357  dst = (char *) htup + htup->t_hoff;
6358  src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6359 
6360  /* Like RecordTransactionCommit(), log only if needed */
6361  if (XLogStandbyInfoActive())
6362  nmsgs = inplaceGetInvalidationMessages(&invalMessages,
6363  &RelcacheInitFileInval);
6364 
6365  /*
6366  * Unlink relcache init files as needed. If unlinking, acquire
6367  * RelCacheInitLock until after associated invalidations. By doing this
6368  * in advance, if we checkpoint and then crash between inplace
6369  * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6370  * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6371  * neglect to PANIC on EIO.
6372  */
6373  PreInplace_Inval();
6374 
6375  /*----------
6376  * NO EREPORT(ERROR) from here till changes are complete
6377  *
6378  * Our buffer lock won't stop a reader having already pinned and checked
6379  * visibility for this tuple. Hence, we write WAL first, then mutate the
6380  * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6381  * checkpoint delay makes that acceptable. With the usual order of
6382  * changes, a crash after memcpy() and before XLogInsert() could allow
6383  * datfrozenxid to overtake relfrozenxid:
6384  *
6385  * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6386  * ["R" is a VACUUM tbl]
6387  * D: vac_update_datfrozenid() -> systable_beginscan(pg_class)
6388  * D: systable_getnext() returns pg_class tuple of tbl
6389  * R: memcpy() into pg_class tuple of tbl
6390  * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6391  * [crash]
6392  * [recovery restores datfrozenxid w/o relfrozenxid]
6393  *
6394  * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6395  * the buffer to the stack before logging. Here, that facilitates a FPI
6396  * of the post-mutation block before we accept other sessions seeing it.
6397  */
6398  Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
6399  START_CRIT_SECTION();
6400  MyProc->delayChkptFlags |= DELAY_CHKPT_START;
6401 
6402  /* XLOG stuff */
6403  if (RelationNeedsWAL(relation))
6404  {
6405  xl_heap_inplace xlrec;
6406  PGAlignedBlock copied_buffer;
6407  char *origdata = (char *) BufferGetBlock(buffer);
6408  Page page = BufferGetPage(buffer);
6409  uint16 lower = ((PageHeader) page)->pd_lower;
6410  uint16 upper = ((PageHeader) page)->pd_upper;
6411  uintptr_t dst_offset_in_block;
6412  RelFileLocator rlocator;
6413  ForkNumber forkno;
6414  BlockNumber blkno;
6415  XLogRecPtr recptr;
6416 
6417  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6418  xlrec.dbId = MyDatabaseId;
6419  xlrec.tsId = MyDatabaseTableSpace;
6420  xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6421  xlrec.nmsgs = nmsgs;
6422 
6423  XLogBeginInsert();
6424  XLogRegisterData((char *) &xlrec, MinSizeOfHeapInplace);
6425  if (nmsgs != 0)
6426  XLogRegisterData((char *) invalMessages,
6427  nmsgs * sizeof(SharedInvalidationMessage));
6428 
6429  /* register block matching what buffer will look like after changes */
6430  memcpy(copied_buffer.data, origdata, lower);
6431  memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6432  dst_offset_in_block = dst - origdata;
6433  memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6434  BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6435  Assert(forkno == MAIN_FORKNUM);
6436  XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6437  REGBUF_STANDARD);
6438  XLogRegisterBufData(0, src, newlen);
6439 
6440  /* inplace updates aren't decoded atm, don't log the origin */
6441 
6442  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6443 
6444  PageSetLSN(page, recptr);
6445  }
6446 
6447  memcpy(dst, src, newlen);
6448 
6449  MarkBufferDirty(buffer);
6450 
6451  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6452 
6453  /*
6454  * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6455  * do this before UnlockTuple().
6456  *
6457  * If we're mutating a tuple visible only to this transaction, there's an
6458  * equivalent transactional inval from the action that created the tuple,
6459  * and this inval is superfluous.
6460  */
6461  AtInplace_Inval();
6462  AtInplace_Inval();
6463  MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
6464  END_CRIT_SECTION();
6465  UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6466 
6467  AcceptInvalidationMessages(); /* local processing of just-sent inval */
6468 
6469  /*
6470  * Queue a transactional inval. The immediate invalidation we just sent
6471  * is the only one known to be necessary. To reduce risk from the
6472  * transition to immediate invalidation, continue sending a transactional
6473  * invalidation like we've long done. Third-party code might rely on it.
6474  */
6475  if (!IsBootstrapProcessingMode())
6476  CacheInvalidateHeapTuple(relation, tuple, NULL);
6477 }
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:3745
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:367
PageHeaderData * PageHeader
Definition: bufpage.h:173
unsigned int uint32
Definition: c.h:492
Oid MyDatabaseTableSpace
Definition: globals.c:95
Oid MyDatabaseId
Definition: globals.c:93
#define MinSizeOfHeapInplace
Definition: heapam_xlog.h:436
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition: inval.c:863
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition: inval.c:1015
void PreInplace_Inval(void)
Definition: inval.c:1175
void AtInplace_Inval(void)
Definition: inval.c:1188
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
#define DELAY_CHKPT_START
Definition: proc.h:119
ForkNumber
Definition: relpath.h:56
PGPROC * MyProc
Definition: proc.c:66
int delayChkptFlags
Definition: proc.h:240
OffsetNumber offnum
Definition: heapam_xlog.h:428
bool relcacheInitFileInval
Definition: heapam_xlog.h:431
char data[BLCKSZ]
Definition: c.h:1098
#define XLogStandbyInfoActive()
Definition: xlog.h:123
void XLogRegisterBufData(uint8 block_id, const char *data, uint32 len)
Definition: xloginsert.c:405
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const char *page, uint8 flags)
Definition: xloginsert.c:309

References AcceptInvalidationMessages(), Assert, AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), PGAlignedBlock::data, xl_heap_inplace::dbId, DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, xl_heap_inplace::nmsgs, xl_heap_inplace::offnum, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, xl_heap_inplace::relcacheInitFileInval, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, xl_heap_inplace::tsId, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1985 of file heapam.c.

1987 {
1988  TransactionId xid = GetCurrentTransactionId();
1989  HeapTuple heaptup;
1990  Buffer buffer;
1991  Buffer vmbuffer = InvalidBuffer;
1992  bool all_visible_cleared = false;
1993 
1994  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
1995  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
1996  RelationGetNumberOfAttributes(relation));
1997 
1998  /*
1999  * Fill in tuple header fields and toast the tuple if necessary.
2000  *
2001  * Note: below this point, heaptup is the data we actually intend to store
2002  * into the relation; tup is the caller's original untoasted data.
2003  */
2004  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2005 
2006  /*
2007  * Find buffer to insert this tuple into. If the page is all visible,
2008  * this will also pin the requisite visibility map page.
2009  */
2010  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2011  InvalidBuffer, options, bistate,
2012  &vmbuffer, NULL,
2013  0);
2014 
2015  /*
2016  * We're about to do the actual insert -- but check for conflict first, to
2017  * avoid possibly having to roll back work we've just done.
2018  *
2019  * This is safe without a recheck as long as there is no possibility of
2020  * another process scanning the page between this check and the insert
2021  * being visible to the scan (i.e., an exclusive buffer content lock is
2022  * continuously held from this point until the tuple insert is visible).
2023  *
2024  * For a heap insert, we only need to check for table-level SSI locks. Our
2025  * new tuple can't possibly conflict with existing tuple locks, and heap
2026  * page locks are only consolidated versions of tuple locks; they do not
2027  * lock "gaps" as index page locks do. So we don't need to specify a
2028  * buffer when making the call, which makes for a faster check.
2029  */
2030  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2031 
2032  /* NO EREPORT(ERROR) from here till changes are logged */
2033  START_CRIT_SECTION();
2034 
2035  RelationPutHeapTuple(relation, buffer, heaptup,
2036  (options & HEAP_INSERT_SPECULATIVE) != 0);
2037 
2038  if (PageIsAllVisible(BufferGetPage(buffer)))
2039  {
2040  all_visible_cleared = true;
2041  PageClearAllVisible(BufferGetPage(buffer));
2042  visibilitymap_clear(relation,
2043  ItemPointerGetBlockNumber(&(heaptup->t_self)),
2044  vmbuffer, VISIBILITYMAP_VALID_BITS);
2045  }
2046 
2047  /*
2048  * XXX Should we set PageSetPrunable on this page ?
2049  *
2050  * The inserting transaction may eventually abort thus making this tuple
2051  * DEAD and hence available for pruning. Though we don't want to optimize
2052  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2053  * aborted tuple will never be pruned until next vacuum is triggered.
2054  *
2055  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2056  */
2057 
2058  MarkBufferDirty(buffer);
2059 
2060  /* XLOG stuff */
2061  if (RelationNeedsWAL(relation))
2062  {
2063  xl_heap_insert xlrec;
2064  xl_heap_header xlhdr;
2065  XLogRecPtr recptr;
2066  Page page = BufferGetPage(buffer);
2067  uint8 info = XLOG_HEAP_INSERT;
2068  int bufflags = 0;
2069 
2070  /*
2071  * If this is a catalog, we need to transmit combo CIDs to properly
2072  * decode, so log that as well.
2073  */
2074  if (RelationIsAccessibleInLogicalDecoding(relation))
2075  log_heap_new_cid(relation, heaptup);
2076 
2077  /*
2078  * If this is the single and first tuple on page, we can reinit the
2079  * page instead of restoring the whole thing. Set flag, and hide
2080  * buffer references from XLogInsert.
2081  */
2082  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
2083  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2084  {
2085  info |= XLOG_HEAP_INIT_PAGE;
2086  bufflags |= REGBUF_WILL_INIT;
2087  }
2088 
2089  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2090  xlrec.flags = 0;
2091  if (all_visible_cleared)
2092  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2093  if (options & HEAP_INSERT_SPECULATIVE)
2094  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
2095  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
2096 
2097  /*
2098  * For logical decoding, we need the tuple even if we're doing a full
2099  * page write, so make sure it's included even if we take a full-page
2100  * image. (XXX We could alternatively store a pointer into the FPW).
2101  */
2102  if (RelationIsLogicallyLogged(relation) &&
2103  !(options & HEAP_INSERT_NO_LOGICAL))
2104  {
2105  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2106  bufflags |= REGBUF_KEEP_DATA;
2107 
2108  if (IsToastRelation(relation))
2109  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
2110  }
2111 
2112  XLogBeginInsert();
2113  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
2114 
2115  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2116  xlhdr.t_infomask = heaptup->t_data->t_infomask;
2117  xlhdr.t_hoff = heaptup->t_data->t_hoff;
2118 
2119  /*
2120  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2121  * write the whole page to the xlog, we don't need to store
2122  * xl_heap_header in the xlog.
2123  */
2124  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2125  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
2126  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2127  XLogRegisterBufData(0,
2128  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2129  heaptup->t_len - SizeofHeapTupleHeader);
2130 
2131  /* filtering by origin on a row level is much more efficient */
2132  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2133 
2134  recptr = XLogInsert(RM_HEAP_ID, info);
2135 
2136  PageSetLSN(page, recptr);
2137  }
2138 
2139  END_CRIT_SECTION();
2140 
2141  UnlockReleaseBuffer(buffer);
2142  if (vmbuffer != InvalidBuffer)
2143  ReleaseBuffer(vmbuffer);
2144 
2145  /*
2146  * If tuple is cachable, mark it for invalidation from the caches in case
2147  * we abort. Note it is OK to do this after releasing the buffer, because
2148  * the heaptup data structure is all in local memory, not in the shared
2149  * buffer.
2150  */
2151  CacheInvalidateHeapTuple(relation, heaptup, NULL);
2152 
2153  /* Note: speculative insertions are counted too, even if aborted later */
2154  pgstat_count_heap_insert(relation, 1);
2155 
2156  /*
2157  * If heaptup is a private copy, release it. Don't forget to copy t_self
2158  * back to the caller's image, too.
2159  */
2160  if (heaptup != tup)
2161  {
2162  tup->t_self = heaptup->t_self;
2163  heap_freetuple(heaptup);
2164  }
2165 }
unsigned char uint8
Definition: c.h:490
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2174
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:39
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:38
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:33
#define SizeOfHeapInsert
Definition: heapam_xlog.h:168
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:701
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:511
OffsetNumber offnum
Definition: heapam_xlog.h:162
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
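For orientation, here is a minimal usage sketch (not verbatim PostgreSQL source) showing how an internal caller in the style of simple_heap_insert() can drive heap_insert() for a single pre-built tuple; the helper name insert_one_tuple and the choice of default options are illustrative.

#include "access/heapam.h"
#include "access/xact.h"

/* Illustrative sketch: insert one pre-built tuple with default options. */
static void
insert_one_tuple(Relation relation, HeapTuple tup)
{
    /* GetCurrentCommandId(true) marks the current command as having written data */
    heap_insert(relation, tup,
                GetCurrentCommandId(true),
                0,      /* options: no HEAP_INSERT_* flags */
                NULL);  /* bistate: no bulk-insert state for a single row */
}

On return, tup->t_self holds the TID assigned to the new tuple (copied back from the internal heaptup copy, as the end of the listing above shows).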

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer *  buffer,
struct TM_FailureData *  tmfd 
)

Definition at line 4427 of file heapam.c.

4431 {
4432  TM_Result result;
4433  ItemPointer tid = &(tuple->t_self);
4434  ItemId lp;
4435  Page page;
4436  Buffer vmbuffer = InvalidBuffer;
4437  BlockNumber block;
4438  TransactionId xid,
4439  xmax;
4440  uint16 old_infomask,
4441  new_infomask,
4442  new_infomask2;
4443  bool first_time = true;
4444  bool skip_tuple_lock = false;
4445  bool have_tuple_lock = false;
4446  bool cleared_all_frozen = false;
4447 
4448  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4449  block = ItemPointerGetBlockNumber(tid);
4450 
4451  /*
4452  * Before locking the buffer, pin the visibility map page if it appears to
4453  * be necessary. Since we haven't got the lock yet, someone else might be
4454  * in the middle of changing this, so we'll need to recheck after we have
4455  * the lock.
4456  */
4457  if (PageIsAllVisible(BufferGetPage(*buffer)))
4458  visibilitymap_pin(relation, block, &vmbuffer);
4459 
4460  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4461 
4462  page = BufferGetPage(*buffer);
4463  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4464  Assert(ItemIdIsNormal(lp));
4465 
4466  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4467  tuple->t_len = ItemIdGetLength(lp);
4468  tuple->t_tableOid = RelationGetRelid(relation);
4469 
4470 l3:
4471  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4472 
4473  if (result == TM_Invisible)
4474  {
4475  /*
4476  * This is possible, but only when locking a tuple for ON CONFLICT
4477  * UPDATE. We return this value here rather than throwing an error in
4478  * order to give that case the opportunity to throw a more specific
4479  * error.
4480  */
4481  result = TM_Invisible;
4482  goto out_locked;
4483  }
4484  else if (result == TM_BeingModified ||
4485  result == TM_Updated ||
4486  result == TM_Deleted)
4487  {
4488  TransactionId xwait;
4489  uint16 infomask;
4490  uint16 infomask2;
4491  bool require_sleep;
4492  ItemPointerData t_ctid;
4493 
4494  /* must copy state data before unlocking buffer */
4495  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4496  infomask = tuple->t_data->t_infomask;
4497  infomask2 = tuple->t_data->t_infomask2;
4498  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4499 
4500  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4501 
4502  /*
4503  * If any subtransaction of the current top transaction already holds
4504  * a lock as strong as or stronger than what we're requesting, we
4505  * effectively hold the desired lock already. We *must* succeed
4506  * without trying to take the tuple lock, else we will deadlock
4507  * against anyone wanting to acquire a stronger lock.
4508  *
4509  * Note we only do this the first time we loop on the HTSU result;
4510  * there is no point in testing in subsequent passes, because
4511  * evidently our own transaction cannot have acquired a new lock after
4512  * the first time we checked.
4513  */
4514  if (first_time)
4515  {
4516  first_time = false;
4517 
4518  if (infomask & HEAP_XMAX_IS_MULTI)
4519  {
4520  int i;
4521  int nmembers;
4522  MultiXactMember *members;
4523 
4524  /*
4525  * We don't need to allow old multixacts here; if that had
4526  * been the case, HeapTupleSatisfiesUpdate would have returned
4527  * MayBeUpdated and we wouldn't be here.
4528  */
4529  nmembers =
4530  GetMultiXactIdMembers(xwait, &members, false,
4531  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4532 
4533  for (i = 0; i < nmembers; i++)
4534  {
4535  /* only consider members of our own transaction */
4536  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4537  continue;
4538 
4539  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4540  {
4541  pfree(members);
4542  result = TM_Ok;
4543  goto out_unlocked;
4544  }
4545  else
4546  {
4547  /*
4548  * Disable acquisition of the heavyweight tuple lock.
4549  * Otherwise, when promoting a weaker lock, we might
4550  * deadlock with another locker that has acquired the
4551  * heavyweight tuple lock and is waiting for our
4552  * transaction to finish.
4553  *
4554  * Note that in this case we still need to wait for
4555  * the multixact if required, to avoid acquiring
4556  * conflicting locks.
4557  */
4558  skip_tuple_lock = true;
4559  }
4560  }
4561 
4562  if (members)
4563  pfree(members);
4564  }
4565  else if (TransactionIdIsCurrentTransactionId(xwait))
4566  {
4567  switch (mode)
4568  {
4569  case LockTupleKeyShare:
4570  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4571  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4572  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4573  result = TM_Ok;
4574  goto out_unlocked;
4575  case LockTupleShare:
4576  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4577  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4578  {
4579  result = TM_Ok;
4580  goto out_unlocked;
4581  }
4582  break;
4583  case LockTupleNoKeyExclusive:
4584  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4585  {
4586  result = TM_Ok;
4587  goto out_unlocked;
4588  }
4589  break;
4590  case LockTupleExclusive:
4591  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4592  infomask2 & HEAP_KEYS_UPDATED)
4593  {
4594  result = TM_Ok;
4595  goto out_unlocked;
4596  }
4597  break;
4598  }
4599  }
4600  }
4601 
4602  /*
4603  * Initially assume that we will have to wait for the locking
4604  * transaction(s) to finish. We check various cases below in which
4605  * this can be turned off.
4606  */
4607  require_sleep = true;
4608  if (mode == LockTupleKeyShare)
4609  {
4610  /*
4611  * If we're requesting KeyShare, and there's no update present, we
4612  * don't need to wait. Even if there is an update, we can still
4613  * continue if the key hasn't been modified.
4614  *
4615  * However, if there are updates, we need to walk the update chain
4616  * to mark future versions of the row as locked, too. That way,
4617  * if somebody deletes that future version, we're protected
4618  * against the key going away. This locking of future versions
4619  * could block momentarily, if a concurrent transaction is
4620  * deleting a key; or it could return a value to the effect that
4621  * the transaction deleting the key has already committed. So we
4622  * do this before re-locking the buffer; otherwise this would be
4623  * prone to deadlocks.
4624  *
4625  * Note that the TID we're locking was grabbed before we unlocked
4626  * the buffer. For it to change while we're not looking, the
4627  * other properties we're testing for below after re-locking the
4628  * buffer would also change, in which case we would restart this
4629  * loop above.
4630  */
4631  if (!(infomask2 & HEAP_KEYS_UPDATED))
4632  {
4633  bool updated;
4634 
4635  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4636 
4637  /*
4638  * If there are updates, follow the update chain; bail out if
4639  * that cannot be done.
4640  */
4641  if (follow_updates && updated)
4642  {
4643  TM_Result res;
4644 
4645  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4646  GetCurrentTransactionId(),
4647  mode);
4648  if (res != TM_Ok)
4649  {
4650  result = res;
4651  /* recovery code expects to have buffer lock held */
4652  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4653  goto failed;
4654  }
4655  }
4656 
4657  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4658 
4659  /*
4660  * Make sure it's still an appropriate lock, else start over.
4661  * Also, if it wasn't updated before we released the lock, but
4662  * is updated now, we start over too; the reason is that we
4663  * now need to follow the update chain to lock the new
4664  * versions.
4665  */
4666  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4667  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4668  !updated))
4669  goto l3;
4670 
4671  /* Things look okay, so we can skip sleeping */
4672  require_sleep = false;
4673 
4674  /*
4675  * Note we allow Xmax to change here; other updaters/lockers
4676  * could have modified it before we grabbed the buffer lock.
4677  * However, this is not a problem, because with the recheck we
4678  * just did we ensure that they still don't conflict with the
4679  * lock we want.
4680  */
4681  }
4682  }
4683  else if (mode == LockTupleShare)
4684  {
4685  /*
4686  * If we're requesting Share, we can similarly avoid sleeping if
4687  * there's no update and no exclusive lock present.
4688  */
4689  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4690  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4691  {
4693 
4694  /*
4695  * Make sure it's still an appropriate lock, else start over.
4696  * See above about allowing xmax to change.
4697  */
4698  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4700  goto l3;
4701  require_sleep = false;
4702  }
4703  }
4704  else if (mode == LockTupleNoKeyExclusive)
4705  {
4706  /*
4707  * If we're requesting NoKeyExclusive, we might also be able to
4708  * avoid sleeping; just ensure that there is no conflicting lock
4709  * already acquired.
4710  */
4711  if (infomask & HEAP_XMAX_IS_MULTI)
4712  {
4713  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4714  mode, NULL))
4715  {
4716  /*
4717  * No conflict, but if the xmax changed under us in the
4718  * meantime, start over.
4719  */
4721  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4723  xwait))
4724  goto l3;
4725 
4726  /* otherwise, we're good */
4727  require_sleep = false;
4728  }
4729  }
4730  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4731  {
4733 
4734  /* if the xmax changed in the meantime, start over */
4735  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4737  xwait))
4738  goto l3;
4739  /* otherwise, we're good */
4740  require_sleep = false;
4741  }
4742  }
4743 
4744  /*
4745  * As a check independent from those above, we can also avoid sleeping
4746  * if the current transaction is the sole locker of the tuple. Note
4747  * that the strength of the lock already held is irrelevant; this is
4748  * not about recording the lock in Xmax (which will be done regardless
4749  * of this optimization, below). Also, note that the cases where we
4750  * hold a lock stronger than we are requesting are already handled
4751  * above by not doing anything.
4752  *
4753  * Note we only deal with the non-multixact case here; MultiXactIdWait
4754  * is well equipped to deal with this situation on its own.
4755  */
4756  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4758  {
4759  /* ... but if the xmax changed in the meantime, start over */
4761  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4763  xwait))
4764  goto l3;
4766  require_sleep = false;
4767  }
4768 
4769  /*
4770  * Time to sleep on the other transaction/multixact, if necessary.
4771  *
4772  * If the other transaction is an update/delete that's already
4773  * committed, then sleeping cannot possibly do any good: if we're
4774  * required to sleep, get out to raise an error instead.
4775  *
4776  * By here, we either have already acquired the buffer exclusive lock,
4777  * or we must wait for the locking transaction or multixact; so below
4778  * we ensure that we grab buffer lock after the sleep.
4779  */
4780  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4781  {
4783  goto failed;
4784  }
4785  else if (require_sleep)
4786  {
4787  /*
4788  * Acquire tuple lock to establish our priority for the tuple, or
4789  * die trying. LockTuple will release us when we are next-in-line
4790  * for the tuple. We must do this even if we are share-locking,
4791  * but not if we already have a weaker lock on the tuple.
4792  *
4793  * If we are forced to "start over" below, we keep the tuple lock;
4794  * this arranges that we stay at the head of the line while
4795  * rechecking tuple state.
4796  */
4797  if (!skip_tuple_lock &&
4798  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4799  &have_tuple_lock))
4800  {
4801  /*
4802  * This can only happen if wait_policy is Skip and the lock
4803  * couldn't be obtained.
4804  */
4805  result = TM_WouldBlock;
4806  /* recovery code expects to have buffer lock held */
4807  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4808  goto failed;
4809  }
4810 
4811  if (infomask & HEAP_XMAX_IS_MULTI)
4812  {
4814 
4815  /* We only ever lock tuples, never update them */
4816  if (status >= MultiXactStatusNoKeyUpdate)
4817  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4818 
4819  /* wait for multixact to end, or die trying */
4820  switch (wait_policy)
4821  {
4822  case LockWaitBlock:
4823  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4824  relation, &tuple->t_self, XLTW_Lock, NULL);
4825  break;
4826  case LockWaitSkip:
4828  status, infomask, relation,
4829  NULL))
4830  {
4831  result = TM_WouldBlock;
4832  /* recovery code expects to have buffer lock held */
4833  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4834  goto failed;
4835  }
4836  break;
4837  case LockWaitError:
4839  status, infomask, relation,
4840  NULL))
4841  ereport(ERROR,
4842  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4843  errmsg("could not obtain lock on row in relation \"%s\"",
4844  RelationGetRelationName(relation))));
4845 
4846  break;
4847  }
4848 
4849  /*
4850  * Of course, the multixact might not be done here: if we're
4851  * requesting a light lock mode, other transactions with light
4852  * locks could still be alive, as well as locks owned by our
4853  * own xact or other subxacts of this backend. We need to
4854  * preserve the surviving MultiXact members. Note that it
4855  * isn't absolutely necessary in the latter case, but doing so
4856  * is simpler.
4857  */
4858  }
4859  else
4860  {
4861  /* wait for regular transaction to end, or die trying */
4862  switch (wait_policy)
4863  {
4864  case LockWaitBlock:
4865  XactLockTableWait(xwait, relation, &tuple->t_self,
4866  XLTW_Lock);
4867  break;
4868  case LockWaitSkip:
4869  if (!ConditionalXactLockTableWait(xwait))
4870  {
4871  result = TM_WouldBlock;
4872  /* recovery code expects to have buffer lock held */
4873  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4874  goto failed;
4875  }
4876  break;
4877  case LockWaitError:
4878  if (!ConditionalXactLockTableWait(xwait))
4879  ereport(ERROR,
4880  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4881  errmsg("could not obtain lock on row in relation \"%s\"",
4882  RelationGetRelationName(relation))));
4883  break;
4884  }
4885  }
4886 
4887  /* if there are updates, follow the update chain */
4888  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4889  {
4890  TM_Result res;
4891 
4892  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4893  GetCurrentTransactionId(),
4894  mode);
4895  if (res != TM_Ok)
4896  {
4897  result = res;
4898  /* recovery code expects to have buffer lock held */
4899  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4900  goto failed;
4901  }
4902  }
4903 
4905 
4906  /*
4907  * xwait is done, but if xwait had just locked the tuple then some
4908  * other xact could update this tuple before we get to this point.
4909  * Check for xmax change, and start over if so.
4910  */
4911  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4913  xwait))
4914  goto l3;
4915 
4916  if (!(infomask & HEAP_XMAX_IS_MULTI))
4917  {
4918  /*
4919  * Otherwise check if it committed or aborted. Note we cannot
4920  * be here if the tuple was only locked by somebody who didn't
4921  * conflict with us; that would have been handled above. So
4922  * that transaction must necessarily be gone by now. But
4923  * don't check for this in the multixact case, because some
4924  * locker transactions might still be running.
4925  */
4926  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4927  }
4928  }
4929 
4930  /* By here, we're certain that we hold buffer exclusive lock again */
4931 
4932  /*
4933  * We may lock if previous xmax aborted, or if it committed but only
4934  * locked the tuple without updating it; or if we didn't have to wait
4935  * at all for whatever reason.
4936  */
4937  if (!require_sleep ||
4938  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4941  result = TM_Ok;
4942  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4943  result = TM_Updated;
4944  else
4945  result = TM_Deleted;
4946  }
4947 
4948 failed:
4949  if (result != TM_Ok)
4950  {
4951  Assert(result == TM_SelfModified || result == TM_Updated ||
4952  result == TM_Deleted || result == TM_WouldBlock);
4953 
4954  /*
4955  * When locking a tuple under LockWaitSkip semantics and we fail with
4956  * TM_WouldBlock above, it's possible for concurrent transactions to
4957  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4958  * this assert is slightly different from the equivalent one in
4959  * heap_delete and heap_update.
4960  */
4961  Assert((result == TM_WouldBlock) ||
4962  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4963  Assert(result != TM_Updated ||
4964  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4965  tmfd->ctid = tuple->t_data->t_ctid;
4966  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4967  if (result == TM_SelfModified)
4968  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4969  else
4970  tmfd->cmax = InvalidCommandId;
4971  goto out_locked;
4972  }
4973 
4974  /*
4975  * If we didn't pin the visibility map page and the page has become all
4976  * visible while we were busy locking the buffer, or during some
4977  * subsequent window during which we had it unlocked, we'll have to unlock
4978  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4979  * unfortunate, especially since we'll now have to recheck whether the
4980  * tuple has been locked or updated under us, but hopefully it won't
4981  * happen very often.
4982  */
4983  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4984  {
4985  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4986  visibilitymap_pin(relation, block, &vmbuffer);
4988  goto l3;
4989  }
4990 
4991  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4992  old_infomask = tuple->t_data->t_infomask;
4993 
4994  /*
4995  * If this is the first possibly-multixact-able operation in the current
4996  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4997  * certain that the transaction will never become a member of any older
4998  * MultiXactIds than that. (We have to do this even if we end up just
4999  * using our own TransactionId below, since some other backend could
5000  * incorporate our XID into a MultiXact immediately afterwards.)
5001  */
5003 
5004  /*
5005  * Compute the new xmax and infomask to store into the tuple. Note we do
5006  * not modify the tuple just yet, because that would leave it in the wrong
5007  * state if multixact.c elogs.
5008  */
5009  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
5010  GetCurrentTransactionId(), mode, false,
5011  &xid, &new_infomask, &new_infomask2);
5012 
5014 
5015  /*
5016  * Store transaction information of xact locking the tuple.
5017  *
5018  * Note: Cmax is meaningless in this context, so don't set it; this avoids
5019  * possibly generating a useless combo CID. Moreover, if we're locking a
5020  * previously updated tuple, it's important to preserve the Cmax.
5021  *
5022  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5023  * we would break the HOT chain.
5024  */
5025  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
5026  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
5027  tuple->t_data->t_infomask |= new_infomask;
5028  tuple->t_data->t_infomask2 |= new_infomask2;
5029  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5031  HeapTupleHeaderSetXmax(tuple->t_data, xid);
5032 
5033  /*
5034  * Make sure there is no forward chain link in t_ctid. Note that in the
5035  * cases where the tuple has been updated, we must not overwrite t_ctid,
5036  * because it was set by the updater. Moreover, if the tuple has been
5037  * updated, we need to follow the update chain to lock the new versions of
5038  * the tuple as well.
5039  */
5040  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5041  tuple->t_data->t_ctid = *tid;
5042 
5043  /* Clear only the all-frozen bit on visibility map if needed */
5044  if (PageIsAllVisible(page) &&
5045  visibilitymap_clear(relation, block, vmbuffer,
5047  cleared_all_frozen = true;
5048 
5049 
5050  MarkBufferDirty(*buffer);
5051 
5052  /*
5053  * XLOG stuff. You might think that we don't need an XLOG record because
5054  * there is no state change worth restoring after a crash. You would be
5055  * wrong however: we have just written either a TransactionId or a
5056  * MultiXactId that may never have been seen on disk before, and we need
5057  * to make sure that there are XLOG entries covering those ID numbers.
5058  * Else the same IDs might be re-used after a crash, which would be
5059  * disastrous if this page made it to disk before the crash. Essentially
5060  * we have to enforce the WAL log-before-data rule even in this case.
5061  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5062  * entries for everything anyway.)
5063  */
5064  if (RelationNeedsWAL(relation))
5065  {
5066  xl_heap_lock xlrec;
5067  XLogRecPtr recptr;
5068 
5069  XLogBeginInsert();
5070  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
5071 
5072  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5073  xlrec.xmax = xid;
5074  xlrec.infobits_set = compute_infobits(new_infomask,
5075  tuple->t_data->t_infomask2);
5076  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
5077  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
5078 
5079  /* we don't decode row locks atm, so no need to log the origin */
5080 
5081  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
5082 
5083  PageSetLSN(page, recptr);
5084  }
5085 
5086  END_CRIT_SECTION();
5087 
5088  result = TM_Ok;
5089 
5090 out_locked:
5091  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
5092 
5093 out_unlocked:
5094  if (BufferIsValid(vmbuffer))
5095  ReleaseBuffer(vmbuffer);
5096 
5097  /*
5098  * Don't update the visibility map here. Locking a tuple doesn't change
5099  * visibility info.
5100  */
5101 
5102  /*
5103  * Now that we have successfully marked the tuple as locked, we can
5104  * release the lmgr tuple lock, if we had it.
5105  */
5106  if (have_tuple_lock)
5107  UnlockTupleTuplock(relation, tid, mode);
5108 
5109  return result;
5110 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:210
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5891
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7634
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4379
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:393
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:39
#define SizeOfHeapLock
Definition: heapam_xlog.h:404
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:729
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1299
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define RelationGetRelationName(relation)
Definition: rel.h:539
uint8 infobits_set
Definition: heapam_xlog.h:400
OffsetNumber offnum
Definition: heapam_xlog.h:399
TransactionId xmax
Definition: heapam_xlog.h:398
@ TM_WouldBlock
Definition: tableam.h:109
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
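As a companion to the listing above, a hedged usage sketch (not verbatim source) of how a caller in the spirit of heapam_tuple_lock() might lock one tuple FOR UPDATE; the wrapper name lock_tuple_for_update is illustrative.

#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Illustrative sketch: lock the tuple at *tid in LockTupleExclusive mode. */
static TM_Result
lock_tuple_for_update(Relation relation, ItemPointer tid, CommandId cid)
{
    HeapTupleData  tuple;
    Buffer         buffer;
    TM_FailureData tmfd;
    TM_Result      result;

    /* heap_lock_tuple() locates the tuple through tuple->t_self */
    tuple.t_self = *tid;

    result = heap_lock_tuple(relation, &tuple, cid,
                             LockTupleExclusive, /* strongest tuple lock mode */
                             LockWaitBlock,      /* sleep on conflicting lockers */
                             true,               /* follow_updates */
                             &buffer, &tmfd);

    /* the function returns with the buffer pinned but not content-locked */
    if (BufferIsValid(buffer))
        ReleaseBuffer(buffer);

    return result;
}

On TM_Updated or TM_Deleted, tmfd.ctid and tmfd.xmax describe the conflicting update, matching the failed: path in the listing.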

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2254 of file heapam.c.

2256 {
2257  TransactionId xid = GetCurrentTransactionId();
2258  HeapTuple *heaptuples;
2259  int i;
2260  int ndone;
2261  PGAlignedBlock scratch;
2262  Page page;
2263  Buffer vmbuffer = InvalidBuffer;
2264  bool needwal;
2265  Size saveFreeSpace;
2266  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2267  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2268  bool starting_with_empty_page = false;
2269  int npages = 0;
2270  int npages_used = 0;
2271 
2272  /* currently not needed (thus unsupported) for heap_multi_insert() */
2273  Assert(!(options & HEAP_INSERT_SPECULATIVE));
2274 
2275  needwal = RelationNeedsWAL(relation);
2276  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2277  HEAP_DEFAULT_FILLFACTOR);
2278 
2279  /* Toast and set header data in all the slots */
2280  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2281  for (i = 0; i < ntuples; i++)
2282  {
2283  HeapTuple tuple;
2284 
2285  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2286  slots[i]->tts_tableOid = RelationGetRelid(relation);
2287  tuple->t_tableOid = slots[i]->tts_tableOid;
2288  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2289  options);
2290  }
2291 
2292  /*
2293  * We're about to do the actual inserts -- but check for conflict first,
2294  * to minimize the possibility of having to roll back work we've just
2295  * done.
2296  *
2297  * A check here does not definitively prevent a serialization anomaly;
2298  * that check MUST be done at least past the point of acquiring an
2299  * exclusive buffer content lock on every buffer that will be affected,
2300  * and MAY be done after all inserts are reflected in the buffers and
2301  * those locks are released; otherwise there is a race condition. Since
2302  * multiple buffers can be locked and unlocked in the loop below, and it
2303  * would not be feasible to identify and lock all of those buffers before
2304  * the loop, we must do a final check at the end.
2305  *
2306  * The check here could be omitted with no loss of correctness; it is
2307  * present strictly as an optimization.
2308  *
2309  * For heap inserts, we only need to check for table-level SSI locks. Our
2310  * new tuples can't possibly conflict with existing tuple locks, and heap
2311  * page locks are only consolidated versions of tuple locks; they do not
2312  * lock "gaps" as index page locks do. So we don't need to specify a
2313  * buffer when making the call, which makes for a faster check.
2314  */
2315  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2316 
2317  ndone = 0;
2318  while (ndone < ntuples)
2319  {
2320  Buffer buffer;
2321  bool all_visible_cleared = false;
2322  bool all_frozen_set = false;
2323  int nthispage;
2324 
2325  CHECK_FOR_INTERRUPTS();
2326 
2327  /*
2328  * Compute number of pages needed to fit the to-be-inserted tuples in
2329  * the worst case. This will be used to determine how much to extend
2330  * the relation by in RelationGetBufferForTuple(), if needed. If we
2331  * filled a prior page from scratch, we can just update our last
2332  * computation, but if we started with a partially filled page,
2333  * recompute from scratch, since the number of potentially required pages
2334  * can vary due to tuples needing to fit onto the page, page headers,
2335  * etc.
2336  */
2337  if (ndone == 0 || !starting_with_empty_page)
2338  {
2339  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2340  saveFreeSpace);
2341  npages_used = 0;
2342  }
2343  else
2344  npages_used++;
2345 
2346  /*
2347  * Find buffer where at least the next tuple will fit. If the page is
2348  * all-visible, this will also pin the requisite visibility map page.
2349  *
2350  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2351  * empty page. See all_frozen_set below.
2352  */
2353  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2354  InvalidBuffer, options, bistate,
2355  &vmbuffer, NULL,
2356  npages - npages_used);
2357  page = BufferGetPage(buffer);
2358 
2359  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2360 
2361  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2362  all_frozen_set = true;
2363 
2364  /* NO EREPORT(ERROR) from here till changes are logged */
2365  START_CRIT_SECTION();
2366 
2367  /*
2368  * RelationGetBufferForTuple has ensured that the first tuple fits.
2369  * Put that on the page, and then as many other tuples as fit.
2370  */
2371  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2372 
2373  /*
2374  * For logical decoding we need combo CIDs to properly decode the
2375  * catalog.
2376  */
2377  if (needwal && need_cids)
2378  log_heap_new_cid(relation, heaptuples[ndone]);
2379 
2380  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2381  {
2382  HeapTuple heaptup = heaptuples[ndone + nthispage];
2383 
2384  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2385  break;
2386 
2387  RelationPutHeapTuple(relation, buffer, heaptup, false);
2388 
2389  /*
2390  * For logical decoding we need combo CIDs to properly decode the
2391  * catalog.
2392  */
2393  if (needwal && need_cids)
2394  log_heap_new_cid(relation, heaptup);
2395  }
2396 
2397  /*
2398  * If the page is all visible, need to clear that, unless we're only
2399  * going to add further frozen rows to it.
2400  *
2401  * If we're only adding already frozen rows to a previously empty
2402  * page, mark it as all-visible.
2403  */
2404  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2405  {
2406  all_visible_cleared = true;
2407  PageClearAllVisible(page);
2408  visibilitymap_clear(relation,
2409  BufferGetBlockNumber(buffer),
2410  vmbuffer, VISIBILITYMAP_VALID_BITS);
2411  }
2412  else if (all_frozen_set)
2413  PageSetAllVisible(page);
2414 
2415  /*
2416  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2417  */
2418 
2419  MarkBufferDirty(buffer);
2420 
2421  /* XLOG stuff */
2422  if (needwal)
2423  {
2424  XLogRecPtr recptr;
2425  xl_heap_multi_insert *xlrec;
2426  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2427  char *tupledata;
2428  int totaldatalen;
2429  char *scratchptr = scratch.data;
2430  bool init;
2431  int bufflags = 0;
2432 
2433  /*
2434  * If the page was previously empty, we can reinit the page
2435  * instead of restoring the whole thing.
2436  */
2437  init = starting_with_empty_page;
2438 
2439  /* allocate xl_heap_multi_insert struct from the scratch area */
2440  xlrec = (xl_heap_multi_insert *) scratchptr;
2441  scratchptr += SizeOfHeapMultiInsert;
2442 
2443  /*
2444  * Allocate offsets array. Unless we're reinitializing the page,
2445  * in that case the tuples are stored in order starting at
2446  * FirstOffsetNumber and we don't need to store the offsets
2447  * explicitly.
2448  */
2449  if (!init)
2450  scratchptr += nthispage * sizeof(OffsetNumber);
2451 
2452  /* the rest of the scratch space is used for tuple data */
2453  tupledata = scratchptr;
2454 
2455  /* check that the mutually exclusive flags are not both set */
2456  Assert(!(all_visible_cleared && all_frozen_set));
2457 
2458  xlrec->flags = 0;
2459  if (all_visible_cleared)
2460  xlrec->flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2461  if (all_frozen_set)
2462  xlrec->flags |= XLH_INSERT_ALL_FROZEN_SET;
2463 
2464  xlrec->ntuples = nthispage;
2465 
2466  /*
2467  * Write out an xl_multi_insert_tuple and the tuple data itself
2468  * for each tuple.
2469  */
2470  for (i = 0; i < nthispage; i++)
2471  {
2472  HeapTuple heaptup = heaptuples[ndone + i];
2473  xl_multi_insert_tuple *tuphdr;
2474  int datalen;
2475 
2476  if (!init)
2477  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2478  /* xl_multi_insert_tuple needs two-byte alignment. */
2479  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2480  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2481 
2482  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2483  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2484  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2485 
2486  /* write bitmap [+ padding] [+ oid] + data */
2487  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2488  memcpy(scratchptr,
2489  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2490  datalen);
2491  tuphdr->datalen = datalen;
2492  scratchptr += datalen;
2493  }
2494  totaldatalen = scratchptr - tupledata;
2495  Assert((scratchptr - scratch.data) < BLCKSZ);
2496 
2497  if (need_tuple_data)
2498  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2499 
2500  /*
2501  * Signal that this is the last xl_heap_multi_insert record
2502  * emitted by this call to heap_multi_insert(). Needed for logical
2503  * decoding so it knows when to cleanup temporary data.
2504  */
2505  if (ndone + nthispage == ntuples)
2506  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2507 
2508  if (init)
2509  {
2510  info |= XLOG_HEAP_INIT_PAGE;
2511  bufflags |= REGBUF_WILL_INIT;
2512  }
2513 
2514  /*
2515  * If we're doing logical decoding, include the new tuple data
2516  * even if we take a full-page image of the page.
2517  */
2518  if (need_tuple_data)
2519  bufflags |= REGBUF_KEEP_DATA;
2520 
2521  XLogBeginInsert();
2522  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2523  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2524 
2525  XLogRegisterBufData(0, tupledata, totaldatalen);
2526 
2527  /* filtering by origin on a row level is much more efficient */
2528  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2529 
2530  recptr = XLogInsert(RM_HEAP2_ID, info);
2531 
2532  PageSetLSN(page, recptr);
2533  }
2534 
2535  END_CRIT_SECTION();
2536 
2537  /*
2538  * If we've frozen everything on the page, update the visibilitymap.
2539  * We're already holding pin on the vmbuffer.
2540  */
2541  if (all_frozen_set)
2542  {
2543  Assert(PageIsAllVisible(page));
2544  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2545 
2546  /*
2547  * It's fine to use InvalidTransactionId here - this is only used
2548  * when HEAP_INSERT_FROZEN is specified, which intentionally
2549  * violates visibility rules.
2550  */
2551  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2552  InvalidXLogRecPtr, vmbuffer,
2553  InvalidTransactionId,
2554  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2555  }
2556 
2557  UnlockReleaseBuffer(buffer);
2558  ndone += nthispage;
2559 
2560  /*
2561  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2562  * likely that we'll insert into subsequent heap pages that are likely
2563  * to use the same vm page.
2564  */
2565  }
2566 
2567  /* We're done with inserting all tuples, so release the last vmbuffer. */
2568  if (vmbuffer != InvalidBuffer)
2569  ReleaseBuffer(vmbuffer);
2570 
2571  /*
2572  * We're done with the actual inserts. Check for conflicts again, to
2573  * ensure that all rw-conflicts in to these inserts are detected. Without
2574  * this final check, a sequential scan of the heap may have locked the
2575  * table after the "before" check, missing one opportunity to detect the
2576  * conflict, and then scanned the table before the new tuples were there,
2577  * missing the other chance to detect the conflict.
2578  *
2579  * For heap inserts, we only need to check for table-level SSI locks. Our
2580  * new tuples can't possibly conflict with existing tuple locks, and heap
2581  * page locks are only consolidated versions of tuple locks; they do not
2582  * lock "gaps" as index page locks do. So we don't need to specify a
2583  * buffer when making the call.
2584  */
2585  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2586 
2587  /*
2588  * If tuples are cachable, mark them for invalidation from the caches in
2589  * case we abort. Note it is OK to do this after releasing the buffer,
2590  * because the heaptuples data structure is all in local memory, not in
2591  * the shared buffer.
2592  */
2593  if (IsCatalogRelation(relation))
2594  {
2595  for (i = 0; i < ntuples; i++)
2596  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2597  }
2598 
2599  /* copy t_self fields back to the caller's slots */
2600  for (i = 0; i < ntuples; i++)
2601  slots[i]->tts_tid = heaptuples[i]->t_self;
2602 
2603  pgstat_count_heap_insert(relation, ntuples);
2604 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:980
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
#define MAXALIGN(LEN)
Definition: c.h:790
#define SHORTALIGN(LEN)
Definition: c.h:786
size_t Size
Definition: c.h:584
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2222
#define HEAP_INSERT_FROZEN
Definition: heapam.h:37
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:188
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:79
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:199
int init
Definition: isn.c:74
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:185
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
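As a hedged sketch (not verbatim source), the pattern below shows how a batch loader in the style of the table-AM multi-insert path or CatalogTuplesMultiInsertWithInfo() might call heap_multi_insert(); the wrapper name and the decision to allocate a BulkInsertState are illustrative.

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

/* Illustrative sketch: insert a batch of materialized slots in one call. */
static void
insert_slot_batch(Relation relation, TupleTableSlot **slots, int nslots)
{
    CommandId       cid = GetCurrentCommandId(true);
    BulkInsertState bistate = GetBulkInsertState();

    /* options could carry HEAP_INSERT_FROZEN etc.; 0 means default behavior */
    heap_multi_insert(relation, slots, nslots, cid, 0, bistate);

    FreeBulkInsertState(bistate);   /* frees the bulk-insert strategy and any held pin */
}

After the call, each slots[i]->tts_tid has been filled in from the corresponding tuple's t_self, as the end of the listing shows.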

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( Relation  relation,
Buffer  buffer,
struct GlobalVisState *  vistest,
int  options,
struct VacuumCutoffs *  cutoffs,
PruneFreezeResult *  presult,
PruneReason  reason,
OffsetNumber *  off_loc,
TransactionId *  new_relfrozen_xid,
MultiXactId *  new_relmin_mxid 
)

Definition at line 350 of file pruneheap.c.
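Before the source listing, a hedged calling sketch (not verbatim source): it approximates how a VACUUM-style caller, in the spirit of lazy_scan_prune(), might invoke this function for a page it has already pinned and exclusively locked; the wrapper name and the surrounding bookkeeping are illustrative.

#include "access/heapam.h"

/* Illustrative sketch: prune and opportunistically freeze one heap page. */
static void
prune_and_freeze_page(Relation relation, Buffer buffer,
                      struct GlobalVisState *vistest,
                      struct VacuumCutoffs *cutoffs,
                      TransactionId *new_relfrozen_xid,
                      MultiXactId *new_relmin_mxid)
{
    PruneFreezeResult presult;
    OffsetNumber      off_loc;    /* reported by error-context callbacks */

    /* with HEAP_PAGE_PRUNE_FREEZE, cutoffs and the two horizon pointers must be supplied */
    heap_page_prune_and_freeze(relation, buffer, vistest,
                               HEAP_PAGE_PRUNE_FREEZE,
                               cutoffs, &presult, PRUNE_VACUUM_SCAN,
                               &off_loc,
                               new_relfrozen_xid, new_relmin_mxid);

    /* presult now reports what was pruned/frozen and the LP_DEAD offsets found */
}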

359 {
360  Page page = BufferGetPage(buffer);
361  BlockNumber blockno = BufferGetBlockNumber(buffer);
362  OffsetNumber offnum,
363  maxoff;
364  PruneState prstate;
365  HeapTupleData tup;
366  bool do_freeze;
367  bool do_prune;
368  bool do_hint;
369  bool hint_bit_fpi;
370  int64 fpi_before = pgWalUsage.wal_fpi;
371 
372  /* Copy parameters to prstate */
373  prstate.vistest = vistest;
374  prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
375  prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
376  prstate.cutoffs = cutoffs;
377 
378  /*
379  * Our strategy is to scan the page and make lists of items to change,
380  * then apply the changes within a critical section. This keeps as much
381  * logic as possible out of the critical section, and also ensures that
382  * WAL replay will work the same as the normal case.
383  *
384  * First, initialize the new pd_prune_xid value to zero (indicating no
385  * prunable tuples). If we find any tuples which may soon become
386  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
387  * initialize the rest of our working state.
388  */
391  prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
392  prstate.nroot_items = 0;
393  prstate.nheaponly_items = 0;
394 
395  /* initialize page freezing working state */
396  prstate.pagefrz.freeze_required = false;
397  if (prstate.freeze)
398  {
399  Assert(new_relfrozen_xid && new_relmin_mxid);
400  prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
401  prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
402  prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
403  prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
404  }
405  else
406  {
407  Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
412  }
413 
414  prstate.ndeleted = 0;
415  prstate.live_tuples = 0;
416  prstate.recently_dead_tuples = 0;
417  prstate.hastup = false;
418  prstate.lpdead_items = 0;
419  prstate.deadoffsets = presult->deadoffsets;
420 
421  /*
422  * Caller may update the VM after we're done. We can keep track of
423  * whether the page will be all-visible and all-frozen after pruning and
424  * freezing to help the caller to do that.
425  *
426  * Currently, only VACUUM sets the VM bits. To save the effort, only do
427  * the bookkeeping if the caller needs it. Currently, that's tied to
428  * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
429  * to update the VM bits without also freezing or freeze without also
430  * setting the VM bits.
431  *
432  * In addition to telling the caller whether it can set the VM bit, we
433  * also use 'all_visible' and 'all_frozen' for our own decision-making. If
434  * the whole page would become frozen, we consider opportunistically
435  * freezing tuples. We will not be able to freeze the whole page if there
436  * are tuples present that are not visible to everyone or if there are
437  * dead tuples which are not yet removable. However, dead tuples which
438  * will be removed by the end of vacuuming should not preclude us from
439  * opportunistically freezing. Because of that, we do not clear
440  * all_visible when we see LP_DEAD items. We fix that at the end of the
441  * function, when we return the value to the caller, so that the caller
442  * doesn't set the VM bit incorrectly.
443  */
444  if (prstate.freeze)
445  {
446  prstate.all_visible = true;
447  prstate.all_frozen = true;
448  }
449  else
450  {
451  /*
452  * Initializing to false allows skipping the work to update them in
453  * heap_prune_record_unchanged_lp_normal().
454  */
455  prstate.all_visible = false;
456  prstate.all_frozen = false;
457  }
458 
459  /*
460  * The visibility cutoff xid is the newest xmin of live tuples on the
461  * page. In the common case, this will be set as the conflict horizon the
462  * caller can use for updating the VM. If, at the end of freezing and
463  * pruning, the page is all-frozen, there is no possibility that any
464  * running transaction on the standby does not see tuples on the page as
465  * all-visible, so the conflict horizon remains InvalidTransactionId.
466  */
468 
469  maxoff = PageGetMaxOffsetNumber(page);
470  tup.t_tableOid = RelationGetRelid(relation);
471 
472  /*
473  * Determine HTSV for all tuples, and queue them up for processing as HOT
474  * chain roots or as heap-only items.
475  *
476  * Determining HTSV only once for each tuple is required for correctness,
477  * to deal with cases where running HTSV twice could result in different
478  * results. For example, RECENTLY_DEAD can turn to DEAD if another
479  * checked item causes GlobalVisTestIsRemovableFullXid() to update the
480  * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
481  * transaction aborts.
482  *
483  * It's also good for performance. Most commonly tuples within a page are
484  * stored at decreasing offsets (while the items are stored at increasing
485  * offsets). When processing all tuples on a page this leads to reading
486  * memory at decreasing offsets within a page, with a variable stride.
487  * That's hard for CPU prefetchers to deal with. Processing the items in
488  * reverse order (and thus the tuples in increasing order) increases
489  * prefetching efficiency significantly / decreases the number of cache
490  * misses.
491  */
492  for (offnum = maxoff;
493  offnum >= FirstOffsetNumber;
494  offnum = OffsetNumberPrev(offnum))
495  {
496  ItemId itemid = PageGetItemId(page, offnum);
497  HeapTupleHeader htup;
498 
499  /*
500  * Set the offset number so that we can display it along with any
501  * error that occurred while processing this tuple.
502  */
503  *off_loc = offnum;
504 
505  prstate.processed[offnum] = false;
506  prstate.htsv[offnum] = -1;
507 
508  /* Nothing to do if slot doesn't contain a tuple */
509  if (!ItemIdIsUsed(itemid))
510  {
511  heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
512  continue;
513  }
514 
515  if (ItemIdIsDead(itemid))
516  {
517  /*
518  * If the caller set mark_unused_now true, we can set dead line
519  * pointers LP_UNUSED now.
520  */
521  if (unlikely(prstate.mark_unused_now))
522  heap_prune_record_unused(&prstate, offnum, false);
523  else
524  heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
525  continue;
526  }
527 
528  if (ItemIdIsRedirected(itemid))
529  {
530  /* This is the start of a HOT chain */
531  prstate.root_items[prstate.nroot_items++] = offnum;
532  continue;
533  }
534 
535  Assert(ItemIdIsNormal(itemid));
536 
537  /*
538  * Get the tuple's visibility status and queue it up for processing.
539  */
540  htup = (HeapTupleHeader) PageGetItem(page, itemid);
541  tup.t_data = htup;
542  tup.t_len = ItemIdGetLength(itemid);
543  ItemPointerSet(&tup.t_self, blockno, offnum);
544 
545  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
546  buffer);
547 
548  if (!HeapTupleHeaderIsHeapOnly(htup))
549  prstate.root_items[prstate.nroot_items++] = offnum;
550  else
551  prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
552  }
553 
554  /*
555  * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
556  * an FPI to be emitted.
557  */
558  hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
559 
560  /*
561  * Process HOT chains.
562  *
563  * We added the items to the array starting from 'maxoff', so by
564  * processing the array in reverse order, we process the items in
565  * ascending offset number order. The order doesn't matter for
566  * correctness, but some quick micro-benchmarking suggests that this is
567  * faster. (Earlier PostgreSQL versions, which scanned all the items on
568  * the page instead of using the root_items array, also did it in
569  * ascending offset number order.)
570  */
571  for (int i = prstate.nroot_items - 1; i >= 0; i--)
572  {
573  offnum = prstate.root_items[i];
574 
575  /* Ignore items already processed as part of an earlier chain */
576  if (prstate.processed[offnum])
577  continue;
578 
579  /* see preceding loop */
580  *off_loc = offnum;
581 
582  /* Process this item or chain of items */
583  heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
584  }
585 
586  /*
587  * Process any heap-only tuples that were not already processed as part of
588  * a HOT chain.
589  */
590  for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
591  {
592  offnum = prstate.heaponly_items[i];
593 
594  if (prstate.processed[offnum])
595  continue;
596 
597  /* see preceding loop */
598  *off_loc = offnum;
599 
600  /*
601  * If the tuple is DEAD and doesn't chain to anything else, mark it
602  * unused. (If it does chain, we can only remove it as part of
603  * pruning its chain.)
604  *
605  * We need this primarily to handle aborted HOT updates, that is,
606  * XMIN_INVALID heap-only tuples. Those might not be linked to by any
607  * chain, since the parent tuple might be re-updated before any
608  * pruning occurs. So we have to be able to reap them separately from
609  * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
610  * return true for an XMIN_INVALID tuple, so this code will work even
611  * when there were sequential updates within the aborted transaction.)
612  */
613  if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
614  {
615  ItemId itemid = PageGetItemId(page, offnum);
616  HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
617 
619  {
621  &prstate.latest_xid_removed);
622  heap_prune_record_unused(&prstate, offnum, true);
623  }
624  else
625  {
626  /*
627  * This tuple should've been processed and removed as part of
628  * a HOT chain, so something's wrong. To preserve evidence,
629  * we don't dare to remove it. We cannot leave behind a DEAD
630  * tuple either, because that will cause VACUUM to error out.
631  * Throwing an error with a distinct error message seems like
632  * the least bad option.
633  */
634  elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
635  blockno, offnum);
636  }
637  }
638  else
639  heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
640  }
641 
642  /* We should now have processed every tuple exactly once */
643 #ifdef USE_ASSERT_CHECKING
644  for (offnum = FirstOffsetNumber;
645  offnum <= maxoff;
646  offnum = OffsetNumberNext(offnum))
647  {
648  *off_loc = offnum;
649 
650  Assert(prstate.processed[offnum]);
651  }
652 #endif
653 
654  /* Clear the offset information once we have processed the given page. */
655  *off_loc = InvalidOffsetNumber;
656 
657  do_prune = prstate.nredirected > 0 ||
658  prstate.ndead > 0 ||
659  prstate.nunused > 0;
660 
661  /*
662  * Even if we don't prune anything, if we found a new value for the
663  * pd_prune_xid field or the page was marked full, we will update the hint
664  * bit.
665  */
666  do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
667  PageIsFull(page);
668 
669  /*
670  * Decide if we want to go ahead with freezing according to the freeze
671  * plans we prepared, or not.
672  */
673  do_freeze = false;
674  if (prstate.freeze)
675  {
676  if (prstate.pagefrz.freeze_required)
677  {
678  /*
679  * heap_prepare_freeze_tuple indicated that at least one XID/MXID
680  * from before FreezeLimit/MultiXactCutoff is present. Must
681  * freeze to advance relfrozenxid/relminmxid.
682  */
683  do_freeze = true;
684  }
685  else
686  {
687  /*
688  * Opportunistically freeze the page if we are generating an FPI
689  * anyway and if doing so means that we can set the page
690  * all-frozen afterwards (might not happen until VACUUM's final
691  * heap pass).
692  *
693  * XXX: Previously, we knew if pruning emitted an FPI by checking
694  * pgWalUsage.wal_fpi before and after pruning. Once the freeze
695  * and prune records were combined, this heuristic couldn't be
696  * used anymore. The opportunistic freeze heuristic must be
697  * improved; however, for now, try to approximate the old logic.
698  */
699  if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
700  {
701  /*
702  * Freezing would make the page all-frozen. Have we already
703  * emitted an FPI, or will we do so anyway?
704  */
705  if (RelationNeedsWAL(relation))
706  {
707  if (hint_bit_fpi)
708  do_freeze = true;
709  else if (do_prune)
710  {
711  if (XLogCheckBufferNeedsBackup(buffer))
712  do_freeze = true;
713  }
714  else if (do_hint)
715  {
716  if (XLogHintBitIsNeeded())
717  do_freeze = true;
718  }
719  }
720  }
721  }
722  }
723 
724  if (do_freeze)
725  {
726  /*
727  * Validate the tuples we will be freezing before entering the
728  * critical section.
729  */
730  heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
731  }
732  else if (prstate.nfrozen > 0)
733  {
734  /*
735  * The page contained some tuples that were not already frozen, and we
736  * chose not to freeze them now. The page won't be all-frozen then.
737  */
738  Assert(!prstate.pagefrz.freeze_required);
739 
740  prstate.all_frozen = false;
741  prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
742  }
743  else
744  {
745  /*
746  * We have no freeze plans to execute. The page might already be
747  * all-frozen (perhaps only following pruning), though. Such pages
748  * can be marked all-frozen in the VM by our caller, even though none
749  * of its tuples were newly frozen here.
750  */
751  }
752 
753  /* Any error while applying the changes is critical */
754  START_CRIT_SECTION();
755 
756  if (do_hint)
757  {
758  /*
759  * Update the page's pd_prune_xid field to either zero, or the lowest
760  * XID of any soon-prunable tuple.
761  */
762  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
763 
764  /*
765  * Also clear the "page is full" flag, since there's no point in
766  * repeating the prune/defrag process until something else happens to
767  * the page.
768  */
769  PageClearFull(page);
770 
771  /*
772  * If that's all we had to do to the page, this is a non-WAL-logged
773  * hint. If we are going to freeze or prune the page, we will mark
774  * the buffer dirty below.
775  */
776  if (!do_freeze && !do_prune)
777  MarkBufferDirtyHint(buffer, true);
778  }
779 
780  if (do_prune || do_freeze)
781  {
782  /* Apply the planned item changes and repair page fragmentation. */
783  if (do_prune)
784  {
785  heap_page_prune_execute(buffer, false,
786  prstate.redirected, prstate.nredirected,
787  prstate.nowdead, prstate.ndead,
788  prstate.nowunused, prstate.nunused);
789  }
790 
791  if (do_freeze)
792  heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
793 
794  MarkBufferDirty(buffer);
795 
796  /*
797  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
798  */
799  if (RelationNeedsWAL(relation))
800  {
801  /*
802  * The snapshotConflictHorizon for the whole record should be the
803  * most conservative of all the horizons calculated for any of the
804  * possible modifications. If this record will prune tuples, any
805  * transactions on the standby older than the youngest xmax of the
806  * most recently removed tuple this record will prune will
807  * conflict. If this record will freeze tuples, any transactions
808  * on the standby with xids older than the youngest tuple this
809  * record will freeze will conflict.
810  */
811  TransactionId frz_conflict_horizon = InvalidTransactionId;
812  TransactionId conflict_xid;
813 
814  /*
815  * We can use the visibility_cutoff_xid as our cutoff for
816  * conflicts when the whole page is eligible to become all-frozen
817  * in the VM once we're done with it. Otherwise we generate a
818  * conservative cutoff by stepping back from OldestXmin.
819  */
820  if (do_freeze)
821  {
822  if (prstate.all_visible && prstate.all_frozen)
823  frz_conflict_horizon = prstate.visibility_cutoff_xid;
824  else
825  {
826  /* Avoids false conflicts when hot_standby_feedback in use */
827  frz_conflict_horizon = prstate.cutoffs->OldestXmin;
828  TransactionIdRetreat(frz_conflict_horizon);
829  }
830  }
831 
832  if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
833  conflict_xid = frz_conflict_horizon;
834  else
835  conflict_xid = prstate.latest_xid_removed;
836 
837  log_heap_prune_and_freeze(relation, buffer,
838  conflict_xid,
839  true, reason,
840  prstate.frozen, prstate.nfrozen,
841  prstate.redirected, prstate.nredirected,
842  prstate.nowdead, prstate.ndead,
843  prstate.nowunused, prstate.nunused);
844  }
845  }
846 
847  END_CRIT_SECTION();
848 
849  /* Copy information back for caller */
850  presult->ndeleted = prstate.ndeleted;
851  presult->nnewlpdead = prstate.ndead;
852  presult->nfrozen = prstate.nfrozen;
853  presult->live_tuples = prstate.live_tuples;
854  presult->recently_dead_tuples = prstate.recently_dead_tuples;
855 
856  /*
857  * It was convenient to ignore LP_DEAD items in all_visible earlier on to
858  * make the choice of whether or not to freeze the page unaffected by the
859  * short-term presence of LP_DEAD items. These LP_DEAD items were
860  * effectively assumed to be LP_UNUSED items in the making. It doesn't
861  * matter which vacuum heap pass (initial pass or final pass) ends up
862  * setting the page all-frozen, as long as the ongoing VACUUM does it.
863  *
864  * Now that freezing has been finalized, unset all_visible if there are
865  * any LP_DEAD items on the page. It needs to reflect the present state
866  * of the page, as expected by our caller.
867  */
868  if (prstate.all_visible && prstate.lpdead_items == 0)
869  {
870  presult->all_visible = prstate.all_visible;
871  presult->all_frozen = prstate.all_frozen;
872  }
873  else
874  {
875  presult->all_visible = false;
876  presult->all_frozen = false;
877  }
878 
879  presult->hastup = prstate.hastup;
880 
881  /*
882  * For callers planning to update the visibility map, the conflict horizon
883  * for that record must be the newest xmin on the page. However, if the
884  * page is completely frozen, there can be no conflict and the
885  * vm_conflict_horizon should remain InvalidTransactionId. This includes
886  * the case that we just froze all the tuples; the prune-freeze record
887  * included the conflict XID already so the caller doesn't need it.
888  */
889  if (presult->all_frozen)
890  presult->vm_conflict_horizon = InvalidTransactionId;
891  else
892  presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
893 
894  presult->lpdead_items = prstate.lpdead_items;
895  /* the presult->deadoffsets array was already filled in */
896 
897  if (prstate.freeze)
898  {
899  if (presult->nfrozen > 0)
900  {
901  *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
902  *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
903  }
904  else
905  {
906  *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
907  *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
908  }
909  }
910 }
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4988
static void PageClearFull(Page page)
Definition: bufpage.h:423
static bool PageIsFull(Page page)
Definition: bufpage.h:413
#define likely(x)
Definition: c.h:325
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7220
void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7167
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:43
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:42
WalUsage pgWalUsage
Definition: instrument.c:22
#define InvalidMultiXactId
Definition: multixact.h:24
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:999
static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1508
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition: pruneheap.c:1297
static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1330
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2053
static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1319
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:917
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1561
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:220
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:208
bool freeze_required
Definition: heapam.h:182
MultiXactId FreezePageRelminMxid
Definition: heapam.h:209
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:219
int recently_dead_tuples
Definition: heapam.h:235
TransactionId vm_conflict_horizon
Definition: heapam.h:250
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:264
bool all_visible
Definition: heapam.h:248
HeapPageFreeze pagefrz
Definition: pruneheap.c:103
bool all_visible
Definition: pruneheap.c:150
int ndead
Definition: pruneheap.c:55
bool processed[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:86
OffsetNumber heaponly_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:78
TransactionId new_prune_xid
Definition: pruneheap.c:52
bool hastup
Definition: pruneheap.c:122
int recently_dead_tuples
Definition: pruneheap.c:119
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:60
int nroot_items
Definition: pruneheap.c:75
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
int nheaponly_items
Definition: pruneheap.c:77
bool mark_unused_now
Definition: pruneheap.c:43
int live_tuples
Definition: pruneheap.c:118
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:152
bool all_frozen
Definition: pruneheap.c:151
GlobalVisState * vistest
Definition: pruneheap.c:41
struct VacuumCutoffs * cutoffs
Definition: pruneheap.c:46
HeapTupleFreeze frozen[MaxHeapTuplesPerPage]
Definition: pruneheap.c:62
int lpdead_items
Definition: pruneheap.c:128
int nfrozen
Definition: pruneheap.c:57
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:59
int ndeleted
Definition: pruneheap.c:115
bool freeze
Definition: pruneheap.c:45
int nredirected
Definition: pruneheap.c:54
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:98
TransactionId latest_xid_removed
Definition: pruneheap.c:53
int nunused
Definition: pruneheap.c:56
OffsetNumber root_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:76
OffsetNumber * deadoffsets
Definition: pruneheap.c:129
TransactionId OldestXmin
Definition: vacuum.h:267
int64 wal_fpi
Definition: instrument.h:54
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
bool XLogCheckBufferNeedsBackup(Buffer buffer)
Definition: xloginsert.c:1027

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert, BufferGetBlockNumber(), BufferGetPage(), PruneState::cutoffs, PruneState::deadoffsets, PruneFreezeResult::deadoffsets, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, PruneState::freeze, HeapPageFreeze::freeze_required, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, heap_pre_freeze_checks(), heap_prune_chain(), heap_prune_record_unchanged_lp_dead(), heap_prune_record_unchanged_lp_normal(), heap_prune_record_unchanged_lp_unused(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), PruneState::heaponly_items, HEAPTUPLE_DEAD, HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, PruneState::htsv, i, InvalidMultiXactId, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), PruneState::latest_xid_removed, likely, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneState::nheaponly_items, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nroot_items, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, VacuumCutoffs::OldestXmin, PageClearFull(), PruneState::pagefrz, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), pgWalUsage, PruneState::processed, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, RelationGetRelid, RelationNeedsWAL, PruneState::root_items, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdRetreat, unlikely, PruneState::visibility_cutoff_xid, PruneState::vistest, PruneFreezeResult::vm_conflict_horizon, WalUsage::wal_fpi, XLogCheckBufferNeedsBackup(), and XLogHintBitIsNeeded.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
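As a usage illustration only (not taken from the source), a minimal sketch of a VACUUM-style call follows, loosely modeled on lazy_scan_prune(). The variables relation, buffer, vistest, cutoffs, no_indexes, new_relfrozen_xid and new_relmin_mxid are assumed to be set up by the caller; new_relfrozen_xid/new_relmin_mxid hold the caller's current relfrozenxid/relminmxid trackers.

    PruneFreezeResult presult;
    OffsetNumber off_loc;
    int         options = HEAP_PAGE_PRUNE_FREEZE;

    if (no_indexes)              /* illustrative flag: relation has no indexes */
        options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;

    heap_page_prune_and_freeze(relation, buffer, vistest, options,
                               cutoffs, &presult, PRUNE_VACUUM_SCAN,
                               &off_loc, &new_relfrozen_xid, &new_relmin_mxid);

    if (presult.lpdead_items > 0)
    {
        /* presult.deadoffsets[] feeds index vacuuming / the final heap pass */
    }

With HEAP_PAGE_PRUNE_FREEZE set, the caller must also pass cutoffs and both tracker pointers; on-access pruning instead passes options = 0, cutoffs = NULL and NULL trackers, as shown in heap_page_prune_opt() below.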

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 1561 of file pruneheap.c.

1565 {
1566  Page page = (Page) BufferGetPage(buffer);
1567  OffsetNumber *offnum;
1568  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1569 
1570  /* Shouldn't be called unless there's something to do */
1571  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1572 
1573  /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1574  Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1575 
1576  /* Update all redirected line pointers */
1577  offnum = redirected;
1578  for (int i = 0; i < nredirected; i++)
1579  {
1580  OffsetNumber fromoff = *offnum++;
1581  OffsetNumber tooff = *offnum++;
1582  ItemId fromlp = PageGetItemId(page, fromoff);
1583  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1584 
1585 #ifdef USE_ASSERT_CHECKING
1586 
1587  /*
1588  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1589  * must be the first item from a HOT chain. If the item has tuple
1590  * storage then it can't be a heap-only tuple. Otherwise we are just
1591  * maintaining an existing LP_REDIRECT from an existing HOT chain that
1592  * has been pruned at least once before now.
1593  */
1594  if (!ItemIdIsRedirected(fromlp))
1595  {
1596  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1597 
1598  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1599  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1600  }
1601  else
1602  {
1603  /* We shouldn't need to redundantly set the redirect */
1604  Assert(ItemIdGetRedirect(fromlp) != tooff);
1605  }
1606 
1607  /*
1608  * The item that we're about to set as an LP_REDIRECT (the 'from'
1609  * item) will point to an existing item (the 'to' item) that is
1610  * already a heap-only tuple. There can be at most one LP_REDIRECT
1611  * item per HOT chain.
1612  *
1613  * We need to keep around an LP_REDIRECT item (after original
1614  * non-heap-only root tuple gets pruned away) so that it's always
1615  * possible for VACUUM to easily figure out what TID to delete from
1616  * indexes when an entire HOT chain becomes dead. A heap-only tuple
1617  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1618  * tuple can.
1619  *
1620  * This check may miss problems, e.g. the target of a redirect could
1621  * be marked as unused subsequently. The page_verify_redirects() check
1622  * below will catch such problems.
1623  */
1624  tolp = PageGetItemId(page, tooff);
1625  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1626  htup = (HeapTupleHeader) PageGetItem(page, tolp);
1627  Assert(HeapTupleHeaderIsHeapOnly(htup));
1628 #endif
1629 
1630  ItemIdSetRedirect(fromlp, tooff);
1631  }
1632 
1633  /* Update all now-dead line pointers */
1634  offnum = nowdead;
1635  for (int i = 0; i < ndead; i++)
1636  {
1637  OffsetNumber off = *offnum++;
1638  ItemId lp = PageGetItemId(page, off);
1639 
1640 #ifdef USE_ASSERT_CHECKING
1641 
1642  /*
1643  * An LP_DEAD line pointer must be left behind when the original item
1644  * (which is dead to everybody) could still be referenced by a TID in
1645  * an index. This should never be necessary with any individual
1646  * heap-only tuple item, though. (It's not clear how much of a problem
1647  * that would be, but there is no reason to allow it.)
1648  */
1649  if (ItemIdHasStorage(lp))
1650  {
1651  Assert(ItemIdIsNormal(lp));
1652  htup = (HeapTupleHeader) PageGetItem(page, lp);
1653  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1654  }
1655  else
1656  {
1657  /* Whole HOT chain becomes dead */
1658  Assert(ItemIdIsRedirected(lp));
1659  }
1660 #endif
1661 
1662  ItemIdSetDead(lp);
1663  }
1664 
1665  /* Update all now-unused line pointers */
1666  offnum = nowunused;
1667  for (int i = 0; i < nunused; i++)
1668  {
1669  OffsetNumber off = *offnum++;
1670  ItemId lp = PageGetItemId(page, off);
1671 
1672 #ifdef USE_ASSERT_CHECKING
1673 
1674  if (lp_truncate_only)
1675  {
1676  /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1677  Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
1678  }
1679  else
1680  {
1681  /*
1682  * When heap_page_prune_and_freeze() was called, mark_unused_now
1683  * may have been passed as true, which allows would-be LP_DEAD
1684  * items to be made LP_UNUSED instead. This is only possible if
1685  * the relation has no indexes. If there are any dead items, then
1686  * mark_unused_now was not true and every item being marked
1687  * LP_UNUSED must refer to a heap-only tuple.
1688  */
1689  if (ndead > 0)
1690  {
1691  Assert(ItemIdIsNormal(lp));
1692  htup = (HeapTupleHeader) PageGetItem(page, lp);
1693  Assert(HeapTupleHeaderIsHeapOnly(htup));
1694  }
1695  else
1696  Assert(ItemIdIsUsed(lp));
1697  }
1698 
1699 #endif
1700 
1701  ItemIdSetUnused(lp);
1702  }
1703 
1704  if (lp_truncate_only)
1705  PageTruncateLinePointerArray(page);
1706  else
1707  {
1708  /*
1709  * Finally, repair any fragmentation, and update the page's hint bit
1710  * about whether it has free pointers.
1711  */
1712  PageRepairFragmentation(page);
1713 
1714  /*
1715  * Now that the page has been modified, assert that redirect items
1716  * still point to valid targets.
1717  */
1718  page_verify_redirects(page);
1719  }
1720 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:688
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:824
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:197
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1737

References Assert, BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
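For illustration, the sketch below shows the expected argument layout: redirected holds nredirected (from, to) offset pairs, while nowdead and nowunused hold plain offsets. The concrete offsets are made up; in practice these arrays are built by heap_page_prune_and_freeze() and the call happens inside a critical section on an exclusively locked buffer.

    OffsetNumber redirected[2] = {1, 2};  /* item 1 becomes LP_REDIRECT pointing at item 2 */
    OffsetNumber nowdead[1] = {5};        /* item 5 becomes LP_DEAD */
    OffsetNumber nowunused[1] = {6};      /* item 6 (a heap-only tuple) becomes LP_UNUSED */

    START_CRIT_SECTION();
    heap_page_prune_execute(buffer, false,
                            redirected, 1,   /* one redirect pair */
                            nowdead, 1,
                            nowunused, 1);
    MarkBufferDirty(buffer);
    END_CRIT_SECTION();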

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 193 of file pruneheap.c.

194 {
195  Page page = BufferGetPage(buffer);
196  TransactionId prune_xid;
197  GlobalVisState *vistest;
198  Size minfree;
199 
200  /*
201  * We can't write WAL in recovery mode, so there's no point trying to
202  * clean the page. The primary will likely issue a cleaning WAL record
203  * soon anyway, so this is no particular loss.
204  */
205  if (RecoveryInProgress())
206  return;
207 
208  /*
209  * First check whether there's any chance there's something to prune;
210  * determining the appropriate horizon is a waste if there's no prune_xid
211  * (i.e. no updates/deletes left potentially dead tuples around).
212  */
213  prune_xid = ((PageHeader) page)->pd_prune_xid;
214  if (!TransactionIdIsValid(prune_xid))
215  return;
216 
217  /*
218  * Check whether prune_xid indicates that there may be dead rows that can
219  * be cleaned up.
220  */
221  vistest = GlobalVisTestFor(relation);
222 
223  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
224  return;
225 
226  /*
227  * We prune when a previous UPDATE failed to find enough space on the page
228  * for a new tuple version, or when free space falls below the relation's
229  * fill-factor target (but not less than 10%).
230  *
231  * Checking free space here is questionable since we aren't holding any
232  * lock on the buffer; in the worst case we could get a bogus answer. It's
233  * unlikely to be *seriously* wrong, though, since reading either pd_lower
234  * or pd_upper is probably atomic. Avoiding taking a lock seems more
235  * important than sometimes getting a wrong answer in what is after all
236  * just a heuristic estimate.
237  */
238  minfree = RelationGetTargetPageFreeSpace(relation,
239  HEAP_DEFAULT_FILLFACTOR);
240  minfree = Max(minfree, BLCKSZ / 10);
241 
242  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
243  {
244  /* OK, try to get exclusive buffer lock */
245  if (!ConditionalLockBufferForCleanup(buffer))
246  return;
247 
248  /*
249  * Now that we have buffer lock, get accurate information about the
250  * page's free space, and recheck the heuristic about whether to
251  * prune.
252  */
253  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
254  {
255  OffsetNumber dummy_off_loc;
256  PruneFreezeResult presult;
257 
258  /*
259  * For now, pass mark_unused_now as false regardless of whether or
260  * not the relation has indexes, since we cannot safely determine
261  * that during on-access pruning with the current implementation.
262  */
263  heap_page_prune_and_freeze(relation, buffer, vistest, 0,
264  NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
265 
266  /*
267  * Report the number of tuples reclaimed to pgstats. This is
268  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
269  *
270  * We derive the number of dead tuples like this to avoid totally
271  * forgetting about items that were set to LP_DEAD, since they
272  * still need to be cleaned up by VACUUM. We only want to count
273  * heap-only tuples that just became LP_UNUSED in our report,
274  * which don't.
275  *
276  * VACUUM doesn't have to compensate in the same way when it
277  * tracks ndeleted, since it will set the same LP_DEAD items to
278  * LP_UNUSED separately.
279  */
280  if (presult.ndeleted > presult.nnewlpdead)
281  pgstat_update_heap_dead_tuples(relation,
282  presult.ndeleted - presult.nnewlpdead);
283  }
284 
285  /* And release buffer lock */
286  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
287 
288  /*
289  * We avoid reuse of any free space created on the page by unrelated
290  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
291  * free space should be reused by UPDATEs to *this* page.
292  */
293  }
294 }
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5399
#define Max(x, y)
Definition: c.h:977
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4268
void heap_page_prune_and_freeze(Relation relation, Buffer buffer, GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:350
bool RecoveryInProgress(void)
Definition: xlog.c:6334

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PruneFreezeResult::ndeleted, PruneFreezeResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by heap_prepare_pagescan(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
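As a worked example of the free-space heuristic above, assuming the standard 8 kB block size: with the default heap fillfactor of 100 the fillfactor-derived target is 0 bytes, so the BLCKSZ / 10 clamp leaves minfree at 819 bytes; with fillfactor = 70 the target is 8192 * 30 / 100 = 2457 bytes, which already exceeds the clamp.

    /* Sketch of the threshold arithmetic, assuming BLCKSZ == 8192 */
    Size minfree;

    minfree = 0;                          /* fillfactor 100: no reserved free space */
    minfree = Max(minfree, BLCKSZ / 10);  /* => 819 bytes */

    minfree = 2457;                       /* fillfactor 70: 8192 * 30 / 100 */
    minfree = Max(minfree, BLCKSZ / 10);  /* => 2457 bytes */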

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7167 of file heapam.c.

7169 {
7170  Page page = BufferGetPage(buffer);
7171 
7172  for (int i = 0; i < ntuples; i++)
7173  {
7174  HeapTupleFreeze *frz = tuples + i;
7175  ItemId itemid = PageGetItemId(page, frz->offset);
7176  HeapTupleHeader htup;
7177 
7178  htup = (HeapTupleHeader) PageGetItem(page, itemid);
7179 
7180  /* Deliberately avoid relying on tuple hint bits here */
7181  if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
7182  {
7183  TransactionId xmin = HeapTupleHeaderGetRawXmin(htup);
7184 
7185  Assert(!HeapTupleHeaderXminFrozen(htup));
7186  if (unlikely(!TransactionIdDidCommit(xmin)))
7187  ereport(ERROR,
7188  (errcode(ERRCODE_DATA_CORRUPTED),
7189  errmsg_internal("uncommitted xmin %u needs to be frozen",
7190  xmin)));
7191  }
7192 
7193  /*
7194  * TransactionIdDidAbort won't work reliably in the presence of XIDs
7195  * left behind by transactions that were in progress during a crash,
7196  * so we can only check that xmax didn't commit
7197  */
7198  if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
7199  {
7200  TransactionId xmax = HeapTupleHeaderGetRawXmax(htup);
7201 
7202  Assert(TransactionIdIsNormal(xmax));
7203  if (unlikely(TransactionIdDidCommit(xmax)))
7204  ereport(ERROR,
7205  (errcode(ERRCODE_DATA_CORRUPTED),
7206  errmsg_internal("cannot freeze committed xmax %u",
7207  xmax)));
7208  }
7209  }
7210 }
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:138
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:137
#define HeapTupleHeaderGetRawXmin(tup)
Definition: htup_details.h:304
#define HeapTupleHeaderXminFrozen(tup)
Definition: htup_details.h:331
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
uint8 checkflags
Definition: heapam.h:150
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert, BufferGetPage(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderXminFrozen, i, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_prune_and_freeze().

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool *  totally_frozen 
)

Definition at line 6894 of file heapam.c.

6898 {
6899  bool xmin_already_frozen = false,
6900  xmax_already_frozen = false;
6901  bool freeze_xmin = false,
6902  replace_xvac = false,
6903  replace_xmax = false,
6904  freeze_xmax = false;
6905  TransactionId xid;
6906 
6907  frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
6908  frz->t_infomask2 = tuple->t_infomask2;
6909  frz->t_infomask = tuple->t_infomask;
6910  frz->frzflags = 0;
6911  frz->checkflags = 0;
6912 
6913  /*
6914  * Process xmin, while keeping track of whether it's already frozen, or
6915  * will become frozen iff our freeze plan is executed by caller (could be
6916  * neither).
6917  */
6918  xid = HeapTupleHeaderGetXmin(tuple);
6919  if (!TransactionIdIsNormal(xid))
6920  xmin_already_frozen = true;
6921  else
6922  {
6923  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6924  ereport(ERROR,
6925  (errcode(ERRCODE_DATA_CORRUPTED),
6926  errmsg_internal("found xmin %u from before relfrozenxid %u",
6927  xid, cutoffs->relfrozenxid)));
6928 
6929  /* Will set freeze_xmin flags in freeze plan below */
6930  freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6931 
6932  /* Verify that xmin committed if and when freeze plan is executed */
6933  if (freeze_xmin)
6934  frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
6935  }
6936 
6937  /*
6938  * Old-style VACUUM FULL is gone, but we have to process xvac for as long
6939  * as we support having MOVED_OFF/MOVED_IN tuples in the database
6940  */
6941  xid = HeapTupleHeaderGetXvac(tuple);
6942  if (TransactionIdIsNormal(xid))
6943  {
6944  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
6945  Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
6946 
6947  /*
6948  * For Xvac, we always freeze proactively. This allows totally_frozen
6949  * tracking to ignore xvac.
6950  */
6951  replace_xvac = pagefrz->freeze_required = true;
6952 
6953  /* Will set replace_xvac flags in freeze plan below */
6954  }
6955 
6956  /* Now process xmax */
6957  xid = frz->xmax;
6958  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
6959  {
6960  /* Raw xmax is a MultiXactId */
6961  TransactionId newxmax;
6962  uint16 flags;
6963 
6964  /*
6965  * We will either remove xmax completely (in the "freeze_xmax" path),
6966  * process xmax by replacing it (in the "replace_xmax" path), or
6967  * perform no-op xmax processing. The only constraint is that the
6968  * FreezeLimit/MultiXactCutoff postcondition must never be violated.
6969  */
6970  newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
6971  &flags, pagefrz);
6972 
6973  if (flags & FRM_NOOP)
6974  {
6975  /*
6976  * xmax is a MultiXactId, and nothing about it changes for now.
6977  * This is the only case where 'freeze_required' won't have been
6978  * set for us by FreezeMultiXactId, as well as the only case where
6979  * neither freeze_xmax nor replace_xmax are set (given a multi).
6980  *
6981  * This is a no-op, but the call to FreezeMultiXactId might have
6982  * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
6983  * for us (the "freeze page" variants, specifically). That'll
6984  * make it safe for our caller to freeze the page later on, while
6985  * leaving this particular xmax undisturbed.
6986  *
6987  * FreezeMultiXactId is _not_ responsible for the "no freeze"
6988  * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
6989  * job. A call to heap_tuple_should_freeze for this same tuple
6990  * will take place below if 'freeze_required' isn't set already.
6991  * (This repeats work from FreezeMultiXactId, but allows "no
6992  * freeze" tracker maintenance to happen in only one place.)
6993  */
6994  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
6995  Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
6996  }
6997  else if (flags & FRM_RETURN_IS_XID)
6998  {
6999  /*
7000  * xmax will become an updater Xid (original MultiXact's updater
7001  * member Xid will be carried forward as a simple Xid in Xmax).
7002  */
7003  Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
7004 
7005  /*
7006  * NB -- some of these transformations are only valid because we
7007  * know the return Xid is a tuple updater (i.e. not merely a
7008  * locker.) Also note that the only reason we don't explicitly
7009  * worry about HEAP_KEYS_UPDATED is because it lives in
7010  * t_infomask2 rather than t_infomask.
7011  */
7012  frz->t_infomask &= ~HEAP_XMAX_BITS;
7013  frz->xmax = newxmax;
7014  if (flags & FRM_MARK_COMMITTED)
7015  frz->t_infomask |= HEAP_XMAX_COMMITTED;
7016  replace_xmax = true;
7017  }
7018  else if (flags & FRM_RETURN_IS_MULTI)
7019  {
7020  uint16 newbits;
7021  uint16 newbits2;
7022 
7023  /*
7024  * xmax is an old MultiXactId that we have to replace with a new
7025  * MultiXactId, to carry forward two or more original member XIDs.
7026  */
7027  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
7028 
7029  /*
7030  * We can't use GetMultiXactIdHintBits directly on the new multi
7031  * here; that routine initializes the masks to all zeroes, which
7032  * would lose other bits we need. Doing it this way ensures all
7033  * unrelated bits remain untouched.
7034  */
7035  frz->t_infomask &= ~HEAP_XMAX_BITS;
7036  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7037  GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
7038  frz->t_infomask |= newbits;
7039  frz->t_infomask2 |= newbits2;
7040  frz->xmax = newxmax;
7041  replace_xmax = true;
7042  }
7043  else
7044  {
7045  /*
7046  * Freeze plan for tuple "freezes xmax" in the strictest sense:
7047  * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7048  */
7049  Assert(flags & FRM_INVALIDATE_XMAX);
7050  Assert(!TransactionIdIsValid(newxmax));
7051 
7052  /* Will set freeze_xmax flags in freeze plan below */
7053  freeze_xmax = true;
7054  }
7055 
7056  /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7057  Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7058  }
7059  else if (TransactionIdIsNormal(xid))
7060  {
7061  /* Raw xmax is normal XID */
7062  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7063  ereport(ERROR,
7064  (errcode(ERRCODE_DATA_CORRUPTED),
7065  errmsg_internal("found xmax %u from before relfrozenxid %u",
7066  xid, cutoffs->relfrozenxid)));
7067 
7068  /* Will set freeze_xmax flags in freeze plan below */
7069  freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
7070 
7071  /*
7072  * Verify that xmax aborted if and when freeze plan is executed,
7073  * provided it's from an update. (A lock-only xmax can be removed
7074  * independent of this, since the lock is released at xact end.)
7075  */
7076  if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
7077  frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7078  }
7079  else if (!TransactionIdIsValid(xid))
7080  {
7081  /* Raw xmax is InvalidTransactionId XID */
7082  Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7083  xmax_already_frozen = true;
7084  }
7085  else
7086  ereport(ERROR,
7087  (errcode(ERRCODE_DATA_CORRUPTED),
7088  errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7089  xid, tuple->t_infomask)));
7090 
7091  if (freeze_xmin)
7092  {
7093  Assert(!xmin_already_frozen);
7094 
7095  frz->t_infomask |= HEAP_XMIN_FROZEN;
7096  }
7097  if (replace_xvac)
7098  {
7099  /*
7100  * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7101  * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7102  * transaction succeeded.
7103  */
7104  Assert(pagefrz->freeze_required);
7105  if (tuple->t_infomask & HEAP_MOVED_OFF)
7106  frz->frzflags |= XLH_INVALID_XVAC;
7107  else
7108  frz->frzflags |= XLH_FREEZE_XVAC;
7109  }
7110  if (replace_xmax)
7111  {
7112  Assert(!xmax_already_frozen && !freeze_xmax);
7113  Assert(pagefrz->freeze_required);
7114 
7115  /* Already set replace_xmax flags in freeze plan earlier */
7116  }
7117  if (freeze_xmax)
7118  {
7119  Assert(!xmax_already_frozen && !replace_xmax);
7120 
7121  frz->xmax = InvalidTransactionId;
7122 
7123  /*
7124  * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7125  * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7126  * Also get rid of the HEAP_KEYS_UPDATED bit.
7127  */
7128  frz->t_infomask &= ~HEAP_XMAX_BITS;
7129  frz->t_infomask |= HEAP_XMAX_INVALID;
7130  frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7131  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7132  }
7133 
7134  /*
7135  * Determine if this tuple is already totally frozen, or will become
7136  * totally frozen (provided caller executes freeze plans for the page)
7137  */
7138  *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
7139  (freeze_xmax || xmax_already_frozen));
7140 
7141  if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7142  xmax_already_frozen))
7143  {
7144  /*
7145  * So far no previous tuple from the page made freezing mandatory.
7146  * Does this tuple force caller to freeze the entire page?
7147  */
7148  pagefrz->freeze_required =
7149  heap_tuple_should_freeze(tuple, cutoffs,
7150  &pagefrz->NoFreezePageRelfrozenXid,
7151  &pagefrz->NoFreezePageRelminMxid);
7152  }
7153 
7154  /* Tell caller if this tuple has a usable freeze plan set in *frz */
7155  return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
7156 }
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:7286
#define FRM_RETURN_IS_XID
Definition: heapam.c:6493
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6544
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7703
#define FRM_MARK_COMMITTED
Definition: heapam.c:6495
#define FRM_NOOP
Definition: heapam.c:6491
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:6494
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:6492
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_HOT_UPDATED
Definition: htup_details.h:276
#define HeapTupleHeaderGetXvac(tup)
Definition: htup_details.h:411
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3317
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
MultiXactId OldestMxact
Definition: vacuum.h:268
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

References Assert, HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_HOT_UPDATED, HEAP_KEYS_UPDATED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_BITS, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
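The sketch below illustrates, under stated assumptions, how a caller typically accumulates per-tuple freeze plans and then executes them. cutoffs, buffer, htup, offnum and the caller's NewRelfrozenXid/NewRelminMxid trackers are assumed to exist, the loop over the page's LP_NORMAL tuples is elided, and WAL logging is omitted.

    HeapPageFreeze  pagefrz;
    HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    int             nfrozen = 0;
    bool            totally_frozen;

    pagefrz.freeze_required = false;
    pagefrz.FreezePageRelfrozenXid = NewRelfrozenXid;     /* caller's trackers */
    pagefrz.FreezePageRelminMxid = NewRelminMxid;
    pagefrz.NoFreezePageRelfrozenXid = NewRelfrozenXid;
    pagefrz.NoFreezePageRelminMxid = NewRelminMxid;

    /* ... for each LP_NORMAL tuple on the page ... */
    if (heap_prepare_freeze_tuple(htup, cutoffs, &pagefrz,
                                  &frozen[nfrozen], &totally_frozen))
        frozen[nfrozen++].offset = offnum;

    if (nfrozen > 0)
    {
        heap_pre_freeze_checks(buffer, frozen, nfrozen);
        START_CRIT_SECTION();
        heap_freeze_prepared_tuples(buffer, frozen, nfrozen);
        MarkBufferDirty(buffer);
        END_CRIT_SECTION();
    }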

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)

Definition at line 485 of file heapam.c.

486 {
487  HeapScanDesc scan = (HeapScanDesc) sscan;
488  Buffer buffer = scan->rs_cbuf;
489  BlockNumber block = scan->rs_cblock;
490  Snapshot snapshot;
491  Page page;
492  int lines;
493  bool all_visible;
494  bool check_serializable;
495 
496  Assert(BufferGetBlockNumber(buffer) == block);
497 
498  /* ensure we're not accidentally being used when not in pagemode */
499  Assert(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE);
500  snapshot = scan->rs_base.rs_snapshot;
501 
502  /*
503  * Prune and repair fragmentation for the whole page, if possible.
504  */
505  heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
506 
507  /*
508  * We must hold share lock on the buffer content while examining tuple
509  * visibility. Afterwards, however, the tuples we have found to be
510  * visible are guaranteed good as long as we hold the buffer pin.
511  */
512  LockBuffer(buffer, BUFFER_LOCK_SHARE);
513 
514  page = BufferGetPage(buffer);
515  lines = PageGetMaxOffsetNumber(page);
516 
517  /*
518  * If the all-visible flag indicates that all tuples on the page are
519  * visible to everyone, we can skip the per-tuple visibility tests.
520  *
521  * Note: In hot standby, a tuple that's already visible to all
522  * transactions on the primary might still be invisible to a read-only
523  * transaction in the standby. We partly handle this problem by tracking
524  * the minimum xmin of visible tuples as the cut-off XID while marking a
525  * page all-visible on the primary, and WAL-logging that along with the
526  * visibility map SET operation. In hot standby, we wait for (or abort)
527  * all transactions that potentially may not see one or more tuples on
528  * the page. That's how index-only scans work fine in hot standby. A
529  * crucial difference between index-only scans and heap scans is that an
530  * index-only scan relies completely on the visibility map, whereas a heap
531  * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
532  * the page-level flag can be trusted in the same way, because it might
533  * get propagated somehow without being explicitly WAL-logged, e.g. via a
534  * full page write. Until we can prove that beyond doubt, let's check each
535  * tuple for visibility the hard way.
536  */
537  all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
538  check_serializable =
539  CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
540 
541  /*
542  * We call page_collect_tuples() with constant arguments, to get the
543  * compiler to constant fold the constant arguments. Separate calls with
544  * constant arguments, rather than variables, are needed on several
545  * compilers to actually perform constant folding.
546  */
547  if (likely(all_visible))
548  {
549  if (likely(!check_serializable))
550  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
551  block, lines, true, false);
552  else
553  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
554  block, lines, true, true);
555  }
556  else
557  {
558  if (likely(!check_serializable))
559  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
560  block, lines, false, false);
561  else
562  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
563  block, lines, false, true);
564  }
565 
566  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
567 }
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition: heapam.c:435
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition: predicate.c:3981
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:193
BlockNumber rs_cblock
Definition: heapam.h:67
bool takenDuringRecovery
Definition: snapshot.h:184

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().
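As a sketch of how the result is consumed (assuming scan is the HeapScanDesc behind sscan, the page is pinned, and page points to it): after heap_prepare_pagescan() returns, the offsets of the tuples found visible are in rs_vistuples[0 .. rs_ntuples - 1] and can be walked while holding only the buffer pin.

    heap_prepare_pagescan(sscan);

    for (int i = 0; i < scan->rs_ntuples; i++)
    {
        OffsetNumber offnum = scan->rs_vistuples[i];
        ItemId       lpp = PageGetItemId(page, offnum);

        /* form a tuple from PageGetItem(page, lpp) and return it to the caller */
    }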

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1143 of file heapam.c.

1145 {
1146  HeapScanDesc scan = (HeapScanDesc) sscan;
1147 
1148  if (set_params)
1149  {
1150  if (allow_strat)
1151  scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1152  else
1153  scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1154 
1155  if (allow_sync)
1156  scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1157  else
1158  scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1159 
1160  if (allow_pagemode && scan->rs_base.rs_snapshot &&
1161  IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1162  scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1163  else
1164  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1165  }
1166 
1167  /*
1168  * unpin scan buffers
1169  */
1170  if (BufferIsValid(scan->rs_cbuf))
1171  ReleaseBuffer(scan->rs_cbuf);
1172 
1173  if (BufferIsValid(scan->rs_vmbuffer))
1174  {
1175  ReleaseBuffer(scan->rs_vmbuffer);
1176  scan->rs_vmbuffer = InvalidBuffer;
1177  }
1178 
1179  /*
1180  * Reset rs_empty_tuples_pending, a field only used by bitmap heap scan,
1181  * to avoid incorrectly emitting NULL-filled tuples from a previous scan
1182  * on rescan.
1183  */
1184  scan->rs_empty_tuples_pending = 0;
1185 
1186  /*
1187  * The read stream is reset on rescan. This must be done before
1188  * initscan(), as some state referred to by read_stream_reset() is reset
1189  * in initscan().
1190  */
1191  if (scan->rs_read_stream)
1192  read_stream_reset(scan->rs_read_stream);
1193 
1194  /*
1195  * reinitialize scan descriptor
1196  */
1197  initscan(scan, key, true);
1198 }
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:820
@ SO_ALLOW_STRAT
Definition: tableam.h:57
@ SO_ALLOW_SYNC
Definition: tableam.h:59

References BufferIsValid(), initscan(), InvalidBuffer, IsMVCCSnapshot, key, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.
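Two hedged call patterns, for illustration (sscan is assumed to be an already open heap scan):

    /* Restart the scan without changing any scan options */
    heap_rescan(sscan, NULL, false, false, false, false);

    /* Restart the scan and re-derive the options */
    heap_rescan(sscan, NULL, true,
                true,     /* allow_strat: use a bulk-read buffer strategy */
                false,    /* allow_sync: no synchronized scan */
                true);    /* allow_pagemode: page-at-a-time visibility for MVCC snapshots */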

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)

Definition at line 1322 of file heapam.c.

1324 {
1325  HeapScanDesc scan = (HeapScanDesc) sscan;
1326  BlockNumber startBlk;
1327  BlockNumber numBlks;
1328  ItemPointerData highestItem;
1329  ItemPointerData lowestItem;
1330 
1331  /*
1332  * For relations without any pages, we can simply leave the TID range
1333  * unset. There will be no tuples to scan, therefore no tuples outside
1334  * the given TID range.
1335  */
1336  if (scan->rs_nblocks == 0)
1337  return;
1338 
1339  /*
1340  * Set up some ItemPointers which point to the first and last possible
1341  * tuples in the heap.
1342  */
1343  ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
1344  ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
1345 
1346  /*
1347  * If the given maximum TID is below the highest possible TID in the
1348  * relation, then restrict the range to that, otherwise we scan to the end
1349  * of the relation.
1350  */
1351  if (ItemPointerCompare(maxtid, &highestItem) < 0)
1352  ItemPointerCopy(maxtid, &highestItem);
1353 
1354  /*
1355  * If the given minimum TID is above the lowest possible TID in the
1356  * relation, then restrict the range to only scan for TIDs above that.
1357  */
1358  if (ItemPointerCompare(mintid, &lowestItem) > 0)
1359  ItemPointerCopy(mintid, &lowestItem);
1360 
1361  /*
1362  * Check for an empty range and protect from would-be negative results
1363  * from the numBlks calculation below.
1364  */
1365  if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
1366  {
1367  /* Set an empty range of blocks to scan */
1368  heap_setscanlimits(sscan, 0, 0);
1369  return;
1370  }
1371 
1372  /*
1373  * Calculate the first block and the number of blocks we must scan. We
1374  * could be more aggressive here and perform some more validation to try
1375  * and further narrow the scope of blocks to scan by checking if the
1376  * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1377  * advance startBlk by one. Likewise, if highestItem has an offset of 0
1378  * we could scan one fewer blocks. However, such an optimization does not
1379  * seem worth troubling over, currently.
1380  */
1381  startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
1382 
1383  numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
1384  ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
1385 
1386  /* Set the start block and number of blocks to scan */
1387  heap_setscanlimits(sscan, startBlk, numBlks);
1388 
1389  /* Finally, set the TID range in sscan */
1390  ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1391  ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1392 }
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:413
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
#define MaxOffsetNumber
Definition: off.h:28
BlockNumber rs_nblocks
Definition: heapam.h:59

References FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, HeapScanDescData::rs_nblocks, TableScanDescData::st, and TableScanDescData::tidrange.
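A minimal sketch of a TID range scan, assuming sscan and slot were created elsewhere and that blocks 10 through 20 are the (illustrative) range of interest:

    ItemPointerData mintid;
    ItemPointerData maxtid;

    ItemPointerSet(&mintid, 10, FirstOffsetNumber);
    ItemPointerSet(&maxtid, 20, MaxOffsetNumber);

    heap_set_tidrange(sscan, &mintid, &maxtid);
    while (heap_getnextslot_tidrange(sscan, ForwardScanDirection, slot))
    {
        /* process the tuple in slot */
    }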

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)

Definition at line 413 of file heapam.c.

414 {
415  HeapScanDesc scan = (HeapScanDesc) sscan;
416 
417  Assert(!scan->rs_inited); /* else too late to change */
418  /* else rs_startblock is significant */
419  Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
420 
421  /* Check startBlk is valid (but allow case of zero blocks...) */
422  Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
423 
424  scan->rs_startblock = startBlk;
425  scan->rs_numblocks = numBlks;
426 }
bool rs_inited
Definition: heapam.h:65
BlockNumber rs_startblock
Definition: heapam.h:60
BlockNumber rs_numblocks
Definition: heapam.h:61

References Assert, HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
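For example, a caller that only wants the first 100 blocks would set the limits right after starting the scan, before any tuple has been fetched (rs_inited must still be false) and only on a scan started without synchronized scanning:

    heap_setscanlimits(sscan, 0, 100);   /* start at block 0, scan 100 blocks */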

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)

Definition at line 7648 of file heapam.c.

7649 {
7650  TransactionId xid;
7651 
7652  /*
7653  * If xmin is a normal transaction ID, this tuple is definitely not
7654  * frozen.
7655  */
7656  xid = HeapTupleHeaderGetXmin(tuple);
7657  if (TransactionIdIsNormal(xid))
7658  return true;
7659 
7660  /*
7661  * If xmax is a valid xact or multixact, this tuple is also not frozen.
7662  */
7663  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7664  {
7665  MultiXactId multi;
7666 
7667  multi = HeapTupleHeaderGetRawXmax(tuple);
7668  if (MultiXactIdIsValid(multi))
7669  return true;
7670  }
7671  else
7672  {
7673  xid = HeapTupleHeaderGetRawXmax(tuple);
7674  if (TransactionIdIsNormal(xid))
7675  return true;
7676  }
7677 
7678  if (tuple->t_infomask & HEAP_MOVED)
7679  {
7680  xid = HeapTupleHeaderGetXvac(tuple);
7681  if (TransactionIdIsNormal(xid))
7682  return true;
7683  }
7684 
7685  return false;
7686 }

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_is_all_visible().
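A hedged sketch of the typical use, similar in spirit to heap_page_is_all_visible(): decide whether a page could be marked all-frozen in the visibility map by checking that no tuple still carries a normal xmin, xmax or xvac. page and maxoff are assumed to be set up by the caller, which holds a suitable buffer lock.

    bool         all_frozen = true;
    OffsetNumber offnum;

    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId itemid = PageGetItemId(page, offnum);

        if (!ItemIdIsNormal(itemid))
            continue;

        if (heap_tuple_needs_eventual_freeze((HeapTupleHeader) PageGetItem(page, itemid)))
        {
            all_frozen = false;
            break;
        }
    }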

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)

Definition at line 7703 of file heapam.c.

7707 {
7708  TransactionId xid;
7709  MultiXactId multi;
7710  bool freeze = false;
7711 
7712  /* First deal with xmin */
7713  xid = HeapTupleHeaderGetXmin(tuple);
7714  if (TransactionIdIsNormal(xid))
7715  {
7716  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7717  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7718  *NoFreezePageRelfrozenXid = xid;
7719  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7720  freeze = true;
7721  }
7722 
7723  /* Now deal with xmax */
7724  xid = InvalidTransactionId;
7725  multi = InvalidMultiXactId;
7726  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7727  multi = HeapTupleHeaderGetRawXmax(tuple);
7728  else
7729  xid = HeapTupleHeaderGetRawXmax(tuple);
7730 
7731  if (TransactionIdIsNormal(xid))
7732  {
7733  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7734  /* xmax is a non-permanent XID */
7735  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7736  *NoFreezePageRelfrozenXid = xid;
7737  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7738  freeze = true;
7739  }
7740  else if (!MultiXactIdIsValid(multi))
7741  {
7742  /* xmax is a permanent XID or invalid MultiXactId/XID */
7743  }
7744  else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7745  {
7746  /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7747  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7748  *NoFreezePageRelminMxid = multi;
7749  /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7750  freeze = true;
7751  }
7752  else
7753  {
7754  /* xmax is a MultiXactId that may have an updater XID */
7755  MultiXactMember *members;
7756  int nmembers;
7757 
7758  Assert(MultiXactIdPrecedesOrEquals(cutoffs->relminmxid, multi));
7759  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7760  *NoFreezePageRelminMxid = multi;
7761  if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
7762  freeze = true;
7763 
7764  /* need to check whether any member of the mxact is old */
7765  nmembers = GetMultiXactIdMembers(multi, &members, false,
7766  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
7767 
7768  for (int i = 0; i < nmembers; i++)
7769  {
7770  xid = members[i].xid;
7771  Assert(TransactionIdIsNormal(xid));
7772  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7773  *NoFreezePageRelfrozenXid = xid;
7774  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7775  freeze = true;
7776  }
7777  if (nmembers > 0)
7778  pfree(members);
7779  }
7780 
7781  if (tuple->t_infomask & HEAP_MOVED)
7782  {
7783  xid = HeapTupleHeaderGetXvac(tuple);
7784  if (TransactionIdIsNormal(xid))
7785  {
7786  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7787  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7788  *NoFreezePageRelfrozenXid = xid;
7789  /* heap_prepare_freeze_tuple forces xvac freezing */
7790  freeze = true;
7791  }
7792  }
7793 
7794  return freeze;
7795 }
#define HEAP_LOCKED_UPGRADED(infomask)
Definition: htup_details.h:249
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3331
TransactionId xid
Definition: multixact.h:58

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED, HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
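The sketch below shows, under stated assumptions, the "no freeze" bookkeeping pattern used by callers such as lazy_scan_noprune(): both trackers are seeded from the caller's current NewRelfrozenXid/NewRelminMxid values (assumed to exist, along with htup and cutoffs) and are ratcheted back by each call, while the return value reports whether leaving the tuple unfrozen would violate FreezeLimit/MultiXactCutoff.

    TransactionId NoFreezePageRelfrozenXid = NewRelfrozenXid;   /* caller's trackers */
    MultiXactId   NoFreezePageRelminMxid = NewRelminMxid;
    bool          should_freeze;

    should_freeze = heap_tuple_should_freeze(htup, cutoffs,
                                             &NoFreezePageRelfrozenXid,
                                             &NoFreezePageRelminMxid);
    if (should_freeze)
    {
        /* an XID/MXID from before FreezeLimit/MultiXactCutoff is present */
    }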

◆ heap_update()

TM_Result heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)

Definition at line 3141 of file heapam.c.

3145 {
3146  TM_Result result;
3147  TransactionId xid = GetCurrentTransactionId();
3148  Bitmapset *hot_attrs;
3149  Bitmapset *sum_attrs;
3150  Bitmapset *key_attrs;
3151  Bitmapset *id_attrs;
3152  Bitmapset *interesting_attrs;
3153  Bitmapset *modified_attrs;
3154  ItemId lp;
3155  HeapTupleData oldtup;
3156  HeapTuple heaptup;
3157  HeapTuple old_key_tuple = NULL;
3158  bool old_key_copied = false;
3159  Page page;
3160  BlockNumber block;
3161  MultiXactStatus mxact_status;
3162  Buffer buffer,
3163  newbuf,
3164  vmbuffer = InvalidBuffer,
3165  vmbuffer_new = InvalidBuffer;
3166  bool need_toast;
3167  Size newtupsize,
3168  pagefree;
3169  bool have_tuple_lock = false;
3170  bool iscombo;
3171  bool use_hot_update = false;
3172  bool summarized_update = false;
3173  bool key_intact;
3174  bool all_visible_cleared = false;
3175  bool all_visible_cleared_new = false;
3176  bool checked_lockers;
3177  bool locker_remains;
3178  bool id_has_external = false;
3179  TransactionId xmax_new_tuple,
3180  xmax_old_tuple;
3181  uint16 infomask_old_tuple,
3182  infomask2_old_tuple,
3183  infomask_new_tuple,
3184  infomask2_new_tuple;
3185 
3186  Assert(ItemPointerIsValid(otid));
3187 
3188  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3189  Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
3190  RelationGetNumberOfAttributes(relation));
3191 
3192  /*
3193  * Forbid this during a parallel operation, lest it allocate a combo CID.
3194  * Other workers might need that combo CID for visibility checks, and we
3195  * have no provision for broadcasting it to them.
3196  */
3197  if (IsInParallelMode())
3198  ereport(ERROR,
3199  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
3200  errmsg("cannot update tuples during a parallel operation")));
3201 
3202 #ifdef USE_ASSERT_CHECKING
3203  check_lock_if_inplace_updateable_rel(relation, otid, newtup);
3204 #endif
3205 
3206  /*
3207  * Fetch the list of attributes to be checked for various operations.
3208  *
3209  * For HOT considerations, this is wasted effort if we fail to update or
3210  * have to put the new tuple on a different page. But we must compute the
3211  * list before obtaining buffer lock --- in the worst case, if we are
3212  * doing an update on one of the relevant system catalogs, we could
3213  * deadlock if we try to fetch the list later. In any case, the relcache
3214  * caches the data so this is usually pretty cheap.
3215  *
3216  * We also need columns used by the replica identity and columns that are
3217  * considered the "key" of rows in the table.
3218  *
3219  * Note that we get copies of each bitmap, so we need not worry about
3220  * relcache flush happening midway through.
3221  */
3222  hot_attrs = RelationGetIndexAttrBitmap(relation,
3223  INDEX_ATTR_BITMAP_HOT_BLOCKING);
3224  sum_attrs = RelationGetIndexAttrBitmap(relation,
3225  INDEX_ATTR_BITMAP_SUMMARIZED);
3226  key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
3227  id_attrs = RelationGetIndexAttrBitmap(relation,
3228  INDEX_ATTR_BITMAP_IDENTITY_KEY);
3229  interesting_attrs = NULL;
3230  interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
3231  interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
3232  interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
3233  interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
3234 
3235  block = ItemPointerGetBlockNumber(otid);
3236  buffer = ReadBuffer(relation, block);
3237  page = BufferGetPage(buffer);
3238 
3239  /*
3240  * Before locking the buffer, pin the visibility map page if it appears to
3241  * be necessary. Since we haven't got the lock yet, someone else might be
3242  * in the middle of changing this, so we'll need to recheck after we have
3243  * the lock.
3244  */
3245  if (PageIsAllVisible(page))
3246  visibilitymap_pin(relation, block, &vmbuffer);
3247 
3247 
3248  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3249 
3250  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
3251  Assert(ItemIdIsNormal(lp));
3252 
3253  /*
3254  * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3255  * properly.
3256  */
3257  oldtup.t_tableOid = RelationGetRelid(relation);
3258  oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3259  oldtup.t_len = ItemIdGetLength(lp);
3260  oldtup.t_self = *otid;
3261 
3262  /* the new tuple is ready, except for this: */
3263  newtup->t_tableOid = RelationGetRelid(relation);
3264 
3265  /*
3266  * Determine columns modified by the update. Additionally, identify
3267  * whether any of the unmodified replica identity key attributes in the
3268  * old tuple is externally stored or not. This is required because for
3269  * such attributes the flattened value won't be WAL logged as part of the
3270  * new tuple so we must include it as part of the old_key_tuple. See
3271  * ExtractReplicaIdentity.
3272  */
3273  modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
3274  id_attrs, &oldtup,
3275  newtup, &id_has_external);
3276 
3277  /*
3278  * If we're not updating any "key" column, we can grab a weaker lock type.
3279  * This allows for more concurrency when we are running simultaneously
3280  * with foreign key checks.
3281  *
3282  * Note that if a column gets detoasted while executing the update, but
3283  * the value ends up being the same, this test will fail and we will use
3284  * the stronger lock. This is acceptable; the important case to optimize
3285  * is updates that don't manipulate key columns, not those that
3286  * serendipitously arrive at the same key values.
3287  */
3288  if (!bms_overlap(modified_attrs, key_attrs))
3289  {
3290  *lockmode = LockTupleNoKeyExclusive;
3291  mxact_status = MultiXactStatusNoKeyUpdate;
3292  key_intact = true;
3293 
3294  /*
3295  * If this is the first possibly-multixact-able operation in the
3296  * current transaction, set my per-backend OldestMemberMXactId
3297  * setting. We can be certain that the transaction will never become a
3298  * member of any older MultiXactIds than that. (We have to do this
3299  * even if we end up just using our own TransactionId below, since
3300  * some other backend could incorporate our XID into a MultiXact
3301  * immediately afterwards.)
3302  */
3303  MultiXactIdSetOldestMember();
3304  }
3305  else
3306  {
3307  *lockmode = LockTupleExclusive;
3308  mxact_status = MultiXactStatusUpdate;
3309  key_intact = false;
3310  }
3311 
3312  /*
3313  * Note: beyond this point, use oldtup not otid to refer to old tuple.
3314  * otid may very well point at newtup->t_self, which we will overwrite
3315  * with the new tuple's location, so there's great risk of confusion if we
3316  * use otid anymore.
3317  */
3318 
3319 l2:
3320  checked_lockers = false;
3321  locker_remains = false;
3322  result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3323 
3324  /* see below about the "no wait" case */
3325  Assert(result != TM_BeingModified || wait);
3326 
3327  if (result == TM_Invisible)
3328  {
3329  UnlockReleaseBuffer(buffer);
3330  ereport(ERROR,
3331  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3332  errmsg("attempted to update invisible tuple")));
3333  }
3334  else if (result == TM_BeingModified && wait)
3335  {
3336  TransactionId xwait;
3337  uint16 infomask;
3338  bool can_continue = false;
3339 
3340  /*
3341  * XXX note that we don't consider the "no wait" case here. This
3342  * isn't a problem currently because no caller uses that case, but it
3343  * should be fixed if such a caller is introduced. It wasn't a
3344  * problem previously because this code would always wait, but now
3345  * that some tuple locks do not conflict with one of the lock modes we
3346  * use, it is possible that this case is interesting to handle
3347  * specially.
3348  *
3349  * This may cause failures with third-party code that calls
3350  * heap_update directly.
3351  */
3352 
3353  /* must copy state data before unlocking buffer */
3354  xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3355  infomask = oldtup.t_data->t_infomask;
3356 
3357  /*
3358  * Now we have to do something about the existing locker. If it's a
3359  * multi, sleep on it; we might be awakened before it is completely
3360  * gone (or even not sleep at all in some cases); we need to preserve
3361  * it as locker, unless it is gone completely.
3362  *
3363  * If it's not a multi, we need to check for sleeping conditions
3364  * before actually going to sleep. If the update doesn't conflict
3365  * with the locks, we just continue without sleeping (but making sure
3366  * it is preserved).
3367  *
3368  * Before sleeping, we need to acquire tuple lock to establish our
3369  * priority for the tuple (see heap_lock_tuple). LockTuple will
3370  * release us when we are next-in-line for the tuple. Note we must
3371  * not acquire the tuple lock until we're sure we're going to sleep;
3372  * otherwise we're open for race conditions with other transactions
3373  * holding the tuple lock which sleep on us.
3374  *
3375  * If we are forced to "start over" below, we keep the tuple lock;
3376  * this arranges that we stay at the head of the line while rechecking
3377  * tuple state.
3378  */
3379  if (infomask & HEAP_XMAX_IS_MULTI)
3380  {
3381  TransactionId update_xact;
3382  int remain;
3383  bool current_is_member = false;
3384 
3385  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
3386  *lockmode, &current_is_member))
3387  {
3388  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3389 
3390  /*
3391  * Acquire the lock, if necessary (but skip it when we're
3392  * requesting a lock and already have one; avoids deadlock).
3393  */
3394  if (!current_is_member)
3395  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3396  LockWaitBlock, &have_tuple_lock);
3397 
3398  /* wait for multixact */
3399  MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
3400  relation, &oldtup.t_self, XLTW_Update,
3401  &remain);
3402  checked_lockers = true;
3403  locker_remains = remain != 0;
3404  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3405 
3406  /*
3407  * If xwait had just locked the tuple then some other xact
3408  * could update this tuple before we get to this point. Check
3409  * for xmax change, and start over if so.
3410  */
3411  if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3412  infomask) ||
3413  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3414  xwait))
3415  goto l2;
3416  }
3417 
3418  /*
3419  * Note that the multixact may not be done by now. It could have
3420  * surviving members; our own xact or other subxacts of this
3421  * backend, and also any other concurrent transaction that locked
3422  * the tuple with LockTupleKeyShare if we only got
3423  * LockTupleNoKeyExclusive. If this is the case, we have to be
3424  * careful to mark the updated tuple with the surviving members in
3425  * Xmax.
3426  *
3427  * Note that there could have been another update in the
3428  * MultiXact. In that case, we need to check whether it committed
3429  * or aborted. If it aborted we are safe to update it again;
3430  * otherwise there is an update conflict, and we have to return
3431  * TableTuple{Deleted, Updated} below.
3432  *
3433  * In the LockTupleExclusive case, we still need to preserve the
3434  * surviving members: those would include the tuple locks we had
3435  * before this one, which are important to keep in case this
3436  * subxact aborts.
3437  */
3438  if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3439  update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
3440  else
3441  update_xact = InvalidTransactionId;
3442 
3443  /*
3444  * There was no UPDATE in the MultiXact; or it aborted. No
3445  * TransactionIdIsInProgress() call needed here, since we called
3446  * MultiXactIdWait() above.
3447  */
3448  if (!TransactionIdIsValid(update_xact) ||
3449  TransactionIdDidAbort(update_xact))
3450  can_continue = true;
3451  }
3452  else if (TransactionIdIsCurrentTransactionId(xwait))
3453  {
3454  /*
3455  * The only locker is ourselves; we can avoid grabbing the tuple
3456  * lock here, but must preserve our locking information.
3457  */
3458  checked_lockers = true;
3459  locker_remains = true;
3460  can_continue = true;
3461  }
3462  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
3463  {
3464  /*
3465  * If it's just a key-share locker, and we're not changing the key
3466  * columns, we don't need to wait for it to end; but we need to
3467  * preserve it as locker.
3468  */
3469  checked_lockers = true;
3470  locker_remains = true;
3471  can_continue = true;
3472  }
3473  else
3474  {
3475  /*
3476  * Wait for regular transaction to end; but first, acquire tuple
3477  * lock.
3478  */
3479  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3480  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3481  LockWaitBlock, &have_tuple_lock);
3482  XactLockTableWait(xwait, relation, &oldtup.t_self,
3483  XLTW_Update);
3484  checked_lockers = true;
3485  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3486 
3487  /*
3488  * xwait is done, but if xwait had just locked the tuple then some
3489  * other xact could update this tuple before we get to this point.
3490  * Check for xmax change, and start over if so.
3491  */
3492  if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3493  !TransactionIdEquals(xwait,
3494  HeapTupleHeaderGetRawXmax(oldtup.t_data)))
3495  goto l2;
3496 
3497  /* Otherwise check if it committed or aborted */
3498  UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3499  if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3500  can_continue = true;
3501  }
3502 
3503  if (can_continue)
3504  result = TM_Ok;
3505  else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3506  result = TM_Updated;
3507  else
3508  result = TM_Deleted;
3509  }
3510 
3511  /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3512  if (result != TM_Ok)
3513  {
3514  Assert(result == TM_SelfModified ||
3515  result == TM_Updated ||
3516  result == TM_Deleted ||
3517  result == TM_BeingModified);
3518  Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3519  Assert(result != TM_Updated ||
3520  !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3521  }
3522 
3523  if (crosscheck != InvalidSnapshot && result == TM_Ok)
3524  {
3525  /* Perform additional check for transaction-snapshot mode RI updates */
3526  if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
3527  result = TM_Updated;
3528  }
3529 
3530  if (result != TM_Ok)
3531  {
3532  tmfd->ctid = oldtup.t_data->t_ctid;
3533  tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3534  if (result == TM_SelfModified)
3535  tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3536  else
3537  tmfd->cmax = InvalidCommandId;
3538  UnlockReleaseBuffer(buffer);
3539  if (have_tuple_lock)
3540  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3541  if (vmbuffer != InvalidBuffer)
3542  ReleaseBuffer(vmbuffer);
3543  *update_indexes = TU_None;
3544 
3545  bms_free(hot_attrs);
3546  bms_free(sum_attrs);
3547  bms_free(key_attrs);
3548  bms_free(id_attrs);
3549  bms_free(modified_attrs);
3550  bms_free(interesting_attrs);
3551  return result;
3552  }
3553 
3554  /*
3555  * If we didn't pin the visibility map page and the page has become all
3556  * visible while we were busy locking the buffer, or during some
3557  * subsequent window during which we had it unlocked, we'll have to unlock
3558  * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3559  * bit unfortunate, especially since we'll now have to recheck whether the
3560  * tuple has been locked or updated under us, but hopefully it won't
3561  * happen very often.
3562  */
3563  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3564  {
3565  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3566  visibilitymap_pin(relation, block, &vmbuffer);
3567  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3568  goto l2;
3569  }
3570 
3571  /* Fill in transaction status data */
3572 
3573  /*
3574  * If the tuple we're updating is locked, we need to preserve the locking
3575  * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3576  */
3577  compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3578  oldtup.t_data->t_infomask,
3579  oldtup.t_data->t_infomask2,
3580  xid, *lockmode, true,
3581  &xmax_old_tuple, &infomask_old_tuple,
3582  &infomask2_old_tuple);
3583 
3584  /*
3585  * And also prepare an Xmax value for the new copy of the tuple. If there
3586  * was no xmax previously, or there was one but all lockers are now gone,
3587  * then use InvalidTransactionId; otherwise, get the xmax from the old
3588  * tuple. (In rare cases that might also be InvalidTransactionId and yet
3589  * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3590  */
3591  if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3592  HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask) ||
3593  (checked_lockers && !locker_remains))
3594  xmax_new_tuple = InvalidTransactionId;
3595  else
3596  xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3597 
3598  if (!TransactionIdIsValid(xmax_new_tuple))
3599  {
3600  infomask_new_tuple = HEAP_XMAX_INVALID;
3601  infomask2_new_tuple = 0;
3602  }
3603  else
3604  {
3605  /*
3606  * If we found a valid Xmax for the new tuple, then the infomask bits
3607  * to use on the new tuple depend on what was there on the old one.
3608  * Note that since we're doing an update, the only possibility is that
3609  * the lockers had FOR KEY SHARE lock.
3610  */
3611  if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3612  {
3613  GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
3614  &infomask2_new_tuple);
3615  }
3616  else
3617  {
3618  infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
3619  infomask2_new_tuple = 0;
3620  }
3621  }
3622 
3623  /*
3624  * Prepare the new tuple with the appropriate initial values of Xmin and
3625  * Xmax, as well as initial infomask bits as computed above.
3626  */
3627  newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3628  newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3629  HeapTupleHeaderSetXmin(newtup->t_data, xid);
3630  HeapTupleHeaderSetCmin(newtup->t_data, cid);
3631  newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3632  newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3633  HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
3634 
3635  /*
3636  * Replace cid with a combo CID if necessary. Note that we already put
3637  * the plain cid into the new tuple.
3638  */
3639  HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
3640 
3641  /*
3642  * If the toaster needs to be activated, OR if the new tuple will not fit
3643  * on the same page as the old, then we need to release the content lock
3644  * (but not the pin!) on the old tuple's buffer while we are off doing
3645  * TOAST and/or table-file-extension work. We must mark the old tuple to
3646  * show that it's locked, else other processes may try to update it
3647  * themselves.
3648  *
3649  * We need to invoke the toaster if there are already any out-of-line
3650  * toasted values present, or if the new tuple is over-threshold.
3651  */
3652  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3653  relation->rd_rel->relkind != RELKIND_MATVIEW)
3654  {
3655  /* toast table entries should never be recursively toasted */
3656  Assert(!HeapTupleHasExternal(&oldtup));
3657  Assert(!HeapTupleHasExternal(newtup));
3658  need_toast = false;
3659  }
3660  else
3661  need_toast = (HeapTupleHasExternal(&oldtup) ||
3662  HeapTupleHasExternal(newtup) ||
3663  newtup->t_len > TOAST_TUPLE_THRESHOLD);
3664 
3665  pagefree = PageGetHeapFreeSpace(page);
3666 
3667  newtupsize = MAXALIGN(newtup->t_len);
3668 
3669  if (need_toast || newtupsize > pagefree)
3670  {
3671  TransactionId xmax_lock_old_tuple;
3672  uint16 infomask_lock_old_tuple,
3673  infomask2_lock_old_tuple;
3674  bool cleared_all_frozen = false;
3675 
3676  /*
3677  * To prevent concurrent sessions from updating the tuple, we have to
3678  * temporarily mark it locked, while we release the page-level lock.
3679  *
3680  * To satisfy the rule that any xid potentially appearing in a buffer
3681  * written out to disk, we unfortunately have to WAL log this
3682  * temporary modification. We can reuse xl_heap_lock for this
3683  * purpose. If we crash/error before following through with the
3684  * actual update, xmax will be of an aborted transaction, allowing
3685  * other sessions to proceed.
3686  */
3687 
3688  /*
3689  * Compute xmax / infomask appropriate for locking the tuple. This has
3690  * to be done separately from the combo that's going to be used for
3691  * updating, because the potentially created multixact would otherwise
3692  * be wrong.
3693  */
3694  compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3695  oldtup.t_data->t_infomask,
3696  oldtup.t_data->t_infomask2,
3697  xid, *lockmode, false,
3698  &xmax_lock_old_tuple, &infomask_lock_old_tuple,
3699  &infomask2_lock_old_tuple);
3700 
3701  Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
3702 
3703  START_CRIT_SECTION();
3704 
3705  /* Clear obsolete visibility flags ... */
3706  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3707  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3708  HeapTupleClearHotUpdated(&oldtup);
3709  /* ... and store info about transaction updating this tuple */
3710  Assert(TransactionIdIsValid(xmax_lock_old_tuple));
3711  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
3712  oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3713  oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3714  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3715 
3716  /* temporarily make it look not-updated, but locked */
3717  oldtup.t_data->t_ctid = oldtup.t_self;
3718 
3719  /*
3720  * Clear all-frozen bit on visibility map if needed. We could
3721  * immediately reset ALL_VISIBLE, but given that the WAL logging
3722  * overhead would be unchanged, that doesn't seem necessarily
3723  * worthwhile.
3724  */
3725  if (PageIsAllVisible(page) &&
3726  visibilitymap_clear(relation, block, vmbuffer,
3727  VISIBILITYMAP_ALL_FROZEN))
3728  cleared_all_frozen = true;
3729 
3730  MarkBufferDirty(buffer);
3731 
3732  if (RelationNeedsWAL(relation))
3733  {
3734  xl_heap_lock xlrec;
3735  XLogRecPtr recptr;
3736 
3737  XLogBeginInsert();
3738  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3739 
3740  xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3741  xlrec.xmax = xmax_lock_old_tuple;
3742  xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3743  oldtup.t_data->t_infomask2);
3744  xlrec.flags =
3745  cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
3746  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
3747  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
3748  PageSetLSN(page, recptr);
3749  }
3750 
3751  END_CRIT_SECTION();
3752 
3753  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3754 
3755  /*
3756  * Let the toaster do its thing, if needed.
3757  *
3758  * Note: below this point, heaptup is the data we actually intend to
3759  * store into the relation; newtup is the caller's original untoasted
3760  * data.
3761  */
3762  if (need_toast)
3763  {
3764  /* Note we always use WAL and FSM during updates */
3765  heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
3766  newtupsize = MAXALIGN(heaptup->t_len);
3767  }
3768  else
3769  heaptup = newtup;
3770 
3771  /*
3772  * Now, do we need a new page for the tuple, or not? This is a bit
3773  * tricky since someone else could have added tuples to the page while
3774  * we weren't looking. We have to recheck the available space after
3775  * reacquiring the buffer lock. But don't bother to do that if the
3776  * former amount of free space is still not enough; it's unlikely
3777  * there's more free now than before.
3778  *
3779  * What's more, if we need to get a new page, we will need to acquire
3780  * buffer locks on both old and new pages. To avoid deadlock against
3781  * some other backend trying to get the same two locks in the other
3782  * order, we must be consistent about the order we get the locks in.
3783  * We use the rule "lock the lower-numbered page of the relation
3784  * first". To implement this, we must do RelationGetBufferForTuple
3785  * while not holding the lock on the old page, and we must rely on it
3786  * to get the locks on both pages in the correct order.
3787  *
3788  * Another consideration is that we need visibility map page pin(s) if
3789  * we will have to clear the all-visible flag on either page. If we
3790  * call RelationGetBufferForTuple, we rely on it to acquire any such
3791  * pins; but if we don't, we have to handle that here. Hence we need
3792  * a loop.
3793  */
3794  for (;;)
3795  {
3796  if (newtupsize > pagefree)
3797  {
3798  /* It doesn't fit, must use RelationGetBufferForTuple. */
3799  newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
3800  buffer, 0, NULL,
3801  &vmbuffer_new, &vmbuffer,
3802  0);
3803  /* We're all done. */
3804  break;
3805  }
3806  /* Acquire VM page pin if needed and we don't have it. */
3807  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3808  visibilitymap_pin(relation, block, &vmbuffer);
3809  /* Re-acquire the lock on the old tuple's page. */
3810  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3811  /* Re-check using the up-to-date free space */
3812  pagefree = PageGetHeapFreeSpace(page);
3813  if (newtupsize > pagefree ||
3814  (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
3815  {
3816  /*
3817  * Rats, it doesn't fit anymore, or somebody just now set the
3818  * all-visible flag. We must now unlock and loop to avoid
3819  * deadlock. Fortunately, this path should seldom be taken.
3820  */
3821  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3822  }
3823  else
3824  {
3825  /* We're all done. */
3826  newbuf = buffer;
3827  break;
3828  }
3829  }
3830  }
3831  else
3832  {
3833  /* No TOAST work needed, and it'll fit on same page */
3834  newbuf = buffer;
3835  heaptup = newtup;
3836  }
3837 
3838  /*
3839  * We're about to do the actual update -- check for conflict first, to
3840  * avoid possibly having to roll back work we've just done.
3841  *
3842  * This is safe without a recheck as long as there is no possibility of
3843  * another process scanning the pages between this check and the update
3844  * being visible to the scan (i.e., exclusive buffer content lock(s) are
3845  * continuously held from this point until the tuple update is visible).
3846  *
3847  * For the new tuple the only check needed is at the relation level, but
3848  * since both tuples are in the same relation and the check for oldtup
3849  * will include checking the relation level, there is no benefit to a
3850  * separate check for the new tuple.
3851  */
3852  CheckForSerializableConflictIn(relation, &oldtup.t_self,
3853  BufferGetBlockNumber(buffer));
3854 
3855  /*
3856  * At this point newbuf and buffer are both pinned and locked, and newbuf
3857  * has enough space for the new tuple. If they are the same buffer, only
3858  * one pin is held.
3859  */
3860 
3861  if (newbuf == buffer)
3862  {
3863  /*
3864  * Since the new tuple is going into the same page, we might be able
3865  * to do a HOT update. Check if any of the index columns have been
3866  * changed.
3867  */
3868  if (!bms_overlap(modified_attrs, hot_attrs))
3869  {
3870  use_hot_update = true;
3871 
3872  /*
3873  * If none of the columns that are used in hot-blocking indexes
3874  * were updated, we can apply HOT, but we do still need to check
3875  * if we need to update the summarizing indexes, and update those
3876  * indexes if the columns were updated, or we may fail to detect
3877  * e.g. value bound changes in BRIN minmax indexes.
3878  */
3879  if (bms_overlap(modified_attrs, sum_attrs))
3880  summarized_update = true;
3881  }
3882  }
3883  else
3884  {
3885  /* Set a hint that the old page could use prune/defrag */
3886  PageSetFull(page);
3887  }
3888 
3889  /*
3890  * Compute replica identity tuple before entering the critical section so
3891  * we don't PANIC upon a memory allocation failure.
3892  * ExtractReplicaIdentity() will return NULL if nothing needs to be
3893  * logged. Pass old key required as true only if the replica identity key
3894  * columns are modified or it has external data.
3895  */
3896  old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
3897  bms_overlap(modified_attrs, id_attrs) ||
3898  id_has_external,
3899  &old_key_copied);
3900 
3901  /* NO EREPORT(ERROR) from here till changes are logged */
3902  START_CRIT_SECTION();
3903 
3904  /*
3905  * If this transaction commits, the old tuple will become DEAD sooner or
3906  * later. Set flag that this page is a candidate for pruning once our xid
3907  * falls below the OldestXmin horizon. If the transaction finally aborts,
3908  * the subsequent page pruning will be a no-op and the hint will be
3909  * cleared.
3910  *
3911  * XXX Should we set hint on newbuf as well? If the transaction aborts,
3912  * there would be a prunable tuple in the newbuf; but for now we choose
3913  * not to optimize for aborts. Note that heap_xlog_update must be kept in
3914  * sync if this decision changes.
3915  */
3916  PageSetPrunable(page, xid);
3917 
3918  if (use_hot_update)
3919  {
3920  /* Mark the old tuple as HOT-updated */
3921  HeapTupleSetHotUpdated(&oldtup);
3922  /* And mark the new tuple as heap-only */
3923  HeapTupleSetHeapOnly(heaptup);
3924  /* Mark the caller's copy too, in case different from heaptup */
3925  HeapTupleSetHeapOnly(newtup);
3926  }
3927  else
3928  {
3929  /* Make sure tuples are correctly marked as not-HOT */
3930  HeapTupleClearHotUpdated(&oldtup);
3931  HeapTupleClearHeapOnly(heaptup);
3932  HeapTupleClearHeapOnly(newtup);
3933  }
3934 
3935  RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
3936 
3937 
3938  /* Clear obsolete visibility flags, possibly set by ourselves above... */
3939  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3940  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3941  /* ... and store info about transaction updating this tuple */
3942  Assert(TransactionIdIsValid(xmax_old_tuple));
3943  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
3944  oldtup.t_data->t_infomask |= infomask_old_tuple;
3945  oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
3946  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3947 
3948  /* record address of new tuple in t_ctid of old one */
3949  oldtup.t_data->t_ctid = heaptup->t_self;
3950 
3951  /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
3952  if (PageIsAllVisible(BufferGetPage(buffer)))
3953  {
3954  all_visible_cleared = true;
3955  PageClearAllVisible(BufferGetPage(buffer));
3956  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3957  vmbuffer, VISIBILITYMAP_VALID_BITS);
3958  }
3959  if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
3960  {
3961  all_visible_cleared_new = true;
3962  PageClearAllVisible(BufferGetPage(newbuf));
3963  visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
3964  vmbuffer_new, VISIBILITYMAP_VALID_BITS);
3965  }
3966 
3967  if (newbuf != buffer)
3968  MarkBufferDirty(newbuf);
3969  MarkBufferDirty(buffer);
3970 
3971  /* XLOG stuff */
3972  if (RelationNeedsWAL(relation))
3973  {
3974  XLogRecPtr recptr;
3975 
3976  /*
3977  * For logical decoding we need combo CIDs to properly decode the
3978  * catalog.
3979  */
3980  if (RelationIsAccessibleInLogicalDecoding(relation))
3981  {
3982  log_heap_new_cid(relation, &oldtup);
3983  log_heap_new_cid(relation, heaptup);
3984  }
3985 
3986  recptr = log_heap_update(relation, buffer,
3987  newbuf, &oldtup, heaptup,
3988  old_key_tuple,
3989  all_visible_cleared,
3990  all_visible_cleared_new);
3991  if (newbuf != buffer)
3992  {
3993  PageSetLSN(BufferGetPage(newbuf), recptr);
3994  }
3995  PageSetLSN(BufferGetPage(buffer), recptr);
3996  }
3997 
3998  END_CRIT_SECTION();
3999 
4000  if (newbuf != buffer)
4001  LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
4002  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4003 
4004  /*
4005  * Mark old tuple for invalidation from system caches at next command
4006  * boundary, and mark the new tuple for invalidation in case we abort. We
4007  * have to do this before releasing the buffer because oldtup is in the
4008  * buffer. (heaptup is all in local memory, but it's necessary to process
4009  * both tuple versions in one call to inval.c so we can avoid redundant
4010  * sinval messages.)
4011  */
4012  CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
4013 
4014  /* Now we can release the buffer(s) */
4015  if (newbuf != buffer)
4016  ReleaseBuffer(newbuf);
4017  ReleaseBuffer(buffer);
4018  if (BufferIsValid(vmbuffer_new))
4019  ReleaseBuffer(vmbuffer_new);
4020  if (BufferIsValid(vmbuffer))
4021  ReleaseBuffer(vmbuffer);
4022 
4023  /*
4024  * Release the lmgr tuple lock, if we had it.
4025  */
4026  if (have_tuple_lock)
4027  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4028 
4029  pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4030 
4031  /*
4032  * If heaptup is a private copy, release it. Don't forget to copy t_self
4033  * back to the caller's image, too.
4034  */
4035  if (heaptup != newtup)
4036  {
4037  newtup->t_self = heaptup->t_self;
4038  heap_freetuple(heaptup);
4039  }
4040 
4041  /*
4042  * If it is a HOT update, the update may still need to update summarized
4043  * indexes, lest we fail to update those summaries and get incorrect
4044  * results (for example, minmax bounds of the block may change with this
4045  * update).
4046  */
4047  if (use_hot_update)
4048  {
4049  if (summarized_update)
4050  *update_indexes = TU_Summarizing;
4051  else
4052  *update_indexes = TU_None;
4053  }
4054  else
4055  *update_indexes = TU_All;
4056 
4057  if (old_key_tuple != NULL && old_key_copied)
4058  heap_freetuple(old_key_tuple);
4059 
4060  bms_free(hot_attrs);
4061  bms_free(sum_attrs);
4062  bms_free(key_attrs);
4063  bms_free(id_attrs);
4064  bms_free(modified_attrs);
4065  bms_free(interesting_attrs);
4066 
4067  return TM_Ok;
4068 }
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:917
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
static void PageSetFull(Page page)
Definition: bufpage.h:418
TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple)
Definition: heapam.c:7419
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition: heapam.c:4248
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition: heapam.c:8676
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
#define HeapTupleSetHotUpdated(tuple)
Definition: htup_details.h:677
#define HEAP2_XACT_MASK
Definition: htup_details.h:279
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:197
#define HeapTupleHeaderSetCmin(tup, cid)
Definition: htup_details.h:393
#define HEAP_XACT_MASK
Definition: htup_details.h:215
#define HeapTupleSetHeapOnly(tuple)
Definition: htup_details.h:686
#define HeapTupleClearHeapOnly(tuple)
Definition: htup_details.h:689
#define HEAP_UPDATED
Definition: htup_details.h:210
#define HEAP_XMAX_KEYSHR_LOCK
Definition: htup_details.h:194
#define HeapTupleClearHotUpdated(tuple)
Definition: htup_details.h:680
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition: relcache.c:5233
@ INDEX_ATTR_BITMAP_KEY
Definition: relcache.h:61
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition: relcache.h:64
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition: relcache.h:65
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition: relcache.h:63
@ TU_Summarizing
Definition: tableam.h:125
@ TU_All
Definition: tableam.h:122
@ TU_None
Definition: tableam.h:119
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188

References Assert, bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_lock::flags, GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_LOCKED_UPGRADED, HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly, HeapTupleClearHotUpdated, HeapTupleGetUpdateXid(), HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderSetCmax, HeapTupleHeaderSetCmin, HeapTupleHeaderSetXmax, HeapTupleHeaderSetXmin, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly, HeapTupleSetHotUpdated, INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, xl_heap_lock::offnum, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
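
The following is a hypothetical caller sketch, not code from heapam.c: it shows the minimal bookkeeping a direct caller of heap_update() has to do, roughly in the spirit of simple_heap_update(). It assumes the relation is already open and suitably locked and that concurrent-update failures can simply be reported with elog(); the helper name update_tuple_or_die is an illustrative assumption, and real callers must still insert new index entries as indicated by *update_indexes.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical helper: update one tuple, treating any concurrency failure as an error. */
static void
update_tuple_or_die(Relation rel, ItemPointer otid, HeapTuple newtup)
{
    TM_Result        result;
    TM_FailureData   tmfd;
    LockTupleMode    lockmode;
    TU_UpdateIndexes update_indexes;

    result = heap_update(rel, otid, newtup,
                         GetCurrentCommandId(true),  /* cid stamped on the new tuple version */
                         InvalidSnapshot,            /* no crosscheck snapshot */
                         true,                       /* wait for concurrent updaters/lockers */
                         &tmfd, &lockmode, &update_indexes);

    switch (result)
    {
        case TM_Ok:
            /* Success; the caller still owes index insertions per update_indexes. */
            break;
        case TM_SelfModified:
            elog(ERROR, "tuple already updated by self");
            break;
        case TM_Updated:
            elog(ERROR, "tuple concurrently updated");
            break;
        case TM_Deleted:
            elog(ERROR, "tuple concurrently deleted");
            break;
        default:
            elog(ERROR, "unexpected heap_update result: %d", (int) result);
            break;
    }
}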

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
struct VacuumParams params,
BufferAccessStrategy  bstrategy 
)

Definition at line 293 of file vacuumlazy.c.

295 {
296  LVRelState *vacrel;
297  bool verbose,
298  instrument,
299  skipwithvm,
300  frozenxid_updated,
301  minmulti_updated;
302  BlockNumber orig_rel_pages,
303  new_rel_pages,
304  new_rel_allvisible;
305  PGRUsage ru0;
306  TimestampTz starttime = 0;
307  PgStat_Counter startreadtime = 0,
308  startwritetime = 0;
309  WalUsage startwalusage = pgWalUsage;
310  BufferUsage startbufferusage = pgBufferUsage;
311  ErrorContextCallback errcallback;
312  char **indnames = NULL;
313 
314  verbose = (params->options & VACOPT_VERBOSE) != 0;
315  instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
316  params->log_min_duration >= 0));
317  if (instrument)
318  {
319  pg_rusage_init(&ru0);
320  starttime = GetCurrentTimestamp();
321  if (track_io_timing)
322  {
323  startreadtime = pgStatBlockReadTime;
324  startwritetime = pgStatBlockWriteTime;
325  }
326  }
327 
328  pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
329  RelationGetRelid(rel));
330 
331  /*
332  * Setup error traceback support for ereport() first. The idea is to set
333  * up an error context callback to display additional information on any
334  * error during a vacuum. During different phases of vacuum, we update
335  * the state so that the error context callback always display current
336  * information.
337  *
338  * Copy the names of heap rel into local memory for error reporting
339  * purposes, too. It isn't always safe to assume that we can get the name
340  * of each rel. It's convenient for code in lazy_scan_heap to always use
341  * these temp copies.
342  */
343  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
344  vacrel->dbname = get_database_name(MyDatabaseId);
345  vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
346  vacrel->relname = pstrdup(RelationGetRelationName(rel));
347  vacrel->indname = NULL;
348  vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
349  vacrel->verbose = verbose;
350  errcallback.callback = vacuum_error_callback;
351  errcallback.arg = vacrel;
352  errcallback.previous = error_context_stack;
353  error_context_stack = &errcallback;
354 
355  /* Set up high level stuff about rel and its indexes */
356  vacrel->rel = rel;
357  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
358  &vacrel->indrels);
359  vacrel->bstrategy = bstrategy;
360  if (instrument && vacrel->nindexes > 0)
361  {
362  /* Copy index names used by instrumentation (not error reporting) */
363  indnames = palloc(sizeof(char *) * vacrel->nindexes);
364  for (int i = 0; i < vacrel->nindexes; i++)
365  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
366  }
367 
368  /*
369  * The index_cleanup param either disables index vacuuming and cleanup or
370  * forces it to go ahead when we would otherwise apply the index bypass
371  * optimization. The default is 'auto', which leaves the final decision
372  * up to lazy_vacuum().
373  *
374  * The truncate param allows user to avoid attempting relation truncation,
375  * though it can't force truncation to happen.
376  */
377  Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
378  Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
379  params->truncate != VACOPTVALUE_AUTO);
380 
381  /*
382  * While VacuumFailSafeActive is reset to false before calling this, we
383  * still need to reset it here due to recursive calls.
384  */
385  VacuumFailsafeActive = false;
386  vacrel->consider_bypass_optimization = true;
387  vacrel->do_index_vacuuming = true;
388  vacrel->do_index_cleanup = true;
389  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
390  if (params->index_cleanup == VACOPTVALUE_DISABLED)
391  {
392  /* Force disable index vacuuming up-front */
393  vacrel->do_index_vacuuming = false;
394  vacrel->do_index_cleanup = false;
395  }
396  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
397  {
398  /* Force index vacuuming. Note that failsafe can still bypass. */
399  vacrel->consider_bypass_optimization = false;
400  }
401  else
402  {
403  /* Default/auto, make all decisions dynamically */
404  Assert(params->index_cleanup == VACOPTVALUE_AUTO);
405  }
406 
407  /* Initialize page counters explicitly (be tidy) */
408  vacrel->scanned_pages = 0;
409  vacrel->removed_pages = 0;
410  vacrel->frozen_pages = 0;
411  vacrel->lpdead_item_pages = 0;
412  vacrel->missed_dead_pages = 0;
413  vacrel->nonempty_pages = 0;
414  /* dead_items_alloc allocates vacrel->dead_items later on */
415 
416  /* Allocate/initialize output statistics state */
417  vacrel->new_rel_tuples = 0;
418  vacrel->new_live_tuples = 0;
419  vacrel->indstats = (IndexBulkDeleteResult **)
420  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
421 
422  /* Initialize remaining counters (be tidy) */
423  vacrel->num_index_scans = 0;
424  vacrel->tuples_deleted = 0;
425  vacrel->tuples_frozen = 0;
426  vacrel->lpdead_items = 0;
427  vacrel->live_tuples = 0;
428  vacrel->recently_dead_tuples = 0;
429  vacrel->missed_dead_tuples = 0;
430 
431  /*
432  * Get cutoffs that determine which deleted tuples are considered DEAD,
433  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
434  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
435  * happen in this order to ensure that the OldestXmin cutoff field works
436  * as an upper bound on the XIDs stored in the pages we'll actually scan
437  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
438  *
439  * Next acquire vistest, a related cutoff that's used in pruning. We use
440  * vistest in combination with OldestXmin to ensure that
441  * heap_page_prune_and_freeze() always removes any deleted tuple whose
442  * xmax is < OldestXmin. lazy_scan_prune must never become confused about
443  * whether a tuple should be frozen or removed. (In the future we might
444  * want to teach lazy_scan_prune to recompute vistest from time to time,
445  * to increase the number of dead tuples it can prune away.)
446  */
447  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
448  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
449  vacrel->vistest = GlobalVisTestFor(rel);
450  /* Initialize state used to track oldest extant XID/MXID */
451  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
452  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
453  vacrel->skippedallvis = false;
454  skipwithvm = true;
455  if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
456  {
457  /*
458  * Force aggressive mode, and disable skipping blocks using the
459  * visibility map (even those set all-frozen)
460  */
461  vacrel->aggressive = true;
462  skipwithvm = false;
463  }
464 
465  vacrel->skipwithvm = skipwithvm;
466 
467  if (verbose)
468  {
469  if (vacrel->aggressive)
470  ereport(INFO,
471  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
472  vacrel->dbname, vacrel->relnamespace,
473  vacrel->relname)));
474  else
475  ereport(INFO,
476  (errmsg("vacuuming \"%s.%s.%s\"",
477  vacrel->dbname, vacrel->relnamespace,
478  vacrel->relname)));
479  }
480 
481  /*
482  * Allocate dead_items memory using dead_items_alloc. This handles
483  * parallel VACUUM initialization as part of allocating shared memory
484  * space used for dead_items. (But do a failsafe precheck first, to
485  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
486  * is already dangerously old.)
487  */
488  lazy_check_wraparound_failsafe(vacrel);
489  dead_items_alloc(vacrel, params->nworkers);
490 
491  /*
492  * Call lazy_scan_heap to perform all required heap pruning, index
493  * vacuuming, and heap vacuuming (plus related processing)
494  */
495  lazy_scan_heap(vacrel);
496 
497  /*
498  * Free resources managed by dead_items_alloc. This ends parallel mode in
499  * passing when necessary.
500  */
501  dead_items_cleanup(vacrel);
502  Assert(!IsInParallelMode());
503 
504  /*
505  * Update pg_class entries for each of rel's indexes where appropriate.
506  *
507  * Unlike the later update to rel's pg_class entry, this is not critical.
508  * Maintains relpages/reltuples statistics used by the planner only.
509  */
510  if (vacrel->do_index_cleanup)
511  update_relstats_all_indexes(vacrel);
512 
513  /* Done with rel's indexes */
514  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
515 
516  /* Optionally truncate rel */
517  if (should_attempt_truncation(vacrel))
518  lazy_truncate_heap(vacrel);
519 
520  /* Pop the error context stack */
521  error_context_stack = errcallback.previous;
522 
523  /* Report that we are now doing final cleanup */
524  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
525  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
526 
527  /*
528  * Prepare to update rel's pg_class entry.
529  *
530  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
531  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
532  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
533  */
534  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
535  TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
536  vacrel->cutoffs.relfrozenxid,
537  vacrel->NewRelfrozenXid));
538  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
539  MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
540  vacrel->cutoffs.relminmxid,
541  vacrel->NewRelminMxid));
542  if (vacrel->skippedallvis)
543  {
544  /*
545  * Must keep original relfrozenxid in a non-aggressive VACUUM that
546  * chose to skip an all-visible page range. The state that tracks new
547  * values will have missed unfrozen XIDs from the pages we skipped.
548  */
549  Assert(!vacrel->aggressive);
550  vacrel->NewRelfrozenXid = vacrel->cutoffs.relfrozenxid;
551  vacrel->NewRelminMxid = vacrel->cutoffs.relminmxid;
552  }
553 
554  /*
555  * For safety, clamp relallvisible to be not more than what we're setting
556  * pg_class.relpages to
557  */
558  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
559  visibilitymap_count(rel, &new_rel_allvisible, NULL);
560  if (new_rel_allvisible > new_rel_pages)
561  new_rel_allvisible = new_rel_pages;
562 
563  /*
564  * Now actually update rel's pg_class entry.
565  *
566  * In principle new_live_tuples could be -1 indicating that we (still)
567  * don't know the tuple count. In practice that can't happen, since we
568  * scan every page that isn't skipped using the visibility map.
569  */
570  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
571  new_rel_allvisible, vacrel->nindexes > 0,
572  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
573  &frozenxid_updated, &minmulti_updated, false);
574 
575  /*
576  * Report results to the cumulative stats system, too.
577  *
578  * Deliberately avoid telling the stats system about LP_DEAD items that
579  * remain in the table due to VACUUM bypassing index and heap vacuuming.
580  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
581  * It seems like a good idea to err on the side of not vacuuming again too
582  * soon in cases where the failsafe prevented significant amounts of heap
583  * vacuuming.
584  */
585  pgstat_report_vacuum(RelationGetRelid(rel),
586  rel->rd_rel->relisshared,
587  Max(vacrel->new_live_tuples, 0),
588  vacrel->recently_dead_tuples +
589  vacrel->missed_dead_tuples);
590  pgstat_progress_end_command();
591 
592  if (instrument)
593  {
594  TimestampTz endtime = GetCurrentTimestamp();
595 
596  if (verbose || params->log_min_duration == 0 ||
597  TimestampDifferenceExceeds(starttime, endtime,
598  params->log_min_duration))
599  {
600  long secs_dur;
601  int usecs_dur;
602  WalUsage walusage;
603  BufferUsage bufferusage;
604  StringInfoData buf;
605  char *msgfmt;
606  int32 diff;
607  double read_rate = 0,
608  write_rate = 0;
609  int64 total_blks_hit;
610  int64 total_blks_read;
611  int64 total_blks_dirtied;
612 
613  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
614  memset(&walusage, 0, sizeof(WalUsage));
615  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
616  memset(&bufferusage, 0, sizeof(BufferUsage));
617  BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
618 
619  total_blks_hit = bufferusage.shared_blks_hit +
620  bufferusage.local_blks_hit;
621  total_blks_read = bufferusage.shared_blks_read +
622  bufferusage.local_blks_read;
623  total_blks_dirtied = bufferusage.shared_blks_dirtied +
624  bufferusage.local_blks_dirtied;
625 
626  initStringInfo(&buf);
627  if (verbose)
628  {
629  /*
630  * Aggressiveness already reported earlier, in dedicated
631  * VACUUM VERBOSE ereport
632  */
633  Assert(!params->is_wraparound);
634  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
635  }
636  else if (params->is_wraparound)
637  {
638  /*
639  * While it's possible for a VACUUM to be both is_wraparound
640  * and !aggressive, that's just a corner-case -- is_wraparound
641  * implies aggressive. Produce distinct output for the corner
642  * case all the same, just in case.
643  */
644  if (vacrel->aggressive)
645  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
646  else
647  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
648  }
649  else
650  {
651  if (vacrel->aggressive)
652  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
653  else
654  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
655  }
656  appendStringInfo(&buf, msgfmt,
657  vacrel->dbname,
658  vacrel->relnamespace,
659  vacrel->relname,
660  vacrel->num_index_scans);
661  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
662  vacrel->removed_pages,
663  new_rel_pages,
664  vacrel->scanned_pages,
665  orig_rel_pages == 0 ? 100.0 :
666  100.0 * vacrel->scanned_pages / orig_rel_pages);
667  appendStringInfo(&buf,
668  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
669  (long long) vacrel->tuples_deleted,
670  (long long) vacrel->new_rel_tuples,
671  (long long) vacrel->recently_dead_tuples);
672  if (vacrel->missed_dead_tuples > 0)
673  appendStringInfo(&buf,
674  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
675  (long long) vacrel->missed_dead_tuples,
676  vacrel->missed_dead_pages);
677  diff = (int32) (ReadNextTransactionId() -
678  vacrel->cutoffs.OldestXmin);
679  appendStringInfo(&buf,
680  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
681  vacrel->cutoffs.OldestXmin, diff);
682  if (frozenxid_updated)
683  {
684  diff = (int32) (vacrel->NewRelfrozenXid -
685  vacrel->cutoffs.relfrozenxid);
686  appendStringInfo(&buf,
687  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
688  vacrel->NewRelfrozenXid, diff);
689  }
690  if (minmulti_updated)
691  {
692  diff = (int32) (vacrel->NewRelminMxid -
693  vacrel->cutoffs.relminmxid);
694  appendStringInfo(&buf,
695  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
696  vacrel->NewRelminMxid, diff);
697  }
698  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
699  vacrel->frozen_pages,
700  orig_rel_pages == 0 ? 100.0 :
701  100.0 * vacrel->frozen_pages / orig_rel_pages,
702  (long long) vacrel->tuples_frozen);
703  if (vacrel->do_index_vacuuming)
704  {
705  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
706  appendStringInfoString(&buf, _("index scan not needed: "));
707  else
708  appendStringInfoString(&buf, _("index scan needed: "));
709 
710  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
711  }
712  else
713  {
714  if (!VacuumFailsafeActive)
715  appendStringInfoString(&buf, _("index scan bypassed: "));
716  else
717  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
718 
719  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
720  }
721  appendStringInfo(&buf, msgfmt,
722  vacrel->lpdead_item_pages,
723  orig_rel_pages == 0 ? 100.0 :
724  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
725  (long long) vacrel->lpdead_items);
726  for (int i = 0; i < vacrel->nindexes; i++)
727  {
728  IndexBulkDeleteResult *istat = vacrel->indstats[i];
729 
730  if (!istat)
731  continue;
732 
733  appendStringInfo(&buf,
734  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
735  indnames[i],
736  istat->num_pages,
737  istat->pages_newly_deleted,
738  istat->pages_deleted,
739  istat->pages_free);
740  }
741  if (track_io_timing)
742  {
743  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
744  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
745 
746  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
747  read_ms, write_ms);
748  }
749  if (secs_dur > 0 || usecs_dur > 0)
750  {
751  read_rate = (double) BLCKSZ * total_blks_read /
752  (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
753  write_rate = (double) BLCKSZ * total_blks_dirtied /
754  (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
755  }
756  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
757  read_rate, write_rate);
758  appendStringInfo(&buf,
759  _("buffer usage: %lld hits, %lld reads, %lld dirtied\n"),
760  (long long) total_blks_hit,
761  (long long) total_blks_read,
762  (long long) total_blks_dirtied);
763  appendStringInfo(&buf,
764  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
765  (long long) walusage.wal_records,
766  (long long) walusage.wal_fpi,
767  (unsigned long long) walusage.wal_bytes);
768  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
769 
770  ereport(verbose ? INFO : LOG,
771  (errmsg_internal("%s", buf.data)));
772  pfree(buf.data);
773  }
774  }
775 
776  /* Cleanup index statistics and index names */
777  for (int i = 0; i < vacrel->nindexes; i++)
778  {
779  if (vacrel->indstats[i])
780  pfree(vacrel->indstats[i]);
781 
782  if (instrument)
783  pfree(indnames[i]);
784  }
785 }
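
The following is a hypothetical dispatch sketch, not code from vacuumlazy.c: heap_vacuum_rel() is normally reached through the heap table AM's relation_vacuum callback, with vacuum.c filling in VacuumParams from the VACUUM command or autovacuum settings. The sketch shows one plausible way to populate a minimal VacuumParams for a plain, non-aggressive VACUUM of a single heap relation; the helper name vacuum_one_heap_rel and the specific parameter values are illustrative assumptions, not the values vacuum.c actually computes.

#include "postgres.h"
#include "access/heapam.h"
#include "commands/vacuum.h"
#include "utils/rel.h"

/* Hypothetical helper: run a plain lazy VACUUM of one already-opened, locked heap rel. */
static void
vacuum_one_heap_rel(Relation rel, BufferAccessStrategy bstrategy)
{
    VacuumParams params;

    memset(&params, 0, sizeof(params));
    params.options = VACOPT_VACUUM;           /* plain VACUUM, no VERBOSE/FREEZE/FULL */
    params.freeze_min_age = -1;               /* -1: fall back to the GUC defaults */
    params.freeze_table_age = -1;
    params.multixact_freeze_min_age = -1;
    params.multixact_freeze_table_age = -1;
    params.index_cleanup = VACOPTVALUE_AUTO;  /* leave the bypass decision to lazy_vacuum() */
    params.truncate = VACOPTVALUE_ENABLED;    /* truncation allowed (AUTO is resolved by vacuum.c) */
    params.is_wraparound = false;
    params.log_min_duration = -1;             /* no autovacuum-style duration logging */
    params.nworkers = -1;                     /* assumption: negative requests no parallel index vacuuming */

    heap_vacuum_rel(rel, &params, bstrategy);
}
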
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1720
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1780
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
bool track_io_timing
Definition: bufmgr.c:143
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
signed int int32
Definition: c.h:482
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3187
ErrorContextCallback * error_context_stack
Definition: elog.c:94
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define INFO
Definition: elog.h:34
int verbose
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3366
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void * palloc0(Size size)
Definition: mcxt.c:1347
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:373
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:120
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:38
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define RelationGetNamespace(relation)
Definition: rel.h:546
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:94
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:179
void initStringInfo(StringInfo str)
Definition: stringinfo.c:56
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
struct ErrorContextCallback * previous
Definition: elog.h:296
void(* callback)(void *arg)
Definition: elog.h:297
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_newly_deleted
Definition: genam.h:81
BlockNumber pages_free
Definition: genam.h:83
BlockNumber num_pages
Definition: genam.h:77
bool verbose
Definition: vacuumlazy.c:173
int nindexes
Definition: vacuumlazy.c:139
int64 tuples_deleted
Definition: vacuumlazy.c:205
BlockNumber nonempty_pages
Definition: vacuumlazy.c:194
bool do_rel_truncate
Definition: vacuumlazy.c:155
BlockNumber scanned_pages
Definition: vacuumlazy.c:189
bool aggressive
Definition: vacuumlazy.c:146
GlobalVisState * vistest
Definition: vacuumlazy.c:159
BlockNumber removed_pages
Definition: vacuumlazy.c:190
int num_index_scans
Definition: vacuumlazy.c:203
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:200
double new_live_tuples
Definition: vacuumlazy.c:198
double new_rel_tuples
Definition: vacuumlazy.c:197
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:161
Relation rel
Definition: vacuumlazy.c:137
bool consider_bypass_optimization
Definition: vacuumlazy.c:150
BlockNumber rel_pages
Definition: vacuumlazy.c:188
int64 recently_dead_tuples
Definition: vacuumlazy.c:209
int64 tuples_frozen
Definition: vacuumlazy.c:206
BlockNumber frozen_pages
Definition: vacuumlazy.c:191
char * dbname
Definition: vacuumlazy.c:166
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:193
char * relnamespace
Definition: vacuumlazy.c:167
int64 live_tuples
Definition: vacuumlazy.c:208
int64 lpdead_items
Definition: vacuumlazy.c:207
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:142
bool skippedallvis
Definition: vacuumlazy.c:163
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:192
Relation * indrels
Definition: vacuumlazy.c:138
bool skipwithvm
Definition: vacuumlazy.c:148
bool do_index_cleanup
Definition: vacuumlazy.c:154
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:162
int64 missed_dead_tuples
Definition: vacuumlazy.c:210
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:158
char * relname
Definition: vacuumlazy.c:168
VacErrPhase phase
Definition: vacuumlazy.c:172
char * indname
Definition: vacuumlazy.c:169
bool do_index_vacuuming
Definition: vacuumlazy.c:153
int nworkers
Definition: vacuum.h:239
VacOptValue truncate
Definition: vacuum.h:231
bits32 options
Definition: vacuum.h:219
bool is_wraparound
Definition: vacuum.h:226
int log_min_duration
Definition: vacuum.h:227
VacOptValue index_cleanup
Definition: vacuum.h:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_records
Definition: instrument.h:53
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2298
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1410
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2341
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1084
bool VacuumFailsafeActive
Definition: vacuum.c:95
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:2940
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3081
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3116
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2557
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2537
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:126
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:824
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2307
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:2830
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, LVRelState::aggressive, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, LVRelState::bstrategy, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, LVRelState::consider_bypass_optimization, LVRelState::cutoffs, LVRelState::dbname, dead_items_alloc(), dead_items_cleanup(), LVRelState::do_index_cleanup, LVRelState::do_index_vacuuming, LVRelState::do_rel_truncate, ereport, errmsg(), errmsg_internal(), error_context_stack, VacuumCutoffs::FreezeLimit, LVRelState::frozen_pages, get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), i, VacuumParams::index_cleanup, LVRelState::indname, LVRelState::indrels, LVRelState::indstats, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), LVRelState::live_tuples, BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_min_duration, LVRelState::lpdead_item_pages, LVRelState::lpdead_items, Max, LVRelState::missed_dead_pages, LVRelState::missed_dead_tuples, VacuumCutoffs::MultiXactCutoff, MultiXactIdPrecedesOrEquals(), MyDatabaseId, LVRelState::new_live_tuples, LVRelState::new_rel_tuples, LVRelState::NewRelfrozenXid, LVRelState::NewRelminMxid, LVRelState::nindexes, NoLock, LVRelState::nonempty_pages, LVRelState::num_index_scans, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc(), palloc0(), pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, LVRelState::phase, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, pstrdup(), RelationData::rd_rel, ReadNextTransactionId(), LVRelState::recently_dead_tuples, LVRelState::rel, LVRelState::rel_pages, RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, LVRelState::relname, LVRelState::relnamespace, LVRelState::removed_pages, RowExclusiveLock, LVRelState::scanned_pages, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), LVRelState::skippedallvis, LVRelState::skipwithvm, TimestampDifference(), TimestampDifferenceExceeds(), track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, LVRelState::tuples_deleted, LVRelState::tuples_frozen, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, LVRelState::verbose, verbose, visibilitymap_count(), LVRelState::vistest, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, and WalUsageAccumDiff().

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)

Definition at line 9083 of file heapam.c.

9086 {
9087  TransactionId xid;
9088  HTSV_Result htsvResult;
9089 
9090  if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9091  return;
9092 
9093  /*
9094  * Check to see whether the tuple has been written to by a concurrent
9095  * transaction, either to create it not visible to us, or to delete it
9096  * while it is visible to us. The "visible" bool indicates whether the
9097  * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9098  * is going on with it.
9099  *
9100  * In the event of a concurrently inserted tuple that also happens to have
9101  * been concurrently updated (by a separate transaction), the xmin of the
9102  * tuple will be used -- not the updater's xid.
9103  */
9104  htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
9105  switch (htsvResult)
9106  {
9107  case HEAPTUPLE_LIVE:
9108  if (visible)
9109  return;
9110  xid = HeapTupleHeaderGetXmin(tuple->t_data);
9111  break;
9112  case HEAPTUPLE_RECENTLY_DEAD:
9113  case HEAPTUPLE_DELETE_IN_PROGRESS:
9114  if (visible)
9115  xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9116  else
9117  xid = HeapTupleHeaderGetXmin(tuple->t_data);
9118 
9119  if (TransactionIdPrecedes(xid, TransactionXmin))
9120  {
9121  /* This is like the HEAPTUPLE_DEAD case */
9122  Assert(!visible);
9123  return;
9124  }
9125  break;
9126  case HEAPTUPLE_INSERT_IN_PROGRESS:
9127  xid = HeapTupleHeaderGetXmin(tuple->t_data);
9128  break;
9129  case HEAPTUPLE_DEAD:
9130  Assert(!visible);
9131  return;
9132  default:
9133 
9134  /*
9135  * The only way to get to this default clause is if a new value is
9136  * added to the enum type without adding it to this switch
9137  * statement. That's a bug, so elog.
9138  */
9139  elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9140 
9141  /*
9142  * In spite of having all enum values covered and calling elog on
9143  * this default, some compilers think this is a code path which
9144  * allows xid to be used below without initialization. Silence
9145  * that warning.
9146  */
9147  xid = InvalidTransactionId;
9148  }
9149 
9150  Assert(TransactionIdIsValid(xid));
9151  Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
9152 
9153  /*
9154  * Find top level xid. Bail out if xid is too early to be a conflict, or
9155  * if it's our own xid.
9156  */
9157  if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
9158  return;
9159  xid = SubTransGetTopmostTransaction(xid);
9160  if (TransactionIdPrecedes(xid, TransactionXmin))
9161  return;
9162 
9163  CheckForSerializableConflictOut(relation, xid, snapshot);
9164 }
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition: predicate.c:4013
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:163
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:440

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
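
A minimal backend-side sketch of the usual call pattern, mirroring callers such as heap_fetch(): determine MVCC visibility first, then let the predicate-lock machinery record any read-write conflict whether or not the tuple turned out to be visible. The helper name and the assumption that the caller already holds a pin on the buffer are illustrative, not part of this API.

#include "access/heapam.h"
#include "storage/bufmgr.h"

/* illustrative helper; assumes the buffer is already pinned by the caller */
static bool
fetch_visible_with_conflict_check(Relation rel, Buffer buf,
                                  HeapTuple tuple, Snapshot snapshot)
{
    bool        visible;

    LockBuffer(buf, BUFFER_LOCK_SHARE);

    visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buf);

    /* must run even for invisible tuples; conflicts arise in both cases */
    HeapCheckForSerializableConflictOut(visible, rel, tuple, buf, snapshot);

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    return visible;
}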

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)

Definition at line 1520 of file heapam_visibility.c.

1521 {
1522  TransactionId xmax;
1523 
1524  /* if there's no valid Xmax, then there's obviously no update either */
1525  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1526  return true;
1527 
1528  if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1529  return true;
1530 
1531  /* invalid xmax means no update */
1532  if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
1533  return true;
1534 
1535  /*
1536  * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1537  * necessarily have been updated
1538  */
1539  if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1540  return false;
1541 
1542  /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1543  xmax = HeapTupleGetUpdateXid(tuple);
1544 
1545  /* not LOCKED_ONLY, so it has to have an xmax */
1546  Assert(TransactionIdIsValid(xmax));
1547 
1548  if (TransactionIdIsCurrentTransactionId(xmax))
1549  return false;
1550  if (TransactionIdIsInProgress(xmax))
1551  return false;
1552  if (TransactionIdDidCommit(xmax))
1553  return false;
1554 
1555  /*
1556  * not current, not in progress, not committed -- must have aborted or
1557  * crashed
1558  */
1559  return true;
1560 }
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1402

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
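
Where this matters in callers such as heap_delete(): a set xmax does not necessarily mean the row version was deleted or updated; it may only record a row lock, possibly a multixact whose updating member aborted. A hypothetical helper, assuming access/heapam.h and access/htup_details.h, shown only to illustrate the check:

/* illustrative only, not a function in the tree */
static bool
xmax_is_real_deleter(HeapTupleHeader tuple)
{
    if (tuple->t_infomask & HEAP_XMAX_INVALID)
        return false;           /* no deleter recorded at all */

    /* a lock-only xmax (including an aborted multixact updater) is no delete */
    return !HeapTupleHeaderIsOnlyLocked(tuple);
}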

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
struct GlobalVisState vistest 
)

Definition at line 1465 of file heapam_visibility.c.

1466 {
1467  HeapTupleHeader tuple = htup->t_data;
1468 
1469  Assert(ItemPointerIsValid(&htup->t_self));
1470  Assert(htup->t_tableOid != InvalidOid);
1471 
1472  /*
1473  * If the inserting transaction is marked invalid, then it aborted, and
1474  * the tuple is definitely dead. If it's marked neither committed nor
1475  * invalid, then we assume it's still alive (since the presumption is that
1476  * all relevant hint bits were just set moments ago).
1477  */
1478  if (!HeapTupleHeaderXminCommitted(tuple))
1479  return HeapTupleHeaderXminInvalid(tuple);
1480 
1481  /*
1482  * If the inserting transaction committed, but any deleting transaction
1483  * aborted, the tuple is still alive.
1484  */
1485  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1486  return false;
1487 
1488  /*
1489  * If the XMAX is just a lock, the tuple is still alive.
1490  */
1491  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1492  return false;
1493 
1494  /*
1495  * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1496  * know without checking pg_multixact.
1497  */
1498  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1499  return false;
1500 
1501  /* If deleter isn't known to have committed, assume it's still running. */
1502  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1503  return false;
1504 
1505  /* Deleter committed, so tuple is dead if the XID is old enough. */
1506  return GlobalVisTestIsRemovableXid(vistest,
1507  HeapTupleHeaderGetRawXmax(tuple));
1508 }
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define HeapTupleHeaderXminInvalid(tup)
Definition: htup_details.h:325
#define InvalidOid
Definition: postgres_ext.h:36

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)

Definition at line 458 of file heapam_visibility.c.

460 {
461  HeapTupleHeader tuple = htup->t_data;
462 
463  Assert(ItemPointerIsValid(&htup->t_self));
464  Assert(htup->t_tableOid != InvalidOid);
465 
466  if (!HeapTupleHeaderXminCommitted(tuple))
467  {
468  if (HeapTupleHeaderXminInvalid(tuple))
469  return TM_Invisible;
470 
471  /* Used by pre-9.0 binary upgrades */
472  if (tuple->t_infomask & HEAP_MOVED_OFF)
473  {
475 
477  return TM_Invisible;
478  if (!TransactionIdIsInProgress(xvac))
479  {
480  if (TransactionIdDidCommit(xvac))
481  {
482  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
484  return TM_Invisible;
485  }
486  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
488  }
489  }
490  /* Used by pre-9.0 binary upgrades */
491  else if (tuple->t_infomask & HEAP_MOVED_IN)
492  {
494 
496  {
497  if (TransactionIdIsInProgress(xvac))
498  return TM_Invisible;
499  if (TransactionIdDidCommit(xvac))
500  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
502  else
503  {
504  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
506  return TM_Invisible;
507  }
508  }
509  }
510  else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
511  {
512  if (HeapTupleHeaderGetCmin(tuple) >= curcid)
513  return TM_Invisible; /* inserted after scan started */
514 
515  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
516  return TM_Ok;
517 
518  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
519  {
520  TransactionId xmax;
521 
522  xmax = HeapTupleHeaderGetRawXmax(tuple);
523 
524  /*
525  * Careful here: even though this tuple was created by our own
526  * transaction, it might be locked by other transactions, if
527  * the original version was key-share locked when we updated
528  * it.
529  */
530 
531  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
532  {
533  if (MultiXactIdIsRunning(xmax, true))
534  return TM_BeingModified;
535  else
536  return TM_Ok;
537  }
538 
539  /*
540  * If the locker is gone, then there is nothing of interest
541  * left in this Xmax; otherwise, report the tuple as
542  * locked/updated.
543  */
544  if (!TransactionIdIsInProgress(xmax))
545  return TM_Ok;
546  return TM_BeingModified;
547  }
548 
549  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
550  {
551  TransactionId xmax;
552 
553  xmax = HeapTupleGetUpdateXid(tuple);
554 
555  /* not LOCKED_ONLY, so it has to have an xmax */
556  Assert(TransactionIdIsValid(xmax));
557 
558  /* deleting subtransaction must have aborted */
559  if (!TransactionIdIsCurrentTransactionId(xmax))
560  {
561  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
562  false))
563  return TM_BeingModified;
564  return TM_Ok;
565  }
566  else
567  {
568  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
569  return TM_SelfModified; /* updated after scan started */
570  else
571  return TM_Invisible; /* updated before scan started */
572  }
573  }
574 
575  if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
576  {
577  /* deleting subtransaction must have aborted */
578  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
580  return TM_Ok;
581  }
582 
583  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
584  return TM_SelfModified; /* updated after scan started */
585  else
586  return TM_Invisible; /* updated before scan started */
587  }
588  else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
589  return TM_Invisible;
590  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
591  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
592  HeapTupleHeaderGetRawXmin(tuple));
593  else
594  {
595  /* it must have aborted or crashed */
596  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
598  return TM_Invisible;
599  }
600  }
601 
602  /* by here, the inserting transaction has committed */
603 
604  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
605  return TM_Ok;
606 
607  if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
608  {
610  return TM_Ok;
611  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
612  return TM_Updated; /* updated by other */
613  else
614  return TM_Deleted; /* deleted by other */
615  }
616 
617  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
618  {
619  TransactionId xmax;
620 
621  if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
622  return TM_Ok;
623 
624  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
625  {
626  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
627  return TM_BeingModified;
628 
629  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
630  return TM_Ok;
631  }
632 
633  xmax = HeapTupleGetUpdateXid(tuple);
634  if (!TransactionIdIsValid(xmax))
635  {
637  return TM_BeingModified;
638  }
639 
640  /* not LOCKED_ONLY, so it has to have an xmax */
642 
644  {
645  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
646  return TM_SelfModified; /* updated after scan started */
647  else
648  return TM_Invisible; /* updated before scan started */
649  }
650 
652  return TM_BeingModified;
653 
654  if (TransactionIdDidCommit(xmax))
655  {
656  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
657  return TM_Updated;
658  else
659  return TM_Deleted;
660  }
661 
662  /*
663  * By here, the update in the Xmax is either aborted or crashed, but
664  * what about the other members?
665  */
666 
668  {
669  /*
670  * There's no member, even just a locker, alive anymore, so we can
671  * mark the Xmax as invalid.
672  */
673  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
675  return TM_Ok;
676  }
677  else
678  {
679  /* There are lockers running */
680  return TM_BeingModified;
681  }
682  }
683 
685  {
687  return TM_BeingModified;
688  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
689  return TM_SelfModified; /* updated after scan started */
690  else
691  return TM_Invisible; /* updated before scan started */
692  }
693 
695  return TM_BeingModified;
696 
698  {
699  /* it must have aborted or crashed */
700  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
702  return TM_Ok;
703  }
704 
705  /* xmax transaction committed */
706 
708  {
709  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
711  return TM_Ok;
712  }
713 
714  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
716  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
717  return TM_Updated; /* updated by other */
718  else
719  return TM_Deleted; /* deleted by other */
720 }
CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup)
Definition: combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
#define HEAP_XMIN_COMMITTED
Definition: htup_details.h:204
#define HEAP_MOVED_IN
Definition: htup_details.h:212
#define HEAP_XMIN_INVALID
Definition: htup_details.h:205
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:599

References Assert, HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetXvac, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().
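
The TM_Result is what callers such as heap_update() branch on while holding an exclusive content lock on the buffer. A hedged sketch of that decision follows; the helper name and the boolean simplification are assumptions, and real callers additionally wait on in-progress lockers and follow update chains.

/* illustrative helper; caller holds BUFFER_LOCK_EXCLUSIVE on buf */
static bool
tuple_is_directly_updatable(HeapTuple tup, CommandId cid, Buffer buf)
{
    TM_Result   result = HeapTupleSatisfiesUpdate(tup, cid, buf);

    switch (result)
    {
        case TM_Ok:
            return true;        /* safe to update right away */
        case TM_BeingModified:
        case TM_SelfModified:
        case TM_Updated:
        case TM_Deleted:
        case TM_Invisible:
            return false;       /* caller must wait, chase the chain, or error */
        default:
            elog(ERROR, "unrecognized HeapTupleSatisfiesUpdate result: %u",
                 (unsigned) result);
            return false;       /* keep compiler quiet */
    }
}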

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)

Definition at line 1162 of file heapam_visibility.c.

1164 {
1165  TransactionId dead_after = InvalidTransactionId;
1166  HTSV_Result res;
1167 
1168  res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);
1169 
1170  if (res == HEAPTUPLE_RECENTLY_DEAD)
1171  {
1172  Assert(TransactionIdIsValid(dead_after));
1173 
1174  if (TransactionIdPrecedes(dead_after, OldestXmin))
1175  res = HEAPTUPLE_DEAD;
1176  }
1177  else
1178  Assert(!TransactionIdIsValid(dead_after));
1179 
1180  return res;
1181 }
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References Assert, HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), InvalidTransactionId, res, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_page_is_all_visible(), heapam_index_build_range_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_tuple(), HeapCheckForSerializableConflictOut(), lazy_scan_noprune(), statapprox_heap(), and tuple_all_visible().
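
A minimal sketch of the classification loop used by vacuum-style callers such as lazy_scan_noprune(): the caller holds a pin and at least a share lock on the page and has computed an OldestXmin cutoff. The helper itself is hypothetical.

#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/* hypothetical helper; caller holds a pin and at least a share lock on buf */
static int
count_dead_tuples_on_page(Relation rel, Buffer buf, TransactionId OldestXmin)
{
    Page        page = BufferGetPage(buf);
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
    OffsetNumber offnum;
    int         ndead = 0;

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleData tuple;

        if (!ItemIdIsNormal(itemid))
            continue;           /* skip unused, redirect and dead line pointers */

        tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
        tuple.t_len = ItemIdGetLength(itemid);
        tuple.t_tableOid = RelationGetRelid(rel);
        ItemPointerSet(&tuple.t_self, BufferGetBlockNumber(buf), offnum);

        if (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf) == HEAPTUPLE_DEAD)
            ndead++;
    }

    return ndead;
}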

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)

Definition at line 1196 of file heapam_visibility.c.

1197 {
1198  HeapTupleHeader tuple = htup->t_data;
1199 
1200  Assert(ItemPointerIsValid(&htup->t_self));
1201  Assert(htup->t_tableOid != InvalidOid);
1202  Assert(dead_after != NULL);
1203 
1204  *dead_after = InvalidTransactionId;
1205 
1206  /*
1207  * Has inserting transaction committed?
1208  *
1209  * If the inserting transaction aborted, then the tuple was never visible
1210  * to any other transaction, so we can delete it immediately.
1211  */
1212  if (!HeapTupleHeaderXminCommitted(tuple))
1213  {
1214  if (HeapTupleHeaderXminInvalid(tuple))
1215  return HEAPTUPLE_DEAD;
1216  /* Used by pre-9.0 binary upgrades */
1217  else if (tuple->t_infomask & HEAP_MOVED_OFF)
1218  {
1219  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1220 
1223  if (TransactionIdIsInProgress(xvac))
1225  if (TransactionIdDidCommit(xvac))
1226  {
1227  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1229  return HEAPTUPLE_DEAD;
1230  }
1231  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1233  }
1234  /* Used by pre-9.0 binary upgrades */
1235  else if (tuple->t_infomask & HEAP_MOVED_IN)
1236  {
1237  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1238 
1241  if (TransactionIdIsInProgress(xvac))
1243  if (TransactionIdDidCommit(xvac))
1244  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1246  else
1247  {
1248  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1250  return HEAPTUPLE_DEAD;
1251  }
1252  }
1254  {
1255  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1257  /* only locked? run infomask-only check first, for performance */
1258  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
1261  /* inserted and then deleted by same xact */
1264  /* deleting subtransaction must have aborted */
1266  }
1268  {
1269  /*
1270  * It'd be possible to discern between INSERT/DELETE in progress
1271  * here by looking at xmax - but that doesn't seem beneficial for
1272  * the majority of callers and even detrimental for some. We'd
1273  * rather have callers look at/wait for xmin than xmax. It's
1274  * always correct to return INSERT_IN_PROGRESS because that's
1275  * what's happening from the view of other backends.
1276  */
1278  }
1280  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1281  HeapTupleHeaderGetRawXmin(tuple));
1282  else
1283  {
1284  /*
1285  * Not in Progress, Not Committed, so either Aborted or crashed
1286  */
1287  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1289  return HEAPTUPLE_DEAD;
1290  }
1291 
1292  /*
1293  * At this point the xmin is known committed, but we might not have
1294  * been able to set the hint bit yet; so we can no longer Assert that
1295  * it's set.
1296  */
1297  }
1298 
1299  /*
1300  * Okay, the inserter committed, so it was good at some point. Now what
1301  * about the deleting transaction?
1302  */
1303  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1304  return HEAPTUPLE_LIVE;
1305 
1307  {
1308  /*
1309  * "Deleting" xact really only locked it, so the tuple is live in any
1310  * case. However, we should make sure that either XMAX_COMMITTED or
1311  * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1312  * examining the tuple for future xacts.
1313  */
1314  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1315  {
1316  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1317  {
1318  /*
1319  * If it's a pre-pg_upgrade tuple, the multixact cannot
1320  * possibly be running; otherwise have to check.
1321  */
1322  if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1324  true))
1325  return HEAPTUPLE_LIVE;
1327  }
1328  else
1329  {
1331  return HEAPTUPLE_LIVE;
1332  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1334  }
1335  }
1336 
1337  /*
1338  * We don't really care whether xmax did commit, abort or crash. We
1339  * know that xmax did lock the tuple, but it did not and will never
1340  * actually update it.
1341  */
1342 
1343  return HEAPTUPLE_LIVE;
1344  }
1345 
1346  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1347  {
1348  TransactionId xmax = HeapTupleGetUpdateXid(tuple);
1349 
1350  /* already checked above */
1352 
1353  /* not LOCKED_ONLY, so it has to have an xmax */
1355 
1356  if (TransactionIdIsInProgress(xmax))
1358  else if (TransactionIdDidCommit(xmax))
1359  {
1360  /*
1361  * The multixact might still be running due to lockers. Need to
1362  * allow for pruning if below the xid horizon regardless --
1363  * otherwise we could end up with a tuple where the updater has to
1364  * be removed due to the horizon, but is not pruned away. It's
1365  * not a problem to prune that tuple, because any remaining
1366  * lockers will also be present in newer tuple versions.
1367  */
1368  *dead_after = xmax;
1369  return HEAPTUPLE_RECENTLY_DEAD;
1370  }
1371  else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1372  {
1373  /*
1374  * Not in Progress, Not Committed, so either Aborted or crashed.
1375  * Mark the Xmax as invalid.
1376  */
1378  }
1379 
1380  return HEAPTUPLE_LIVE;
1381  }
1382 
1383  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1384  {
1388  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1389  HeapTupleHeaderGetRawXmax(tuple));
1390  else
1391  {
1392  /*
1393  * Not in Progress, Not Committed, so either Aborted or crashed
1394  */
1395  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1397  return HEAPTUPLE_LIVE;
1398  }
1399 
1400  /*
1401  * At this point the xmax is known committed, but we might not have
1402  * been able to set the hint bit yet; so we can no longer Assert that
1403  * it's set.
1404  */
1405  }
1406 
1407  /*
1408  * Deleter committed, allow caller to check if it was recent enough that
1409  * some open transactions could still see the tuple.
1410  */
1411  *dead_after = HeapTupleHeaderGetRawXmax(tuple);
1412  return HEAPTUPLE_RECENTLY_DEAD;
1413 }

References Assert, HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXvac, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
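
Pruning callers do not pass a fixed OldestXmin; they capture dead_after and compare it against a GlobalVisState, as heap_prune_satisfies_vacuum() does. A sketch of that pattern, assuming utils/snapmgr.h for GlobalVisTestIsRemovableXid(); the wrapper itself is invented.

/* sketch of the GlobalVisState-based pattern; the wrapper name is invented */
static HTSV_Result
htsv_with_vistest(HeapTuple htup, Buffer buffer, GlobalVisState *vistest)
{
    TransactionId dead_after = InvalidTransactionId;
    HTSV_Result res;

    res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);

    /* promote RECENTLY_DEAD to DEAD once the deleter is below the horizon */
    if (res == HEAPTUPLE_RECENTLY_DEAD &&
        GlobalVisTestIsRemovableXid(vistest, dead_after))
        res = HEAPTUPLE_DEAD;

    return res;
}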

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)

Definition at line 1767 of file heapam_visibility.c.

1768 {
1769  switch (snapshot->snapshot_type)
1770  {
1771  case SNAPSHOT_MVCC:
1772  return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1773  case SNAPSHOT_SELF:
1774  return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1775  case SNAPSHOT_ANY:
1776  return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1777  case SNAPSHOT_TOAST:
1778  return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1779  case SNAPSHOT_DIRTY:
1780  return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1781  case SNAPSHOT_HISTORIC_MVCC:
1782  return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1783  case SNAPSHOT_NON_VACUUMABLE:
1784  return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1785  }
1786 
1787  return false; /* keep compiler quiet */
1788 }
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition: snapshot.h:74
@ SNAPSHOT_SELF
Definition: snapshot.h:64
@ SNAPSHOT_NON_VACUUMABLE
Definition: snapshot.h:118
@ SNAPSHOT_MVCC
Definition: snapshot.h:50
@ SNAPSHOT_ANY
Definition: snapshot.h:69
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:109
@ SNAPSHOT_DIRTY
Definition: snapshot.h:102
SnapshotType snapshot_type
Definition: snapshot.h:144

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_scan_bitmap_next_block(), heapam_tuple_satisfies_snapshot(), heapgettup(), page_collect_tuples(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
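
For callers that only need a yes/no answer under the query's own snapshot, the pattern looks roughly like the following hypothetical helper; GetActiveSnapshot() is assumed to be the relevant snapshot, and the buffer must stay locked while the tuple header is examined.

/* hypothetical helper; the buffer must stay locked while t_data is examined */
static bool
tuple_visible_to_active_snapshot(HeapTuple tup, Buffer buf)
{
    Snapshot    snapshot = GetActiveSnapshot();
    bool        visible;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    visible = HeapTupleSatisfiesVisibility(tup, snapshot, buf);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    return visible;
}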

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)

Definition at line 141 of file heapam_visibility.c.

143 {
144  SetHintBits(tuple, buffer, infomask, xid);
145 }

References SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)

Definition at line 2053 of file pruneheap.c.

2061 {
2062  xl_heap_prune xlrec;
2063  XLogRecPtr recptr;
2064  uint8 info;
2065 
2066  /* The following local variables hold data registered in the WAL record: */
2067  xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
2068  xlhp_freeze_plans freeze_plans;
2069  xlhp_prune_items redirect_items;
2070  xlhp_prune_items dead_items;
2071  xlhp_prune_items unused_items;
2072  OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
2073 
2074  xlrec.flags = 0;
2075 
2076  /*
2077  * Prepare data for the buffer. The arrays are not actually in the
2078  * buffer, but we pretend that they are. When XLogInsert stores a full
2079  * page image, the arrays can be omitted.
2080  */
2081  XLogBeginInsert();
2082  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2083  if (nfrozen > 0)
2084  {
2085  int nplans;
2086 
2087  xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
2088 
2089  /*
2090  * Prepare deduplicated representation for use in the WAL record. This
2091  * destructively sorts frozen tuples array in-place.
2092  */
2093  nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2094 
2095  freeze_plans.nplans = nplans;
2096  XLogRegisterBufData(0, (char *) &freeze_plans,
2097  offsetof(xlhp_freeze_plans, plans));
2098  XLogRegisterBufData(0, (char *) plans,
2099  sizeof(xlhp_freeze_plan) * nplans);
2100  }
2101  if (nredirected > 0)
2102  {
2103  xlrec.flags |= XLHP_HAS_REDIRECTIONS;
2104 
2105  redirect_items.ntargets = nredirected;
2106  XLogRegisterBufData(0, (char *) &redirect_items,
2107  offsetof(xlhp_prune_items, data));
2108  XLogRegisterBufData(0, (char *) redirected,
2109  sizeof(OffsetNumber[2]) * nredirected);
2110  }
2111  if (ndead > 0)
2112  {
2113  xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2114 
2115  dead_items.ntargets = ndead;
2116  XLogRegisterBufData(0, (char *) &dead_items,
2117  offsetof(xlhp_prune_items, data));
2118  XLogRegisterBufData(0, (char *) dead,
2119  sizeof(OffsetNumber) * ndead);
2120  }
2121  if (nunused > 0)
2122  {
2123  xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
2124 
2125  unused_items.ntargets = nunused;
2126  XLogRegisterBufData(0, (char *) &unused_items,
2127  offsetof(xlhp_prune_items, data));
2128  XLogRegisterBufData(0, (char *) unused,
2129  sizeof(OffsetNumber) * nunused);
2130  }
2131  if (nfrozen > 0)
2132  XLogRegisterBufData(0, (char *) frz_offsets,
2133  sizeof(OffsetNumber) * nfrozen);
2134 
2135  /*
2136  * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2137  * flag above.
2138  */
2139  if (RelationIsAccessibleInLogicalDecoding(relation))
2140  xlrec.flags |= XLHP_IS_CATALOG_REL;
2141  if (TransactionIdIsValid(conflict_xid))
2142  xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
2143  if (cleanup_lock)
2144  xlrec.flags |= XLHP_CLEANUP_LOCK;
2145  else
2146  {
2147  Assert(nredirected == 0 && ndead == 0);
2148  /* also, any items in 'unused' must've been LP_DEAD previously */
2149  }
2150  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
2151  if (TransactionIdIsValid(conflict_xid))
2152  XLogRegisterData((char *) &conflict_xid, sizeof(TransactionId));
2153 
2154  switch (reason)
2155  {
2156  case PRUNE_ON_ACCESS:
2157  info = XLOG_HEAP2_PRUNE_ON_ACCESS;
2158  break;
2159  case PRUNE_VACUUM_SCAN:
2160  info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
2161  break;
2162  case PRUNE_VACUUM_CLEANUP:
2163  info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
2164  break;
2165  default:
2166  elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2167  break;
2168  }
2169  recptr = XLogInsert(RM_HEAP2_ID, info);
2170 
2171  PageSetLSN(BufferGetPage(buffer), recptr);
2172 }
#define XLHP_HAS_CONFLICT_HORIZON
Definition: heapam_xlog.h:317
#define XLHP_HAS_FREEZE_PLANS
Definition: heapam_xlog.h:323
#define SizeOfHeapPrune
Definition: heapam_xlog.h:296
#define XLHP_HAS_NOW_UNUSED_ITEMS
Definition: heapam_xlog.h:332
#define XLHP_HAS_REDIRECTIONS
Definition: heapam_xlog.h:330
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition: heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition: heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
Definition: heapam_xlog.h:309
#define XLHP_HAS_DEAD_ITEMS
Definition: heapam_xlog.h:331
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition: heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
Definition: heapam_xlog.h:299
const void * data
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: pruneheap.c:1978

References Assert, BufferGetPage(), data, elog, ERROR, xl_heap_prune::flags, heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_freeze_plans::nplans, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().
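
A sketch of the simplest caller shape, modeled on lazy_vacuum_heap_page(): after item pointers have been set LP_UNUSED, only the 'unused' array is passed and every other category is empty. The wrapper name is an assumption; the caller is expected to hold an exclusive lock on the buffer, to have marked it dirty, and to call this inside the same critical section that modified the page.

/* assumed wrapper, modeled on lazy_vacuum_heap_page(); must run inside the
 * critical section that set the item pointers LP_UNUSED and dirtied the buffer */
static void
log_set_unused_items(Relation rel, Buffer buffer,
                     OffsetNumber *unused, int nunused)
{
    if (RelationNeedsWAL(rel))
        log_heap_prune_and_freeze(rel, buffer,
                                  InvalidTransactionId,
                                  false,    /* no cleanup lock was required */
                                  PRUNE_VACUUM_CLEANUP,
                                  NULL, 0,  /* no freeze plans */
                                  NULL, 0,  /* no redirects */
                                  NULL, 0,  /* no new LP_DEAD items */
                                  unused, nunused);
}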

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)

Definition at line 1947 of file heapam.c.

1948 {
1949  if (bistate->current_buf != InvalidBuffer)
1950  ReleaseBuffer(bistate->current_buf);
1951  bistate->current_buf = InvalidBuffer;
1952 
1953  /*
1954  * Despite the name, we also reset bulk relation extension state.
1955  * Otherwise we can end up erroring out due to looking for free space in
1956  * ->next_free of one partition, even though ->next_free was set when
1957  * extending another partition. It could obviously also be bad for
1958  * efficiency to look at existing blocks at offsets from another
1959  * partition, even if we don't error out.
1960  */
1961  bistate->next_free = InvalidBlockNumber;
1962  bistate->last_free = InvalidBlockNumber;
1963 }

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
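
A hedged sketch of the bulk-insert state lifecycle as used by COPY-style code paths; tuple construction is omitted, the helper is invented, and real callers such as CopyFrom() build tuples from slots via the table AM.

/* illustrative only */
static void
bulk_insert_tuples(Relation rel, HeapTuple *tuples, int ntuples, CommandId cid)
{
    BulkInsertState bistate = GetBulkInsertState();

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, 0, bistate);   /* options could add
                                                         * HEAP_INSERT_FROZEN etc. */

    /*
     * A caller that switches to a different target relation mid-stream (e.g.
     * another partition) calls ReleaseBulkInsertStatePin() before moving on,
     * dropping both the buffer pin and the cached target-block state.
     */
    ReleaseBulkInsertStatePin(bistate);

    FreeBulkInsertState(bistate);
}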

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)

Definition at line 5275 of file reorderbuffer.c.

5279 {
5280  ReorderBufferTupleCidKey key;
5281  ReorderBufferTupleCidEnt *ent;
5282  ForkNumber forkno;
5283  BlockNumber blockno;
5284  bool updated_mapping = false;
5285 
5286  /*
5287  * Return unresolved if tuplecid_data is not valid. That's because when
5288  * streaming in-progress transactions we may run into tuples with the CID
5289  * before actually decoding them. Think e.g. about INSERT followed by
5290  * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5291  * INSERT. So in such cases, we assume the CID is from the future
5292  * command.
5293  */
5294  if (tuplecid_data == NULL)
5295  return false;
5296 
5297  /* be careful about padding */
5298  memset(&key, 0, sizeof(key));
5299 
5300  Assert(!BufferIsLocal(buffer));
5301 
5302  /*
5303  * get relfilelocator from the buffer, no convenient way to access it
5304  * other than that.
5305  */
5306  BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5307 
5308  /* tuples can only be in the main fork */
5309  Assert(forkno == MAIN_FORKNUM);
5310  Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5311 
5312  ItemPointerCopy(&htup->t_self,
5313  &key.tid);
5314 
5315 restart:
5316  ent = (ReorderBufferTupleCidEnt *)
5317  hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5318 
5319  /*
5320  * failed to find a mapping, check whether the table was rewritten and
5321  * apply mapping if so, but only do that once - there can be no new
5322  * mappings while we are in here since we have to hold a lock on the
5323  * relation.
5324  */
5325  if (ent == NULL && !updated_mapping)
5326  {
5327  UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5328  /* now check but don't update for a mapping again */
5329  updated_mapping = true;
5330  goto restart;
5331  }
5332  else if (ent == NULL)
5333  return false;
5334 
5335  if (cmin)
5336  *cmin = ent->cmin;
5337  if (cmax)
5338  *cmax = ent->cmax;
5339  return true;
5340 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_FIND
Definition: hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition: snapmgr.c:102

References Assert, BufferGetTag(), BufferIsLocal, ReorderBufferTupleCidEnt::cmax, ReorderBufferTupleCidEnt::cmin, HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), sort-test::key, MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().
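
A sketch of the lookup its only caller performs while decoding catalog tuples, assuming utils/snapmgr.h for HistoricSnapshotGetTupleCids(); treat the wrapper as illustrative.

/* sketch of the lookup done by HeapTupleSatisfiesHistoricMVCC() */
static bool
lookup_catalog_tuple_cids(Snapshot snapshot, HeapTuple htup, Buffer buffer,
                          CommandId *cmin, CommandId *cmax)
{
    /* returns false when no mapping exists yet, e.g. while streaming an
     * in-progress transaction */
    return ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(),
                                         snapshot, htup, buffer, cmin, cmax);
}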

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
ItemPointer  tid 
)

Definition at line 3095 of file heapam.c.

3096 {
3097  TM_Result result;
3098  TM_FailureData tmfd;
3099 
3100  result = heap_delete(relation, tid,
3101  GetCurrentCommandId(true), InvalidSnapshot,
3102  true /* wait for commit */ ,
3103  &tmfd, false /* changingPart */ );
3104  switch (result)
3105  {
3106  case TM_SelfModified:
3107  /* Tuple was already updated in current command? */
3108  elog(ERROR, "tuple already updated by self");
3109  break;
3110 
3111  case TM_Ok:
3112  /* done successfully */
3113  break;
3114 
3115  case TM_Updated:
3116  elog(ERROR, "tuple concurrently updated");
3117  break;
3118 
3119  case TM_Deleted:
3120  elog(ERROR, "tuple concurrently deleted");
3121  break;
3122 
3123  default:
3124  elog(ERROR, "unrecognized heap_delete status: %u", result);
3125  break;
3126  }
3127 }
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2674

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)

Definition at line 2616 of file heapam.c.

2617 {
2618  heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2619 }
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:1985

References GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)

Definition at line 4338 of file heapam.c.

4340 {
4341  TM_Result result;
4342  TM_FailureData tmfd;
4343  LockTupleMode lockmode;
4344 
4345  result = heap_update(relation, otid, tup,
4346  GetCurrentCommandId(true), InvalidSnapshot,
4347  true /* wait for commit */ ,
4348  &tmfd, &lockmode, update_indexes);
4349  switch (result)
4350  {
4351  case TM_SelfModified:
4352  /* Tuple was already updated in current command? */
4353  elog(ERROR, "tuple already updated by self");
4354  break;
4355 
4356  case TM_Ok:
4357  /* done successfully */
4358  break;
4359 
4360  case TM_Updated:
4361  elog(ERROR, "tuple concurrently updated");
4362  break;
4363 
4364  case TM_Deleted:
4365  elog(ERROR, "tuple concurrently deleted");
4366  break;
4367 
4368  default:
4369  elog(ERROR, "unrecognized heap_update status: %u", result);
4370  break;
4371  }
4372 }
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3141

References elog, ERROR, GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
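
A short illustrative sequence tying the three simple_heap_* wrappers together. The helper is hypothetical; catalog code normally goes through the CatalogTuple* wrappers, which also maintain the indexes, and the CommandCounterIncrement() calls are needed so that each later wrapper can see the row produced by the previous one.

/* illustrative only */
static void
insert_update_delete_example(Relation rel, HeapTuple tup, HeapTuple newtup)
{
    TU_UpdateIndexes update_indexes;

    simple_heap_insert(rel, tup);           /* fills tup->t_self */
    CommandCounterIncrement();              /* make the row visible below */

    simple_heap_update(rel, &tup->t_self, newtup, &update_indexes);
    CommandCounterIncrement();              /* newtup->t_self is the new TID */

    simple_heap_delete(rel, &newtup->t_self);
}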