PostgreSQL Source Code git master
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "commands/vacuum.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"

Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeParams
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct GlobalVisState GlobalVisState
 
typedef struct TupleTableSlot TupleTableSlot
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeParams PruneFreezeParams
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, const ItemPointerData *tid)
 
void heap_abort_speculative (Relation relation, const ItemPointerData *tid)
 
TM_Result heap_update (Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, const ItemPointerData *tid)
 
void simple_heap_update (Relation relation, const ItemPointerData *otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 39 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 40 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 44 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 43 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 51 of file heapam.h.
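
A minimal usage sketch for the HEAP_INSERT_* option bits, passed in the options bitmask of heap_insert(). The helper name and the caller-supplied rel/tup are assumptions of this example, not part of heapam.h; note that HEAP_INSERT_FROZEN is only safe when the relation was created or truncated in the current (sub)transaction.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
insert_frozen_tuple(Relation rel, HeapTuple tup)
{
    /* skip the free space map and write the row already frozen */
    int         options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;

    heap_insert(rel, tup, GetCurrentCommandId(true), options, NULL);
}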

Typedef Documentation

◆ BitmapHeapScanDesc

Definition at line 110 of file heapam.h.

◆ BitmapHeapScanDescData

◆ BulkInsertState

Definition at line 46 of file heapam.h.

◆ GlobalVisState

Definition at line 47 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 102 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeParams

◆ PruneFreezeResult

◆ TupleTableSlot

Definition at line 48 of file heapam.h.

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

125{
126 HEAPTUPLE_DEAD, /* tuple is dead and deletable */
127 HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
128 HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
129 HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
130 HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
131 } HTSV_Result;
HTSV_Result
Definition: heapam.h:125
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_LIVE
Definition: heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:130
@ HEAPTUPLE_DEAD
Definition: heapam.h:126
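
HTSV_Result is normally consumed with a switch, as VACUUM's heap passes do. A minimal sketch, assuming tuple, OldestXmin and buf have been set up by the caller and the buffer is at least share-locked:

switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
{
    case HEAPTUPLE_DEAD:
        /* storage can be reclaimed now */
        break;
    case HEAPTUPLE_RECENTLY_DEAD:
        /* dead, but possibly still visible to old snapshots */
        break;
    case HEAPTUPLE_LIVE:
    case HEAPTUPLE_INSERT_IN_PROGRESS:
    case HEAPTUPLE_DELETE_IN_PROGRESS:
        /* must be kept for now */
        break;
}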

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 226 of file heapam.h.

227{
228 PRUNE_ON_ACCESS, /* on-access pruning */
229 PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
230 PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
231 } PruneReason;
PruneReason
Definition: heapam.h:227
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:230
@ PRUNE_ON_ACCESS
Definition: heapam.h:228
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:229

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 2053 of file heapam.c.

2054{
2055 if (bistate->current_buf != InvalidBuffer)
2056 ReleaseBuffer(bistate->current_buf);
2057 FreeAccessStrategy(bistate->strategy);
2058 pfree(bistate);
2059}
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5366
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:643
void pfree(void *pointer)
Definition: mcxt.c:1594
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 2036 of file heapam.c.

2037{
2038 BulkInsertState bistate;
2039
2040 bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
2041 bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
2042 bistate->current_buf = InvalidBuffer;
2043 bistate->next_free = InvalidBlockNumber;
2044 bistate->last_free = InvalidBlockNumber;
2045 bistate->already_extended_by = 0;
2046 return bistate;
2047}
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:39
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:461
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:46
void * palloc(Size size)
Definition: mcxt.c:1365
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
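
A minimal sketch of the bulk-insert pattern used by the callers listed above (e.g. CopyFrom()): one BulkInsertState shared across many heap_insert() calls and freed at the end. The helper name and the caller-supplied rel/tuples array are assumptions of this example.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
bulk_load(Relation rel, HeapTuple *tuples, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

    FreeBulkInsertState(bistate);
}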

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
const ItemPointerData *  tid 
)

Definition at line 6201 of file heapam.c.

6202{
6204 ItemId lp;
6205 HeapTupleData tp;
6206 Page page;
6207 BlockNumber block;
6208 Buffer buffer;
6209
6211
6212 block = ItemPointerGetBlockNumber(tid);
6213 buffer = ReadBuffer(relation, block);
6214 page = BufferGetPage(buffer);
6215
6217
6218 /*
6219 * Page can't be all visible, we just inserted into it, and are still
6220 * running.
6221 */
6222 Assert(!PageIsAllVisible(page));
6223
6226
6227 tp.t_tableOid = RelationGetRelid(relation);
6228 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6229 tp.t_len = ItemIdGetLength(lp);
6230 tp.t_self = *tid;
6231
6232 /*
6233 * Sanity check that the tuple really is a speculatively inserted tuple,
6234 * inserted by us.
6235 */
6236 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6237 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6238 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6239 elog(ERROR, "attempted to kill a non-speculative tuple");
6241
6242 /*
6243 * No need to check for serializable conflicts here. There is never a
6244 * need for a combo CID, either. No need to extract replica identity, or
6245 * do anything special with infomask bits.
6246 */
6247
6249
6250 /*
6251 * The tuple will become DEAD immediately. Flag that this page is a
6252 * candidate for pruning by setting xmin to TransactionXmin. While not
6253 * immediately prunable, it is the oldest xid we can cheaply determine
6254 * that's safe against wraparound / being older than the table's
6255 * relfrozenxid. To defend against the unlikely case of a new relation
6256 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6257 * if so (vacuum can't subsequently move relfrozenxid to beyond
6258 * TransactionXmin, so there's no race here).
6259 */
6261 {
6262 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6263 TransactionId prune_xid;
6264
6265 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6266 prune_xid = relfrozenxid;
6267 else
6268 prune_xid = TransactionXmin;
6269 PageSetPrunable(page, prune_xid);
6270 }
6271
6272 /* store transaction information of xact deleting the tuple */
6274 tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6275
6276 /*
6277 * Set the tuple header xmin to InvalidTransactionId. This makes the
6278 * tuple immediately invisible to everyone. (In particular, to any
6279 * transactions waiting on the speculative token, woken up later.)
6280 */
6282
6283 /* Clear the speculative insertion token too */
6284 tp.t_data->t_ctid = tp.t_self;
6285
6286 MarkBufferDirty(buffer);
6287
6288 /*
6289 * XLOG stuff
6290 *
6291 * The WAL records generated here match heap_delete(). The same recovery
6292 * routines are used.
6293 */
6294 if (RelationNeedsWAL(relation))
6295 {
6296 xl_heap_delete xlrec;
6297 XLogRecPtr recptr;
6298
6299 xlrec.flags = XLH_DELETE_IS_SUPER;
6301 tp.t_data->t_infomask2);
6303 xlrec.xmax = xid;
6304
6308
6309 /* No replica identity & replication origin logged */
6310
6311 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6312
6313 PageSetLSN(page, recptr);
6314 }
6315
6317
6319
6320 if (HeapTupleHasExternal(&tp))
6321 {
6322 Assert(!IsToastRelation(relation));
6323 heap_toast_delete(relation, &tp, true);
6324 }
6325
6326 /*
6327 * Never need to mark tuple for invalidation, since catalogs don't support
6328 * speculative insertion
6329 */
6330
6331 /* Now we can release the buffer */
6332 ReleaseBuffer(buffer);
6333
6334 /* count deletion, as we counted the insertion too */
6335 pgstat_count_heap_delete(relation);
6336}
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition: bufmgr.c:5604
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2943
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:745
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:436
@ BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:207
@ BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:205
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:428
static void * PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:353
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:390
PageData * Page
Definition: bufpage.h:81
#define PageSetPrunable(page, xid)
Definition: bufpage.h:446
uint32 TransactionId
Definition: c.h:660
bool IsToastRelation(Relation relation)
Definition: catalog.c:206
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
Assert(PointerIsAligned(start, uint64))
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2759
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:34
#define SizeOfHeapDelete
Definition: heapam_xlog.h:121
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:105
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
Definition: htup_details.h:762
#define HEAP_XMAX_BITS
Definition: htup_details.h:281
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
Definition: htup_details.h:555
#define HEAP_MOVED
Definition: htup_details.h:213
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
Definition: htup_details.h:461
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:331
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:515
#define RelationNeedsWAL(relation)
Definition: rel.h:638
TransactionId TransactionXmin
Definition: snapmgr.c:159
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
union HeapTupleHeaderData::@49 t_choice
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:115
OffsetNumber offnum
Definition: heapam_xlog.h:116
uint8 infobits_set
Definition: heapam_xlog.h:117
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.h:263
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:455
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:368
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:245
void XLogBeginInsert(void)
Definition: xloginsert.c:152
#define REGBUF_STANDARD
Definition: xloginsert.h:35

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().
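
A hedged sketch of the speculative-insertion lifecycle that ends in either heap_finish_speculative() or heap_abort_speculative(). The executor's bookkeeping (speculative token on xmin, waking waiters, speculative index insertion) is omitted; rel, tup and conflict_found are assumptions of this example.

heap_insert(rel, tup, GetCurrentCommandId(true),
            HEAP_INSERT_SPECULATIVE, NULL);

/* ... insert index entries speculatively and check for conflicts ... */

if (conflict_found)
    heap_abort_speculative(rel, &tup->t_self);      /* tuple dies immediately */
else
    heap_finish_speculative(rel, &tup->t_self);     /* becomes a normal tuple */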

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 1115 of file heapam.c.

1119{
1120 HeapScanDesc scan;
1121
1122 /*
1123 * increment relation ref count while scanning relation
1124 *
1125 * This is just to make really sure the relcache entry won't go away while
1126 * the scan has a pointer to it. Caller should be holding the rel open
1127 * anyway, so this is redundant in all normal scenarios...
1128 */
1130
1131 /*
1132 * allocate and initialize scan descriptor
1133 */
1134 if (flags & SO_TYPE_BITMAPSCAN)
1135 {
1137
1138 /*
1139 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1140 * does not have, so no special initializations required here.
1141 */
1142 scan = (HeapScanDesc) bscan;
1143 }
1144 else
1145 scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1146
1147 scan->rs_base.rs_rd = relation;
1148 scan->rs_base.rs_snapshot = snapshot;
1149 scan->rs_base.rs_nkeys = nkeys;
1150 scan->rs_base.rs_flags = flags;
1151 scan->rs_base.rs_parallel = parallel_scan;
1152 scan->rs_strategy = NULL; /* set in initscan */
1153 scan->rs_cbuf = InvalidBuffer;
1154
1155 /*
1156 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1157 */
1158 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1159 scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1160
1161 /* Check that a historic snapshot is not used for non-catalog tables */
1162 if (snapshot &&
1163 IsHistoricMVCCSnapshot(snapshot) &&
1165 {
1166 ereport(ERROR,
1167 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
1168 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
1169 RelationGetRelationName(relation))));
1170 }
1171
1172 /*
1173 * For seqscan and sample scans in a serializable transaction, acquire a
1174 * predicate lock on the entire relation. This is required not only to
1175 * lock all the matching tuples, but also to conflict with new insertions
1176 * into the table. In an indexscan, we take page locks on the index pages
1177 * covering the range specified in the scan qual, but in a heap scan there
1178 * is nothing more fine-grained to lock. A bitmap scan is a different
1179 * story, there we have already scanned the index and locked the index
1180 * pages covering the predicate. But in that case we still have to lock
1181 * any matching heap tuples. For sample scan we could optimize the locking
1182 * to be at least page-level granularity, but we'd need to add per-tuple
1183 * locking for that.
1184 */
1186 {
1187 /*
1188 * Ensure a missing snapshot is noticed reliably, even if the
1189 * isolation mode means predicate locking isn't performed (and
1190 * therefore the snapshot isn't used here).
1191 */
1192 Assert(snapshot);
1193 PredicateLockRelation(relation, snapshot);
1194 }
1195
1196 /* we only need to set this up once */
1197 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1198
1199 /*
1200 * Allocate memory to keep track of page allocation for parallel workers
1201 * when doing a parallel scan.
1202 */
1203 if (parallel_scan != NULL)
1205 else
1206 scan->rs_parallelworkerdata = NULL;
1207
1208 /*
1209 * we do this here instead of in initscan() because heap_rescan also calls
1210 * initscan() and we don't want to allocate memory again
1211 */
1212 if (nkeys > 0)
1213 scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1214 else
1215 scan->rs_base.rs_key = NULL;
1216
1217 initscan(scan, key, false);
1218
1219 scan->rs_read_stream = NULL;
1220
1221 /*
1222 * Set up a read stream for sequential scans and TID range scans. This
1223 * should be done after initscan() because initscan() allocates the
1224 * BufferAccessStrategy object passed to the read stream API.
1225 */
1226 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1228 {
1230
1231 if (scan->rs_base.rs_parallel)
1233 else
1235
1236 /* ---
1237 * It is safe to use batchmode as the only locks taken by `cb`
1238 * are never taken while waiting for IO:
1239 * - SyncScanLock is used in the non-parallel case
1240 * - in the parallel case, only spinlocks and atomics are used
1241 * ---
1242 */
1245 scan->rs_strategy,
1246 scan->rs_base.rs_rd,
1248 cb,
1249 scan,
1250 0);
1251 }
1252 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1253 {
1256 scan->rs_strategy,
1257 scan->rs_base.rs_rd,
1260 scan,
1261 sizeof(TBMIterateResult));
1262 }
1263
1264
1265 return (TableScanDesc) scan;
1266}
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ereport(elevel,...)
Definition: elog.h:150
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:247
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:287
static BlockNumber bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, void *per_buffer_data)
Definition: heapam.c:312
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:352
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:102
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2576
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:737
#define READ_STREAM_USE_BATCHING
Definition: read_stream.h:64
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.h:77
#define READ_STREAM_DEFAULT
Definition: read_stream.h:21
#define READ_STREAM_SEQUENTIAL
Definition: read_stream.h:36
#define RelationGetRelationName(relation)
Definition: rel.h:549
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:694
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2187
@ MAIN_FORKNUM
Definition: relpath.h:58
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsHistoricMVCCSnapshot(snapshot)
Definition: snapmgr.h:59
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:55
BufferAccessStrategy rs_strategy
Definition: heapam.h:73
Buffer rs_cbuf
Definition: heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:95
HeapTupleData rs_ctup
Definition: heapam.h:75
ReadStream * rs_read_stream
Definition: heapam.h:78
TableScanDescData rs_base
Definition: heapam.h:58
Relation rs_rd
Definition: relscan.h:36
uint32 rs_flags
Definition: relscan.h:64
struct ScanKeyData * rs_key
Definition: relscan.h:39
struct SnapshotData * rs_snapshot
Definition: relscan.h:37
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:66
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:53
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:51
@ SO_TYPE_SEQSCAN
Definition: tableam.h:49
@ SO_TYPE_BITMAPSCAN
Definition: tableam.h:50

References Assert(), bitmapheap_stream_read_next(), ereport, errcode(), errmsg(), ERROR, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsHistoricMVCCSnapshot, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelationName, RelationGetRelid, RelationIncrementReferenceCount(), RelationIsAccessibleInLogicalDecoding, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
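
A minimal sketch of a serial sequential scan driven through heapam directly; real callers normally go through the tableam wrappers (table_beginscan() and friends). The helper name and caller-supplied rel/snapshot are assumptions of this example.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

static uint64
count_visible_tuples(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan;
    HeapTuple   tuple;
    uint64      ntuples = 0;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        ntuples++;              /* tuple is only valid until the next call */
    heap_endscan(scan);

    return ntuples;
}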

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
const ItemPointerData *  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData *  tmfd,
bool  changingPart 
)

Definition at line 2804 of file heapam.c.

2807{
2808 TM_Result result;
2810 ItemId lp;
2811 HeapTupleData tp;
2812 Page page;
2813 BlockNumber block;
2814 Buffer buffer;
2815 Buffer vmbuffer = InvalidBuffer;
2816 TransactionId new_xmax;
2817 uint16 new_infomask,
2818 new_infomask2;
2819 bool have_tuple_lock = false;
2820 bool iscombo;
2821 bool all_visible_cleared = false;
2822 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2823 bool old_key_copied = false;
2824
2826
2827 AssertHasSnapshotForToast(relation);
2828
2829 /*
2830 * Forbid this during a parallel operation, lest it allocate a combo CID.
2831 * Other workers might need that combo CID for visibility checks, and we
2832 * have no provision for broadcasting it to them.
2833 */
2834 if (IsInParallelMode())
2835 ereport(ERROR,
2836 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2837 errmsg("cannot delete tuples during a parallel operation")));
2838
2839 block = ItemPointerGetBlockNumber(tid);
2840 buffer = ReadBuffer(relation, block);
2841 page = BufferGetPage(buffer);
2842
2843 /*
2844 * Before locking the buffer, pin the visibility map page if it appears to
2845 * be necessary. Since we haven't got the lock yet, someone else might be
2846 * in the middle of changing this, so we'll need to recheck after we have
2847 * the lock.
2848 */
2849 if (PageIsAllVisible(page))
2850 visibilitymap_pin(relation, block, &vmbuffer);
2851
2853
2856
2857 tp.t_tableOid = RelationGetRelid(relation);
2858 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2859 tp.t_len = ItemIdGetLength(lp);
2860 tp.t_self = *tid;
2861
2862l1:
2863
2864 /*
2865 * If we didn't pin the visibility map page and the page has become all
2866 * visible while we were busy locking the buffer, we'll have to unlock and
2867 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2868 * unfortunate, but hopefully shouldn't happen often.
2869 */
2870 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2871 {
2873 visibilitymap_pin(relation, block, &vmbuffer);
2875 }
2876
2877 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2878
2879 if (result == TM_Invisible)
2880 {
2881 UnlockReleaseBuffer(buffer);
2882 ereport(ERROR,
2883 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2884 errmsg("attempted to delete invisible tuple")));
2885 }
2886 else if (result == TM_BeingModified && wait)
2887 {
2888 TransactionId xwait;
2889 uint16 infomask;
2890
2891 /* must copy state data before unlocking buffer */
2893 infomask = tp.t_data->t_infomask;
2894
2895 /*
2896 * Sleep until concurrent transaction ends -- except when there's a
2897 * single locker and it's our own transaction. Note we don't care
2898 * which lock mode the locker has, because we need the strongest one.
2899 *
2900 * Before sleeping, we need to acquire tuple lock to establish our
2901 * priority for the tuple (see heap_lock_tuple). LockTuple will
2902 * release us when we are next-in-line for the tuple.
2903 *
2904 * If we are forced to "start over" below, we keep the tuple lock;
2905 * this arranges that we stay at the head of the line while rechecking
2906 * tuple state.
2907 */
2908 if (infomask & HEAP_XMAX_IS_MULTI)
2909 {
2910 bool current_is_member = false;
2911
2912 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2913 LockTupleExclusive, &current_is_member))
2914 {
2916
2917 /*
2918 * Acquire the lock, if necessary (but skip it when we're
2919 * requesting a lock and already have one; avoids deadlock).
2920 */
2921 if (!current_is_member)
2923 LockWaitBlock, &have_tuple_lock);
2924
2925 /* wait for multixact */
2927 relation, &(tp.t_self), XLTW_Delete,
2928 NULL);
2930
2931 /*
2932 * If xwait had just locked the tuple then some other xact
2933 * could update this tuple before we get to this point. Check
2934 * for xmax change, and start over if so.
2935 *
2936 * We also must start over if we didn't pin the VM page, and
2937 * the page has become all visible.
2938 */
2939 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2940 xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2942 xwait))
2943 goto l1;
2944 }
2945
2946 /*
2947 * You might think the multixact is necessarily done here, but not
2948 * so: it could have surviving members, namely our own xact or
2949 * other subxacts of this backend. It is legal for us to delete
2950 * the tuple in either case, however (the latter case is
2951 * essentially a situation of upgrading our former shared lock to
2952 * exclusive). We don't bother changing the on-disk hint bits
2953 * since we are about to overwrite the xmax altogether.
2954 */
2955 }
2956 else if (!TransactionIdIsCurrentTransactionId(xwait))
2957 {
2958 /*
2959 * Wait for regular transaction to end; but first, acquire tuple
2960 * lock.
2961 */
2964 LockWaitBlock, &have_tuple_lock);
2965 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2967
2968 /*
2969 * xwait is done, but if xwait had just locked the tuple then some
2970 * other xact could update this tuple before we get to this point.
2971 * Check for xmax change, and start over if so.
2972 *
2973 * We also must start over if we didn't pin the VM page, and the
2974 * page has become all visible.
2975 */
2976 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2977 xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2979 xwait))
2980 goto l1;
2981
2982 /* Otherwise check if it committed or aborted */
2983 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2984 }
2985
2986 /*
2987 * We may overwrite if previous xmax aborted, or if it committed but
2988 * only locked the tuple without updating it.
2989 */
2990 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2993 result = TM_Ok;
2994 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2995 result = TM_Updated;
2996 else
2997 result = TM_Deleted;
2998 }
2999
3000 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3001 if (result != TM_Ok)
3002 {
3003 Assert(result == TM_SelfModified ||
3004 result == TM_Updated ||
3005 result == TM_Deleted ||
3006 result == TM_BeingModified);
3008 Assert(result != TM_Updated ||
3010 }
3011
3012 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3013 {
3014 /* Perform additional check for transaction-snapshot mode RI updates */
3015 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
3016 result = TM_Updated;
3017 }
3018
3019 if (result != TM_Ok)
3020 {
3021 tmfd->ctid = tp.t_data->t_ctid;
3023 if (result == TM_SelfModified)
3025 else
3026 tmfd->cmax = InvalidCommandId;
3027 UnlockReleaseBuffer(buffer);
3028 if (have_tuple_lock)
3030 if (vmbuffer != InvalidBuffer)
3031 ReleaseBuffer(vmbuffer);
3032 return result;
3033 }
3034
3035 /*
3036 * We're about to do the actual delete -- check for conflict first, to
3037 * avoid possibly having to roll back work we've just done.
3038 *
3039 * This is safe without a recheck as long as there is no possibility of
3040 * another process scanning the page between this check and the delete
3041 * being visible to the scan (i.e., an exclusive buffer content lock is
3042 * continuously held from this point until the tuple delete is visible).
3043 */
3045
3046 /* replace cid with a combo CID if necessary */
3047 HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
3048
3049 /*
3050 * Compute replica identity tuple before entering the critical section so
3051 * we don't PANIC upon a memory allocation failure.
3052 */
3053 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
3054
3055 /*
3056 * If this is the first possibly-multixact-able operation in the current
3057 * transaction, set my per-backend OldestMemberMXactId setting. We can be
3058 * certain that the transaction will never become a member of any older
3059 * MultiXactIds than that. (We have to do this even if we end up just
3060 * using our own TransactionId below, since some other backend could
3061 * incorporate our XID into a MultiXact immediately afterwards.)
3062 */
3064
3067 xid, LockTupleExclusive, true,
3068 &new_xmax, &new_infomask, &new_infomask2);
3069
3071
3072 /*
3073 * If this transaction commits, the tuple will become DEAD sooner or
3074 * later. Set flag that this page is a candidate for pruning once our xid
3075 * falls below the OldestXmin horizon. If the transaction finally aborts,
3076 * the subsequent page pruning will be a no-op and the hint will be
3077 * cleared.
3078 */
3079 PageSetPrunable(page, xid);
3080
3081 if (PageIsAllVisible(page))
3082 {
3083 all_visible_cleared = true;
3084 PageClearAllVisible(page);
3085 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3086 vmbuffer, VISIBILITYMAP_VALID_BITS);
3087 }
3088
3089 /* store transaction information of xact deleting the tuple */
3091 tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3092 tp.t_data->t_infomask |= new_infomask;
3093 tp.t_data->t_infomask2 |= new_infomask2;
3095 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3096 HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
3097 /* Make sure there is no forward chain link in t_ctid */
3098 tp.t_data->t_ctid = tp.t_self;
3099
3100 /* Signal that this is actually a move into another partition */
3101 if (changingPart)
3103
3104 MarkBufferDirty(buffer);
3105
3106 /*
3107 * XLOG stuff
3108 *
3109 * NB: heap_abort_speculative() uses the same xlog record and replay
3110 * routines.
3111 */
3112 if (RelationNeedsWAL(relation))
3113 {
3114 xl_heap_delete xlrec;
3115 xl_heap_header xlhdr;
3116 XLogRecPtr recptr;
3117
3118 /*
3119 * For logical decode we need combo CIDs to properly decode the
3120 * catalog
3121 */
3123 log_heap_new_cid(relation, &tp);
3124
3125 xlrec.flags = 0;
3126 if (all_visible_cleared)
3128 if (changingPart)
3131 tp.t_data->t_infomask2);
3133 xlrec.xmax = new_xmax;
3134
3135 if (old_key_tuple != NULL)
3136 {
3137 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3139 else
3141 }
3142
3145
3147
3148 /*
3149 * Log replica identity of the deleted tuple if there is one
3150 */
3151 if (old_key_tuple != NULL)
3152 {
3153 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3154 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3155 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3156
3158 XLogRegisterData((char *) old_key_tuple->t_data
3160 old_key_tuple->t_len
3162 }
3163
3164 /* filtering by origin on a row level is much more efficient */
3166
3167 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3168
3169 PageSetLSN(page, recptr);
3170 }
3171
3173
3175
3176 if (vmbuffer != InvalidBuffer)
3177 ReleaseBuffer(vmbuffer);
3178
3179 /*
3180 * If the tuple has toasted out-of-line attributes, we need to delete
3181 * those items too. We have to do this before releasing the buffer
3182 * because we need to look at the contents of the tuple, but it's OK to
3183 * release the content lock on the buffer first.
3184 */
3185 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3186 relation->rd_rel->relkind != RELKIND_MATVIEW)
3187 {
3188 /* toast table entries should never be recursively toasted */
3190 }
3191 else if (HeapTupleHasExternal(&tp))
3192 heap_toast_delete(relation, &tp, false);
3193
3194 /*
3195 * Mark tuple for invalidation from system caches at next command
3196 * boundary. We have to do this before releasing the buffer because we
3197 * need to look at the contents of the tuple.
3198 */
3199 CacheInvalidateHeapTuple(relation, &tp, NULL);
3200
3201 /* Now we can release the buffer */
3202 ReleaseBuffer(buffer);
3203
3204 /*
3205 * Release the lmgr tuple lock, if we had it.
3206 */
3207 if (have_tuple_lock)
3209
3210 pgstat_count_heap_delete(relation);
3211
3212 if (old_key_tuple != NULL && old_key_copied)
3213 heap_freetuple(old_key_tuple);
3214
3215 return TM_Ok;
3216}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4223
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5383
static void PageClearAllVisible(Page page)
Definition: bufpage.h:438
#define InvalidCommandId
Definition: c.h:677
TransactionId MultiXactId
Definition: c.h:670
uint16_t uint16
Definition: c.h:540
void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup)
Definition: combocid.c:118
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7621
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:9086
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:5352
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:5303
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:9167
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7799
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2781
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:164
static void AssertHasSnapshotForToast(Relation rel)
Definition: heapam.c:220
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:2014
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:104
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:102
#define SizeOfHeapHeader
Definition: heapam_xlog.h:157
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:106
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:103
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1435
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
Definition: htup_details.h:226
static void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo)
Definition: htup_details.h:431
static void HeapTupleHeaderClearHotUpdated(HeapTupleHeaderData *tup)
Definition: htup_details.h:549
static TransactionId HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup)
Definition: htup_details.h:377
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
Definition: htup_details.h:397
static void HeapTupleHeaderSetMovedPartitions(HeapTupleHeaderData *tup)
Definition: htup_details.h:486
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:383
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1571
bool ItemPointerEquals(const ItemPointerData *pointer1, const ItemPointerData *pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper)
Definition: lmgr.c:663
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:657
@ MultiXactStatusUpdate
Definition: multixact.h:47
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition: predicate.c:4336
#define InvalidSnapshot
Definition: snapshot.h:119
TransactionId xmax
Definition: tableam.h:150
CommandId cmax
Definition: tableam.h:151
ItemPointerData ctid
Definition: tableam.h:149
uint16 t_infomask
Definition: heapam_xlog.h:153
uint16 t_infomask2
Definition: heapam_xlog.h:152
TM_Result
Definition: tableam.h:73
@ TM_Ok
Definition: tableam.h:78
@ TM_BeingModified
Definition: tableam.h:100
@ TM_Deleted
Definition: tableam.h:93
@ TM_Updated
Definition: tableam.h:90
@ TM_SelfModified
Definition: tableam.h:84
@ TM_Invisible
Definition: tableam.h:81
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:942
bool IsInParallelMode(void)
Definition: xact.c:1090
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:154
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:460

References Assert(), AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
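
A hedged sketch of calling heap_delete() directly and checking the TM_Result, loosely following what simple_heap_delete() does; the helper name and caller-supplied rel/tid are assumptions of this example.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
delete_tuple_or_error(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid, GetCurrentCommandId(true),
                         InvalidSnapshot,   /* no serializable RI crosscheck */
                         true,              /* wait for concurrent updaters */
                         &tmfd,
                         false);            /* not a partition move */

    if (result != TM_Ok)
        elog(ERROR, "tuple concurrently updated, deleted or locked (TM_Result %d)",
             (int) result);
}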

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1322 of file heapam.c.

1323{
1324 HeapScanDesc scan = (HeapScanDesc) sscan;
1325
1326 /* Note: no locking manipulations needed */
1327
1328 /*
1329 * unpin scan buffers
1330 */
1331 if (BufferIsValid(scan->rs_cbuf))
1332 ReleaseBuffer(scan->rs_cbuf);
1333
1334 /*
1335 * Must free the read stream before freeing the BufferAccessStrategy.
1336 */
1337 if (scan->rs_read_stream)
1339
1340 /*
1341 * decrement relation reference count and free scan descriptor storage
1342 */
1344
1345 if (scan->rs_base.rs_key)
1346 pfree(scan->rs_base.rs_key);
1347
1348 if (scan->rs_strategy != NULL)
1350
1351 if (scan->rs_parallelworkerdata != NULL)
1353
1354 if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1356
1357 pfree(scan);
1358}
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:387
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:1089
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2200
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:866
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:65

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze *  frz 
)
inline static

Definition at line 475 of file heapam.h.

476{
477 HeapTupleHeaderSetXmax(tuple, frz->xmax);
478
479 if (frz->frzflags & XLH_FREEZE_XVAC)
481
482 if (frz->frzflags & XLH_INVALID_XVAC)
484
485 tuple->t_infomask = frz->t_infomask;
486 tuple->t_infomask2 = frz->t_infomask2;
487}
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:348
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:347
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:451
uint8 frzflags
Definition: heapam.h:147
uint16 t_infomask2
Definition: heapam.h:145
TransactionId xmax
Definition: heapam.h:144
uint16 t_infomask
Definition: heapam.h:146
#define FrozenTransactionId
Definition: transam.h:33

References FrozenTransactionId, HeapTupleFreeze::frzflags, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().
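
Freezing is a two-step protocol: heap_prepare_freeze_tuple() builds a per-tuple plan, and the plans are later applied (under an exclusive buffer lock, inside a critical section, with the caller emitting WAL) by heap_freeze_prepared_tuples(), which calls heap_execute_freeze_tuple() for each entry. A minimal sketch, assuming the cutoffs, page-freeze state and the offnum/frozen[]/nfrozen bookkeeping are set up as VACUUM does:

HeapTupleFreeze frz;
bool        totally_frozen;

/* phase 1: decide, per tuple, whether freezing is required */
if (heap_prepare_freeze_tuple(htup, &cutoffs, &pagefrz, &frz, &totally_frozen))
{
    frz.offset = offnum;            /* remember the target line pointer */
    frozen[nfrozen++] = frz;        /* collect the plans for this page */
}

/* phase 2: later, with the buffer exclusively locked, in a critical section */
heap_pre_freeze_checks(buf, frozen, nfrozen);
heap_freeze_prepared_tuples(buf, frozen, nfrozen);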

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer *  userbuf,
bool  keep_buf 
)

Definition at line 1620 of file heapam.c.

1625{
1626 ItemPointer tid = &(tuple->t_self);
1627 ItemId lp;
1628 Buffer buffer;
1629 Page page;
1630 OffsetNumber offnum;
1631 bool valid;
1632
1633 /*
1634 * Fetch and pin the appropriate page of the relation.
1635 */
1636 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1637
1638 /*
1639 * Need share lock on buffer to examine tuple commit status.
1640 */
1642 page = BufferGetPage(buffer);
1643
1644 /*
1645 * We'd better check for out-of-range offnum in case of VACUUM since the
1646 * TID was obtained.
1647 */
1648 offnum = ItemPointerGetOffsetNumber(tid);
1649 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1650 {
1652 ReleaseBuffer(buffer);
1653 *userbuf = InvalidBuffer;
1654 tuple->t_data = NULL;
1655 return false;
1656 }
1657
1658 /*
1659 * get the item line pointer corresponding to the requested tid
1660 */
1661 lp = PageGetItemId(page, offnum);
1662
1663 /*
1664 * Must check for deleted tuple.
1665 */
1666 if (!ItemIdIsNormal(lp))
1667 {
1669 ReleaseBuffer(buffer);
1670 *userbuf = InvalidBuffer;
1671 tuple->t_data = NULL;
1672 return false;
1673 }
1674
1675 /*
1676 * fill in *tuple fields
1677 */
1678 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1679 tuple->t_len = ItemIdGetLength(lp);
1680 tuple->t_tableOid = RelationGetRelid(relation);
1681
1682 /*
1683 * check tuple visibility, then release lock
1684 */
1685 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1686
1687 if (valid)
1688 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1690
1691 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1692
1694
1695 if (valid)
1696 {
1697 /*
1698 * All checks passed, so return the tuple as valid. Caller is now
1699 * responsible for releasing the buffer.
1700 */
1701 *userbuf = buffer;
1702
1703 return true;
1704 }
1705
1706 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1707 if (keep_buf)
1708 *userbuf = buffer;
1709 else
1710 {
1711 ReleaseBuffer(buffer);
1712 *userbuf = InvalidBuffer;
1713 tuple->t_data = NULL;
1714 }
1715
1716 return false;
1717}
@ BUFFER_LOCK_SHARE
Definition: bufmgr.h:206
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:371
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:9271
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:324
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2621

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
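
A minimal sketch of fetching one tuple by TID with heap_fetch(): the caller fills t_self, and on success t_data points into the returned pinned buffer, which must be released afterwards. The helper name and caller-supplied rel/snapshot/tid are assumptions of this example.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

static bool
fetch_tuple_by_tid(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buf;

    tuple.t_self = *tid;            /* the TID selects the tuple to fetch */
    if (!heap_fetch(rel, snapshot, &tuple, &buf, false))
        return false;               /* not visible, or TID no longer valid */

    /* ... examine tuple.t_data while the buffer pin is held ... */
    ReleaseBuffer(buf);
    return true;
}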

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
const ItemPointerData *  tid 
)

Definition at line 6114 of file heapam.c.

6115{
6116 Buffer buffer;
6117 Page page;
6118 OffsetNumber offnum;
6119 ItemId lp = NULL;
6120 HeapTupleHeader htup;
6121
6122 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6124 page = BufferGetPage(buffer);
6125
6126 offnum = ItemPointerGetOffsetNumber(tid);
6127 if (PageGetMaxOffsetNumber(page) >= offnum)
6128 lp = PageGetItemId(page, offnum);
6129
6130 if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
6131 elog(ERROR, "invalid lp");
6132
6133 htup = (HeapTupleHeader) PageGetItem(page, lp);
6134
6135 /* NO EREPORT(ERROR) from here till changes are logged */
6137
6139
6140 MarkBufferDirty(buffer);
6141
6142 /*
6143 * Replace the speculative insertion token with a real t_ctid, pointing to
6144 * itself like it does on regular tuples.
6145 */
6146 htup->t_ctid = *tid;
6147
6148 /* XLOG stuff */
6149 if (RelationNeedsWAL(relation))
6150 {
6151 xl_heap_confirm xlrec;
6152 XLogRecPtr recptr;
6153
6155
6157
6158 /* We want the same filtering on this as on a plain insert */
6160
6163
6164 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
6165
6166 PageSetLSN(page, recptr);
6167 }
6168
6170
6171 UnlockReleaseBuffer(buffer);
6172}
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:431
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:38
OffsetNumber offnum
Definition: heapam_xlog.h:428

References Assert(), BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
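
For orientation, a hedged sketch of the surrounding speculative-insertion protocol, loosely modeled on what the heapam table AM callbacks do before and after heap_finish_speculative(). The token handling (SpeculativeInsertionLockAcquire() plus HeapTupleHeaderSetSpeculativeToken()) follows the in-tree pattern, but the wrapper itself and the omitted unique-index conflict check are illustrative assumptions, not a definitive implementation.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "storage/lmgr.h"

static void
speculative_insert_sketch(Relation rel, HeapTuple tup)
{
	TransactionId xid = GetCurrentTransactionId();
	uint32		token = SpeculativeInsertionLockAcquire(xid);

	/* stamp the tuple with the speculative token before inserting it */
	HeapTupleHeaderSetSpeculativeToken(tup->t_data, token);
	heap_insert(rel, tup, GetCurrentCommandId(true),
				HEAP_INSERT_SPECULATIVE, NULL);

	/* ... a real caller checks its unique index for conflicts here ... */

	/* no conflict: turn the token into a normal self-pointing t_ctid */
	heap_finish_speculative(rel, &tup->t_self);
	SpeculativeInsertionLockRelease(xid);
}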

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7406 of file heapam.c.

7407{
7408 Page page = BufferGetPage(buffer);
7409
7410 for (int i = 0; i < ntuples; i++)
7411 {
7412 HeapTupleFreeze *frz = tuples + i;
7413 ItemId itemid = PageGetItemId(page, frz->offset);
7414 HeapTupleHeader htup;
7415
7416 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7417 heap_execute_freeze_tuple(htup, frz);
7418 }
7419}
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.h:475

OffsetNumber offset
Definition: heapam.h:152

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 7428 of file heapam.c.

7431{
7432 HeapTupleFreeze frz;
7433 bool do_freeze;
7434 bool totally_frozen;
7435 struct VacuumCutoffs cutoffs;
7436 HeapPageFreeze pagefrz;
7437
7438 cutoffs.relfrozenxid = relfrozenxid;
7439 cutoffs.relminmxid = relminmxid;
7440 cutoffs.OldestXmin = FreezeLimit;
7441 cutoffs.OldestMxact = MultiXactCutoff;
7442 cutoffs.FreezeLimit = FreezeLimit;
7443 cutoffs.MultiXactCutoff = MultiXactCutoff;
7444
7445 pagefrz.freeze_required = true;
7446 pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7447 pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7448 pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7449 pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7450
7451 do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7452 &pagefrz, &frz, &totally_frozen);
7453
7454 /*
7455 * Note that because this is not a WAL-logged operation, we don't need to
7456 * fill in the offset in the freeze record.
7457 */
7458
7459 if (do_freeze)
7460 heap_execute_freeze_tuple(tuple, &frz);
7461 return do_freeze;
7462}
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:7080
TransactionId FreezeLimit
Definition: vacuum.h:289
TransactionId relfrozenxid
Definition: vacuum.h:263
MultiXactId relminmxid
Definition: vacuum.h:264
MultiXactId MultiXactCutoff
Definition: vacuum.h:290

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
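
A small illustrative sketch of the call pattern (the wrapper name and the idea of operating on a palloc'd tuple copy are assumptions; rewrite_heap_tuple() is the in-tree caller): heap_freeze_tuple() both decides whether anything needs freezing against the supplied cutoffs and, when it does, rewrites the header in place, returning true in that case.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup.h"

static void
freeze_copied_tuple(HeapTuple copied_tup,
					TransactionId relfrozenxid, MultiXactId relminmxid,
					TransactionId freeze_limit, MultiXactId multixact_cutoff)
{
	/* returns true iff the header of copied_tup was modified */
	if (heap_freeze_tuple(copied_tup->t_data,
						  relfrozenxid, relminmxid,
						  freeze_limit, multixact_cutoff))
	{
		/* header changed; this non-WAL-logged variant is for private copies */
	}
}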

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1892 of file heapam.c.

1894{
1895 Relation relation = sscan->rs_rd;
1896 Snapshot snapshot = sscan->rs_snapshot;
1897 ItemPointerData ctid;
1898 TransactionId priorXmax;
1899
1900 /*
1901 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1902 * Assume that t_ctid links are valid however - there shouldn't be invalid
1903 * ones in the table.
1904 */
1906
1907 /*
1908 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1909 * need to examine, and *tid is the TID we will return if ctid turns out
1910 * to be bogus.
1911 *
1912 * Note that we will loop until we reach the end of the t_ctid chain.
1913 * Depending on the snapshot passed, there might be at most one visible
1914 * version of the row, but we don't try to optimize for that.
1915 */
1916 ctid = *tid;
1917 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1918 for (;;)
1919 {
1920 Buffer buffer;
1921 Page page;
1922 OffsetNumber offnum;
1923 ItemId lp;
1924 HeapTupleData tp;
1925 bool valid;
1926
1927 /*
1928 * Read, pin, and lock the page.
1929 */
1930 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1932 page = BufferGetPage(buffer);
1933
1934 /*
1935 * Check for bogus item number. This is not treated as an error
1936 * condition because it can happen while following a t_ctid link. We
1937 * just assume that the prior tid is OK and return it unchanged.
1938 */
1939 offnum = ItemPointerGetOffsetNumber(&ctid);
1940 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1941 {
1942 UnlockReleaseBuffer(buffer);
1943 break;
1944 }
1945 lp = PageGetItemId(page, offnum);
1946 if (!ItemIdIsNormal(lp))
1947 {
1948 UnlockReleaseBuffer(buffer);
1949 break;
1950 }
1951
1952 /* OK to access the tuple */
1953 tp.t_self = ctid;
1954 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1955 tp.t_len = ItemIdGetLength(lp);
1956 tp.t_tableOid = RelationGetRelid(relation);
1957
1958 /*
1959 * After following a t_ctid link, we might arrive at an unrelated
1960 * tuple. Check for XMIN match.
1961 */
1962 if (TransactionIdIsValid(priorXmax) &&
1964 {
1965 UnlockReleaseBuffer(buffer);
1966 break;
1967 }
1968
1969 /*
1970 * Check tuple visibility; if visible, set it as the new result
1971 * candidate.
1972 */
1973 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1974 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1975 if (valid)
1976 *tid = ctid;
1977
1978 /*
1979 * If there's a valid t_ctid link, follow it, else we're done.
1980 */
1981 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1985 {
1986 UnlockReleaseBuffer(buffer);
1987 break;
1988 }
1989
1990 ctid = tp.t_data->t_ctid;
1991 priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1992 UnlockReleaseBuffer(buffer);
1993 } /* end of loop */
1994}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)
Definition: htup_details.h:480

References Assert(), BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
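
Illustrative usage (not part of the generated listing): resolve a TID to the newest row version visible to the active snapshot. table_beginscan_tid() is used here only to obtain a TableScanDesc carrying the snapshot; the wrapper name and snapshot choice are assumptions made for the example.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

static ItemPointerData
latest_version_of(Relation rel, ItemPointerData tid)
{
	TableScanDesc scan = table_beginscan_tid(rel, GetActiveSnapshot());

	/* follows t_ctid chains; tid is overwritten with the newest visible version */
	heap_get_latest_tid(scan, &tid);
	table_endscan(scan);
	return tid;
}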

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1885 of file pruneheap.c.

1886{
1887 OffsetNumber offnum,
1888 maxoff;
1889
1890 MemSet(root_offsets, InvalidOffsetNumber,
1892
1893 maxoff = PageGetMaxOffsetNumber(page);
1894 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1895 {
1896 ItemId lp = PageGetItemId(page, offnum);
1897 HeapTupleHeader htup;
1898 OffsetNumber nextoffnum;
1899 TransactionId priorXmax;
1900
1901 /* skip unused and dead items */
1902 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1903 continue;
1904
1905 if (ItemIdIsNormal(lp))
1906 {
1907 htup = (HeapTupleHeader) PageGetItem(page, lp);
1908
1909 /*
1910 * Check if this tuple is part of a HOT-chain rooted at some other
1911 * tuple. If so, skip it for now; we'll process it when we find
1912 * its root.
1913 */
1914 if (HeapTupleHeaderIsHeapOnly(htup))
1915 continue;
1916
1917 /*
1918 * This is either a plain tuple or the root of a HOT-chain.
1919 * Remember it in the mapping.
1920 */
1921 root_offsets[offnum - 1] = offnum;
1922
1923 /* If it's not the start of a HOT-chain, we're done with it */
1924 if (!HeapTupleHeaderIsHotUpdated(htup))
1925 continue;
1926
1927 /* Set up to scan the HOT-chain */
1928 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1929 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1930 }
1931 else
1932 {
1933 /* Must be a redirect item. We do not set its root_offsets entry */
1935 /* Set up to scan the HOT-chain */
1936 nextoffnum = ItemIdGetRedirect(lp);
1937 priorXmax = InvalidTransactionId;
1938 }
1939
1940 /*
1941 * Now follow the HOT-chain and collect other tuples in the chain.
1942 *
1943 * Note: Even though this is a nested loop, the complexity of the
1944 * function is O(N) because a tuple in the page should be visited not
1945 * more than twice, once in the outer loop and once in HOT-chain
1946 * chases.
1947 */
1948 for (;;)
1949 {
1950 /* Sanity check (pure paranoia) */
1951 if (offnum < FirstOffsetNumber)
1952 break;
1953
1954 /*
1955 * An offset past the end of page's line pointer array is possible
1956 * when the array was truncated
1957 */
1958 if (offnum > maxoff)
1959 break;
1960
1961 lp = PageGetItemId(page, nextoffnum);
1962
1963 /* Check for broken chains */
1964 if (!ItemIdIsNormal(lp))
1965 break;
1966
1967 htup = (HeapTupleHeader) PageGetItem(page, lp);
1968
1969 if (TransactionIdIsValid(priorXmax) &&
1971 break;
1972
1973 /* Remember the root line pointer for this item */
1974 root_offsets[nextoffnum - 1] = offnum;
1975
1976 /* Advance to next chain member, if any */
1977 if (!HeapTupleHeaderIsHotUpdated(htup))
1978 break;
1979
1980 /* HOT implies it can't have moved to different partition */
1982
1983 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1984 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1985 }
1986 }
1987}
#define MemSet(start, val, len)
Definition: c.h:1022
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
Definition: htup_details.h:534
#define MaxHeapTuplesPerPage
Definition: htup_details.h:624
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
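
Illustrative usage (not part of the generated listing): map a heap-only tuple's offset back to the root of its HOT chain, the way the index build/validate code uses this mapping. The wrapper name and the share-lock choice are assumptions made for the example.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

static OffsetNumber
hot_chain_root_of(Relation rel, BlockNumber blkno, OffsetNumber offnum)
{
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
	Buffer		buf = ReadBuffer(rel, blkno);
	OffsetNumber root;

	LockBuffer(buf, BUFFER_LOCK_SHARE);
	heap_get_root_tuples(BufferGetPage(buf), root_offsets);
	root = root_offsets[offnum - 1];	/* InvalidOffsetNumber if unreachable */
	UnlockReleaseBuffer(buf);
	return root;
}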

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1361 of file heapam.c.

1362{
1363 HeapScanDesc scan = (HeapScanDesc) sscan;
1364
1365 /*
1366 * This is still widely used directly, without going through table AM, so
1367 * add a safety check. It's possible we should, at a later point,
1368 * downgrade this to an assert. The reason for checking the AM routine,
1369 * rather than the AM oid, is that this allows to write regression tests
1370 * that create another AM reusing the heap handler.
1371 */
1373 ereport(ERROR,
1374 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1375 errmsg_internal("only heap AM is supported")));
1376
1377 /*
1378 * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1379 * for catalog or regular tables. See detailed comments in xact.c where
1380 * these variables are declared. Normally we have such a check at tableam
1381 * level API but this is called from many places so we need to ensure it
1382 * here.
1383 */
1385 elog(ERROR, "unexpected heap_getnext call during logical decoding");
1386
1387 /* Note: no locking manipulations needed */
1388
1390 heapgettup_pagemode(scan, direction,
1391 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1392 else
1393 heapgettup(scan, direction,
1394 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1395
1396 if (scan->rs_ctup.t_data == NULL)
1397 return NULL;
1398
1399 /*
1400 * if we get here it means we have a new current scan tuple, so point to
1401 * the proper return buffer and return the tuple.
1402 */
1403
1405
1406 return &scan->rs_ctup;
1407}
#define unlikely(x)
Definition: c.h:407
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1170
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:911
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:1021
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:695
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:62
bool bsysscan
Definition: xact.c:101
TransactionId CheckXidAlive
Definition: xact.c:100

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
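
Illustrative usage (not part of the generated listing): count the tuples visible to the active snapshot with a plain sequential scan. The wrapper name and the flag combination (mirroring what table_beginscan() passes) are assumptions made for this example.

#include "postgres.h"

#include "access/heapam.h"
#include "access/table.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

static uint64
count_visible_tuples(Oid relid)
{
	Relation	rel = table_open(relid, AccessShareLock);
	Snapshot	snap = GetActiveSnapshot();
	TableScanDesc scan;
	HeapTuple	tuple;
	uint64		ntuples = 0;

	scan = heap_beginscan(rel, snap, 0, NULL, NULL,
						  SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
						  SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
		ntuples++;
	heap_endscan(scan);

	table_close(rel, AccessShareLock);
	return ntuples;
}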

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1410 of file heapam.c.

1411{
1412 HeapScanDesc scan = (HeapScanDesc) sscan;
1413
1414 /* Note: no locking manipulations needed */
1415
1416 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1417 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1418 else
1419 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1420
1421 if (scan->rs_ctup.t_data == NULL)
1422 {
1423 ExecClearTuple(slot);
1424 return false;
1425 }
1426
1427 /*
1428 * if we get here it means we have a new current scan tuple, so point to
1429 * the proper return buffer and return the tuple.
1430 */
1431
1433
1434 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1435 scan->rs_cbuf);
1436 return true;
1437}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1581
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:457

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
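
Illustrative usage (not part of the generated listing): drive the same kind of scan through TupleTableSlots, which is what executor-level callers do. Attribute number 1 and the wrapper name are assumptions made for the example.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/snapmgr.h"

static uint64
count_nonnull_first_attribute(Relation rel)
{
	TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	uint64		n = 0;

	while (heap_getnextslot(scan, ForwardScanDirection, slot))
	{
		bool		isnull;

		(void) slot_getattr(slot, 1, &isnull);
		if (!isnull)
			n++;
	}

	ExecDropSingleTupleTableSlot(slot);
	table_endscan(scan);
	return n;
}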

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1513 of file heapam.c.

1515{
1516 HeapScanDesc scan = (HeapScanDesc) sscan;
1517 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1518 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1519
1520 /* Note: no locking manipulations needed */
1521 for (;;)
1522 {
1523 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1524 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1525 else
1526 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1527
1528 if (scan->rs_ctup.t_data == NULL)
1529 {
1530 ExecClearTuple(slot);
1531 return false;
1532 }
1533
1534 /*
1535 * heap_set_tidrange will have used heap_setscanlimits to limit the
1536 * range of pages we scan to only ones that can contain the TID range
1537 * we're scanning for. Here we must filter out any tuples from these
1538 * pages that are outside of that range.
1539 */
1540 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1541 {
1542 ExecClearTuple(slot);
1543
1544 /*
1545 * When scanning backwards, the TIDs will be in descending order.
1546 * Future tuples in this direction will be lower still, so we can
1547 * just return false to indicate there will be no more tuples.
1548 */
1549 if (ScanDirectionIsBackward(direction))
1550 return false;
1551
1552 continue;
1553 }
1554
1555 /*
1556 * Likewise for the final page, we must filter out TIDs greater than
1557 * maxtid.
1558 */
1559 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1560 {
1561 ExecClearTuple(slot);
1562
1563 /*
1564 * When scanning forward, the TIDs will be in ascending order.
1565 * Future tuples in this direction will be higher still, so we can
1566 * just return false to indicate there will be no more tuples.
1567 */
1568 if (ScanDirectionIsForward(direction))
1569 return false;
1570 continue;
1571 }
1572
1573 break;
1574 }
1575
1576 /*
1577 * if we get here it means we have a new current scan tuple, so point to
1578 * the proper return buffer and return the tuple.
1579 */
1581
1582 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1583 return true;
1584}
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
struct TableScanDescData::@50::@51 tidrange
ItemPointerData rs_mintid
Definition: relscan.h:55
ItemPointerData rs_maxtid
Definition: relscan.h:56
union TableScanDescData::@50 st

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, TableScanDescData::st, HeapTupleData::t_data, HeapTupleData::t_self, and TableScanDescData::tidrange.
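
Illustrative usage (not part of the generated listing): count tuples whose TIDs fall inside [mintid, maxtid]. The flag combination mirrors table_beginscan_tidrange(); the wrapper name and everything else are assumptions made for the example.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/snapmgr.h"

static uint64
count_tuples_in_tid_range(Relation rel, ItemPointerData mintid, ItemPointerData maxtid)
{
	TableScanDesc scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
										SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	uint64		n = 0;

	heap_set_tidrange(scan, &mintid, &maxtid);
	while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
		n++;

	ExecDropSingleTupleTableSlot(slot);
	heap_endscan(scan);
	return n;
}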

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool *  all_dead,
bool  first_call 
)

Definition at line 1740 of file heapam.c.

1743{
1744 Page page = BufferGetPage(buffer);
1745 TransactionId prev_xmax = InvalidTransactionId;
1746 BlockNumber blkno;
1747 OffsetNumber offnum;
1748 bool at_chain_start;
1749 bool valid;
1750 bool skip;
1751 GlobalVisState *vistest = NULL;
1752
1753 /* If this is not the first call, previous call returned a (live!) tuple */
1754 if (all_dead)
1755 *all_dead = first_call;
1756
1757 blkno = ItemPointerGetBlockNumber(tid);
1758 offnum = ItemPointerGetOffsetNumber(tid);
1759 at_chain_start = first_call;
1760 skip = !first_call;
1761
1762 /* XXX: we should assert that a snapshot is pushed or registered */
1764 Assert(BufferGetBlockNumber(buffer) == blkno);
1765
1766 /* Scan through possible multiple members of HOT-chain */
1767 for (;;)
1768 {
1769 ItemId lp;
1770
1771 /* check for bogus TID */
1772 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1773 break;
1774
1775 lp = PageGetItemId(page, offnum);
1776
1777 /* check for unused, dead, or redirected items */
1778 if (!ItemIdIsNormal(lp))
1779 {
1780 /* We should only see a redirect at start of chain */
1781 if (ItemIdIsRedirected(lp) && at_chain_start)
1782 {
1783 /* Follow the redirect */
1784 offnum = ItemIdGetRedirect(lp);
1785 at_chain_start = false;
1786 continue;
1787 }
1788 /* else must be end of chain */
1789 break;
1790 }
1791
1792 /*
1793 * Update heapTuple to point to the element of the HOT chain we're
1794 * currently investigating. Having t_self set correctly is important
1795 * because the SSI checks and the *Satisfies routine for historical
1796 * MVCC snapshots need the correct tid to decide about the visibility.
1797 */
1798 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1799 heapTuple->t_len = ItemIdGetLength(lp);
1800 heapTuple->t_tableOid = RelationGetRelid(relation);
1801 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1802
1803 /*
1804 * Shouldn't see a HEAP_ONLY tuple at chain start.
1805 */
1806 if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1807 break;
1808
1809 /*
1810 * The xmin should match the previous xmax value, else chain is
1811 * broken.
1812 */
1813 if (TransactionIdIsValid(prev_xmax) &&
1814 !TransactionIdEquals(prev_xmax,
1815 HeapTupleHeaderGetXmin(heapTuple->t_data)))
1816 break;
1817
1818 /*
1819 * When first_call is true (and thus, skip is initially false) we'll
1820 * return the first tuple we find. But on later passes, heapTuple
1821 * will initially be pointing to the tuple we returned last time.
1822 * Returning it again would be incorrect (and would loop forever), so
1823 * we skip it and return the next match we find.
1824 */
1825 if (!skip)
1826 {
1827 /* If it's visible per the snapshot, we must return it */
1828 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1829 HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1830 buffer, snapshot);
1831
1832 if (valid)
1833 {
1834 ItemPointerSetOffsetNumber(tid, offnum);
1835 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1836 HeapTupleHeaderGetXmin(heapTuple->t_data));
1837 if (all_dead)
1838 *all_dead = false;
1839 return true;
1840 }
1841 }
1842 skip = false;
1843
1844 /*
1845 * If we can't see it, maybe no one else can either. At caller
1846 * request, check whether all chain members are dead to all
1847 * transactions.
1848 *
1849 * Note: if you change the criterion here for what is "dead", fix the
1850 * planner's get_actual_variable_range() function to match.
1851 */
1852 if (all_dead && *all_dead)
1853 {
1854 if (!vistest)
1855 vistest = GlobalVisTestFor(relation);
1856
1857 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1858 *all_dead = false;
1859 }
1860
1861 /*
1862 * Check to see if HOT chain continues past this tuple; if so fetch
1863 * the next offnum and loop around.
1864 */
1865 if (HeapTupleIsHotUpdated(heapTuple))
1866 {
1868 blkno);
1869 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1870 at_chain_start = false;
1871 prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1872 }
1873 else
1874 break; /* end of chain */
1875 }
1876
1877 return false;
1878}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:768
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:786
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4069
TransactionId RecentXmin
Definition: snapmgr.c:160

References Assert(), BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().
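
Illustrative usage (not part of the generated listing): probe one HOT chain for a member visible to the given snapshot, roughly the way an index-fetch caller would. The wrapper name is an assumption made for the example; the pin-and-share-lock protocol follows the listing above.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "storage/itemptr.h"

static bool
fetch_visible_hot_member(Relation rel, ItemPointerData tid,
						 Snapshot snapshot, HeapTupleData *tuple)
{
	Buffer		buf = ReadBuffer(rel, ItemPointerGetBlockNumber(&tid));
	bool		all_dead;
	bool		found;

	LockBuffer(buf, BUFFER_LOCK_SHARE);
	/* on success, tid is updated to the offset of the visible chain member */
	found = heap_hot_search_buffer(&tid, rel, buf, snapshot, tuple,
								   &all_dead, true);
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);
	return found;
}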

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 8144 of file heapam.c.

8145{
8146 /* Initial assumption is that earlier pruning took care of conflict */
8147 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8150 Page page = NULL;
8152 TransactionId priorXmax;
8153#ifdef USE_PREFETCH
8154 IndexDeletePrefetchState prefetch_state;
8155 int prefetch_distance;
8156#endif
8157 SnapshotData SnapshotNonVacuumable;
8158 int finalndeltids = 0,
8159 nblocksaccessed = 0;
8160
8161 /* State that's only used in bottom-up index deletion case */
8162 int nblocksfavorable = 0;
8163 int curtargetfreespace = delstate->bottomupfreespace,
8164 lastfreespace = 0,
8165 actualfreespace = 0;
8166 bool bottomup_final_block = false;
8167
8168 InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
8169
8170 /* Sort caller's deltids array by TID for further processing */
8171 index_delete_sort(delstate);
8172
8173 /*
8174 * Bottom-up case: resort deltids array in an order attuned to where the
8175 * greatest number of promising TIDs are to be found, and determine how
8176 * many blocks from the start of sorted array should be considered
8177 * favorable. This will also shrink the deltids array in order to
8178 * eliminate completely unfavorable blocks up front.
8179 */
8180 if (delstate->bottomup)
8181 nblocksfavorable = bottomup_sort_and_shrink(delstate);
8182
8183#ifdef USE_PREFETCH
8184 /* Initialize prefetch state. */
8185 prefetch_state.cur_hblkno = InvalidBlockNumber;
8186 prefetch_state.next_item = 0;
8187 prefetch_state.ndeltids = delstate->ndeltids;
8188 prefetch_state.deltids = delstate->deltids;
8189
8190 /*
8191 * Determine the prefetch distance that we will attempt to maintain.
8192 *
8193 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8194 * sure that isn't a catalog relation before we call code that does
8195 * syscache lookups, to avoid risk of deadlock.
8196 */
8197 if (IsCatalogRelation(rel))
8198 prefetch_distance = maintenance_io_concurrency;
8199 else
8200 prefetch_distance =
8202
8203 /* Cap initial prefetch distance for bottom-up deletion caller */
8204 if (delstate->bottomup)
8205 {
8206 Assert(nblocksfavorable >= 1);
8207 Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
8208 prefetch_distance = Min(prefetch_distance, nblocksfavorable);
8209 }
8210
8211 /* Start prefetching. */
8212 index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
8213#endif
8214
8215 /* Iterate over deltids, determine which to delete, check their horizon */
8216 Assert(delstate->ndeltids > 0);
8217 for (int i = 0; i < delstate->ndeltids; i++)
8218 {
8219 TM_IndexDelete *ideltid = &delstate->deltids[i];
8220 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8221 ItemPointer htid = &ideltid->tid;
8222 OffsetNumber offnum;
8223
8224 /*
8225 * Read buffer, and perform required extra steps each time a new block
8226 * is encountered. Avoid refetching if it's the same block as the one
8227 * from the last htid.
8228 */
8229 if (blkno == InvalidBlockNumber ||
8230 ItemPointerGetBlockNumber(htid) != blkno)
8231 {
8232 /*
8233 * Consider giving up early for bottom-up index deletion caller
8234 * first. (Only prefetch next-next block afterwards, when it
8235 * becomes clear that we're at least going to access the next
8236 * block in line.)
8237 *
8238 * Sometimes the first block frees so much space for bottom-up
8239 * caller that the deletion process can end without accessing any
8240 * more blocks. It is usually necessary to access 2 or 3 blocks
8241 * per bottom-up deletion operation, though.
8242 */
8243 if (delstate->bottomup)
8244 {
8245 /*
8246 * We often allow caller to delete a few additional items
8247 * whose entries we reached after the point that space target
8248 * from caller was satisfied. The cost of accessing the page
8249 * was already paid at that point, so it made sense to finish
8250 * it off. When that happened, we finalize everything here
8251 * (by finishing off the whole bottom-up deletion operation
8252 * without needlessly paying the cost of accessing any more
8253 * blocks).
8254 */
8255 if (bottomup_final_block)
8256 break;
8257
8258 /*
8259 * Give up when we didn't enable our caller to free any
8260 * additional space as a result of processing the page that we
8261 * just finished up with. This rule is the main way in which
8262 * we keep the cost of bottom-up deletion under control.
8263 */
8264 if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
8265 break;
8266 lastfreespace = actualfreespace; /* for next time */
8267
8268 /*
8269 * Deletion operation (which is bottom-up) will definitely
8270 * access the next block in line. Prepare for that now.
8271 *
8272 * Decay target free space so that we don't hang on for too
8273 * long with a marginal case. (Space target is only truly
8274 * helpful when it allows us to recognize that we don't need
8275 * to access more than 1 or 2 blocks to satisfy caller due to
8276 * agreeable workload characteristics.)
8277 *
8278 * We are a bit more patient when we encounter contiguous
8279 * blocks, though: these are treated as favorable blocks. The
8280 * decay process is only applied when the next block in line
8281 * is not a favorable/contiguous block. This is not an
8282 * exception to the general rule; we still insist on finding
8283 * at least one deletable item per block accessed. See
8284 * bottomup_nblocksfavorable() for full details of the theory
8285 * behind favorable blocks and heap block locality in general.
8286 *
8287 * Note: The first block in line is always treated as a
8288 * favorable block, so the earliest possible point that the
8289 * decay can be applied is just before we access the second
8290 * block in line. The Assert() verifies this for us.
8291 */
8292 Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
8293 if (nblocksfavorable > 0)
8294 nblocksfavorable--;
8295 else
8296 curtargetfreespace /= 2;
8297 }
8298
8299 /* release old buffer */
8300 if (BufferIsValid(buf))
8302
8303 blkno = ItemPointerGetBlockNumber(htid);
8304 buf = ReadBuffer(rel, blkno);
8305 nblocksaccessed++;
8306 Assert(!delstate->bottomup ||
8307 nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
8308
8309#ifdef USE_PREFETCH
8310
8311 /*
8312 * To maintain the prefetch distance, prefetch one more page for
8313 * each page we read.
8314 */
8315 index_delete_prefetch_buffer(rel, &prefetch_state, 1);
8316#endif
8317
8319
8320 page = BufferGetPage(buf);
8321 maxoff = PageGetMaxOffsetNumber(page);
8322 }
8323
8324 /*
8325 * In passing, detect index corruption involving an index page with a
8326 * TID that points to a location in the heap that couldn't possibly be
8327 * correct. We only do this with actual TIDs from caller's index page
8328 * (not items reached by traversing through a HOT chain).
8329 */
8330 index_delete_check_htid(delstate, page, maxoff, htid, istatus);
8331
8332 if (istatus->knowndeletable)
8333 Assert(!delstate->bottomup && !istatus->promising);
8334 else
8335 {
8336 ItemPointerData tmp = *htid;
8337 HeapTupleData heapTuple;
8338
8339 /* Are any tuples from this HOT chain non-vacuumable? */
8340 if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
8341 &heapTuple, NULL, true))
8342 continue; /* can't delete entry */
8343
8344 /* Caller will delete, since whole HOT chain is vacuumable */
8345 istatus->knowndeletable = true;
8346
8347 /* Maintain index free space info for bottom-up deletion case */
8348 if (delstate->bottomup)
8349 {
8350 Assert(istatus->freespace > 0);
8351 actualfreespace += istatus->freespace;
8352 if (actualfreespace >= curtargetfreespace)
8353 bottomup_final_block = true;
8354 }
8355 }
8356
8357 /*
8358 * Maintain snapshotConflictHorizon value for deletion operation as a
8359 * whole by advancing current value using heap tuple headers. This is
8360 * loosely based on the logic for pruning a HOT chain.
8361 */
8362 offnum = ItemPointerGetOffsetNumber(htid);
8363 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8364 for (;;)
8365 {
8366 ItemId lp;
8367 HeapTupleHeader htup;
8368
8369 /* Sanity check (pure paranoia) */
8370 if (offnum < FirstOffsetNumber)
8371 break;
8372
8373 /*
8374 * An offset past the end of page's line pointer array is possible
8375 * when the array was truncated
8376 */
8377 if (offnum > maxoff)
8378 break;
8379
8380 lp = PageGetItemId(page, offnum);
8381 if (ItemIdIsRedirected(lp))
8382 {
8383 offnum = ItemIdGetRedirect(lp);
8384 continue;
8385 }
8386
8387 /*
8388 * We'll often encounter LP_DEAD line pointers (especially with an
8389 * entry marked knowndeletable by our caller up front). No heap
8390 * tuple headers get examined for an htid that leads us to an
8391 * LP_DEAD item. This is okay because the earlier pruning
8392 * operation that made the line pointer LP_DEAD in the first place
8393 * must have considered the original tuple header as part of
8394 * generating its own snapshotConflictHorizon value.
8395 *
8396 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8397 * the same strategy that index vacuuming uses in all cases. Index
8398 * VACUUM WAL records don't even have a snapshotConflictHorizon
8399 * field of their own for this reason.
8400 */
8401 if (!ItemIdIsNormal(lp))
8402 break;
8403
8404 htup = (HeapTupleHeader) PageGetItem(page, lp);
8405
8406 /*
8407 * Check the tuple XMIN against prior XMAX, if any
8408 */
8409 if (TransactionIdIsValid(priorXmax) &&
8411 break;
8412
8414 &snapshotConflictHorizon);
8415
8416 /*
8417 * If the tuple is not HOT-updated, then we are at the end of this
8418 * HOT-chain. No need to visit later tuples from the same update
8419 * chain (they get their own index entries) -- just move on to
8420 * next htid from index AM caller.
8421 */
8422 if (!HeapTupleHeaderIsHotUpdated(htup))
8423 break;
8424
8425 /* Advance to next HOT chain member */
8426 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8427 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8428 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
8429 }
8430
8431 /* Enable further/final shrinking of deltids for caller */
8432 finalndeltids = i + 1;
8433 }
8434
8436
8437 /*
8438 * Shrink deltids array to exclude non-deletable entries at the end. This
8439 * is not just a minor optimization. Final deltids array size might be
8440 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8441 * ndeltids being zero in all cases with zero total deletable entries.
8442 */
8443 Assert(finalndeltids > 0 || delstate->bottomup);
8444 delstate->ndeltids = finalndeltids;
8445
8446 return snapshotConflictHorizon;
8447}
int maintenance_io_concurrency
Definition: bufmgr.c:162
#define Min(x, y)
Definition: c.h:1006
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8701
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7999
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:184
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, const ItemPointerData *htid, TM_IndexStatus *istatus)
Definition: heapam.c:8084
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1740
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8489
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:254
int bottomupfreespace
Definition: tableam.h:249
TM_IndexDelete * deltids
Definition: tableam.h:253
ItemPointerData tid
Definition: tableam.h:212
bool knowndeletable
Definition: tableam.h:219
bool promising
Definition: tableam.h:222
int16 freespace
Definition: tableam.h:223

References Assert(), TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void *  arg 
)

Definition at line 6380 of file heapam.c.

6383{
6384 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6385 TM_Result result;
6386 bool ret;
6387
6388#ifdef USE_ASSERT_CHECKING
6389 if (RelationGetRelid(relation) == RelationRelationId)
6390 check_inplace_rel_lock(oldtup_ptr);
6391#endif
6392
6393 Assert(BufferIsValid(buffer));
6394
6395 /*
6396 * Construct shared cache inval if necessary. Because we pass a tuple
6397 * version without our own inplace changes or inplace changes other
6398 * sessions complete while we wait for locks, inplace update mustn't
6399 * change catcache lookup keys. But we aren't bothering with index
6400 * updates either, so that's true a fortiori. After LockBuffer(), it
6401 * would be too late, because this might reach a
6402 * CatalogCacheInitializeCache() that locks "buffer".
6403 */
6404 CacheInvalidateHeapTupleInplace(relation, oldtup_ptr, NULL);
6405
6406 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6408
6409 /*----------
6410 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6411 *
6412 * - wait unconditionally
6413 * - already locked tuple above, since inplace needs that unconditionally
6414 * - don't recheck header after wait: simpler to defer to next iteration
6415 * - don't try to continue even if the updater aborts: likewise
6416 * - no crosscheck
6417 */
6418 result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
6419 buffer);
6420
6421 if (result == TM_Invisible)
6422 {
6423 /* no known way this can happen */
6424 ereport(ERROR,
6425 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6426 errmsg_internal("attempted to overwrite invisible tuple")));
6427 }
6428 else if (result == TM_SelfModified)
6429 {
6430 /*
6431 * CREATE INDEX might reach this if an expression is silly enough to
6432 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6433 * statements might get here after a heap_update() of the same row, in
6434 * the absence of an intervening CommandCounterIncrement().
6435 */
6436 ereport(ERROR,
6437 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6438 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6439 }
6440 else if (result == TM_BeingModified)
6441 {
6442 TransactionId xwait;
6443 uint16 infomask;
6444
6445 xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
6446 infomask = oldtup.t_data->t_infomask;
6447
6448 if (infomask & HEAP_XMAX_IS_MULTI)
6449 {
6452 int remain;
6453
6454 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
6455 lockmode, NULL))
6456 {
6458 release_callback(arg);
6459 ret = false;
6460 MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
6461 relation, &oldtup.t_self, XLTW_Update,
6462 &remain);
6463 }
6464 else
6465 ret = true;
6466 }
6468 ret = true;
6469 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
6470 ret = true;
6471 else
6472 {
6474 release_callback(arg);
6475 ret = false;
6476 XactLockTableWait(xwait, relation, &oldtup.t_self,
6477 XLTW_Update);
6478 }
6479 }
6480 else
6481 {
6482 ret = (result == TM_Ok);
6483 if (!ret)
6484 {
6486 release_callback(arg);
6487 }
6488 }
6489
6490 /*
6491 * GetCatalogSnapshot() relies on invalidation messages to know when to
6492 * take a new snapshot. COMMIT of xwait is responsible for sending the
6493 * invalidation. We're not acquiring heavyweight locks sufficient to
6494 * block if not yet sent, so we must take a new snapshot to ensure a later
6495 * attempt has a fair chance. While we don't need this if xwait aborted,
6496 * don't bother optimizing that.
6497 */
6498 if (!ret)
6499 {
6500 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6503 }
6504 return ret;
6505}
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(uint16 infomask)
Definition: htup_details.h:275
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1588
void ForgetInplace_Inval(void)
Definition: inval.c:1286
void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition: lmgr.c:601
void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition: lmgr.c:562
@ XLTW_Update
Definition: lmgr.h:27
#define InplaceUpdateTupleLock
Definition: lockdefs.h:48
LockTupleMode
Definition: lockoptions.h:50
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
MultiXactStatus
Definition: multixact.h:39
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:45
void * arg
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:455
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:830

References arg, Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)

Definition at line 6516 of file heapam.c.

6519{
6520 HeapTupleHeader htup = oldtup->t_data;
6521 uint32 oldlen;
6522 uint32 newlen;
6523 char *dst;
6524 char *src;
6525 int nmsgs = 0;
6526 SharedInvalidationMessage *invalMessages = NULL;
6527 bool RelcacheInitFileInval = false;
6528
6529 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6530 oldlen = oldtup->t_len - htup->t_hoff;
6531 newlen = tuple->t_len - tuple->t_data->t_hoff;
6532 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6533 elog(ERROR, "wrong tuple length");
6534
6535 dst = (char *) htup + htup->t_hoff;
6536 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6537
6538 /* Like RecordTransactionCommit(), log only if needed */
6540 nmsgs = inplaceGetInvalidationMessages(&invalMessages,
6541 &RelcacheInitFileInval);
6542
6543 /*
6544 * Unlink relcache init files as needed. If unlinking, acquire
6545 * RelCacheInitLock until after associated invalidations. By doing this
6546 * in advance, if we checkpoint and then crash between inplace
6547 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6548 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6549 * neglect to PANIC on EIO.
6550 */
6552
6553 /*----------
6554 * NO EREPORT(ERROR) from here till changes are complete
6555 *
6556 * Our buffer lock won't stop a reader having already pinned and checked
6557 * visibility for this tuple. Hence, we write WAL first, then mutate the
6558 * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6559 * checkpoint delay makes that acceptable. With the usual order of
6560 * changes, a crash after memcpy() and before XLogInsert() could allow
6561 * datfrozenxid to overtake relfrozenxid:
6562 *
6563 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6564 * ["R" is a VACUUM tbl]
6565 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6566 * D: systable_getnext() returns pg_class tuple of tbl
6567 * R: memcpy() into pg_class tuple of tbl
6568 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6569 * [crash]
6570 * [recovery restores datfrozenxid w/o relfrozenxid]
6571 *
6572 * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint().
6573 * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack.
6574 * The stack copy facilitates a FPI of the post-mutation block before we
6575 * accept other sessions seeing it. DELAY_CHKPT_START allows us to
6576 * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint()
6577 * can operate under BUFFER_LOCK_SHARED, it can't avoid DELAY_CHKPT_START.
6578 * This function, however, likely could avoid it with the following order
6579 * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use
6580 * DELAY_CHKPT_START here, too, as a way to have fewer distinct code
6581 * patterns to analyze. Inplace update isn't so frequent that it should
6582 * pursue the small optimization of skipping DELAY_CHKPT_START.
6583 */
6587
6588 /* XLOG stuff */
6589 if (RelationNeedsWAL(relation))
6590 {
6591 xl_heap_inplace xlrec;
6592 PGAlignedBlock copied_buffer;
6593 char *origdata = (char *) BufferGetBlock(buffer);
6594 Page page = BufferGetPage(buffer);
6595 uint16 lower = ((PageHeader) page)->pd_lower;
6596 uint16 upper = ((PageHeader) page)->pd_upper;
6597 uintptr_t dst_offset_in_block;
6598 RelFileLocator rlocator;
6599 ForkNumber forkno;
6600 BlockNumber blkno;
6601 XLogRecPtr recptr;
6602
6603 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6604 xlrec.dbId = MyDatabaseId;
6605 xlrec.tsId = MyDatabaseTableSpace;
6606 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6607 xlrec.nmsgs = nmsgs;
6608
6611 if (nmsgs != 0)
6612 XLogRegisterData(invalMessages,
6613 nmsgs * sizeof(SharedInvalidationMessage));
6614
6615 /* register block matching what buffer will look like after changes */
6616 memcpy(copied_buffer.data, origdata, lower);
6617 memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6618 dst_offset_in_block = dst - origdata;
6619 memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6620 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6621 Assert(forkno == MAIN_FORKNUM);
6622 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6624 XLogRegisterBufData(0, src, newlen);
6625
6626 /* inplace updates aren't decoded atm, don't log the origin */
6627
6628 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6629
6630 PageSetLSN(page, recptr);
6631 }
6632
6633 memcpy(dst, src, newlen);
6634
6635 MarkBufferDirty(buffer);
6636
6638
6639 /*
6640 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6641 * do this before UnlockTuple().
6642 *
6643 * If we're mutating a tuple visible only to this transaction, there's an
6644 * equivalent transactional inval from the action that created the tuple,
6645 * and this inval is superfluous.
6646 */
6648
6649 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
6651 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6652
6653 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6654
6655 /*
6656 * Queue a transactional inval. The immediate invalidation we just sent
6657 * is the only one known to be necessary. To reduce risk from the
6658 * transition to immediate invalidation, continue sending a transactional
6659 * invalidation like we've long done. Third-party code might rely on it.
6660 */
6662 CacheInvalidateHeapTuple(relation, tuple, NULL);
6663}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:4244
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:403
PageHeaderData * PageHeader
Definition: bufpage.h:173
uint32_t uint32
Definition: c.h:541
Oid MyDatabaseTableSpace
Definition: globals.c:96
Oid MyDatabaseId
Definition: globals.c:94
#define MinSizeOfHeapInplace
Definition: heapam_xlog.h:444
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition: inval.c:930
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition: inval.c:1088
void PreInplace_Inval(void)
Definition: inval.c:1250
void AtInplace_Inval(void)
Definition: inval.c:1263
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
#define DELAY_CHKPT_START
Definition: proc.h:135
ForkNumber
Definition: relpath.h:56
PGPROC * MyProc
Definition: proc.c:67
char data[BLCKSZ]
Definition: c.h:1119
int delayChkptFlags
Definition: proc.h:257
OffsetNumber offnum
Definition: heapam_xlog.h:436
bool relcacheInitFileInval
Definition: heapam_xlog.h:439
#define XLogStandbyInfoActive()
Definition: xlog.h:123
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition: xloginsert.c:409
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition: xloginsert.c:313

References AcceptInvalidationMessages(), Assert(), AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), PGAlignedBlock::data, xl_heap_inplace::dbId, DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, xl_heap_inplace::nmsgs, xl_heap_inplace::offnum, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, xl_heap_inplace::relcacheInitFileInval, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, xl_heap_inplace::tsId, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().
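
For orientation, a hedged sketch of how heap_inplace_lock() and heap_inplace_update_and_unlock() pair up, loosely following the systable_inplace_update_begin()/systable_inplace_update_finish() pattern. Locating the old tuple and pinning its buffer are assumed to have happened already, and the helper names are invented for the example.

#include "postgres.h"

#include "access/heapam.h"

static void
release_nothing(void *arg)
{
	/* nothing was acquired before the lock attempt in this simplified sketch */
}

static bool
inplace_update_sketch(Relation rel, HeapTuple oldtup, Buffer buf, HeapTuple newtup)
{
	/* returns false if the tuple is being modified concurrently; caller retries */
	if (!heap_inplace_lock(rel, oldtup, buf, release_nothing, NULL))
		return false;

	/* newtup must have the same length and header size as oldtup */
	heap_inplace_update_and_unlock(rel, oldtup, newtup, buf);
	return true;
}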

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2103 of file heapam.c.

2105{
2107 HeapTuple heaptup;
2108 Buffer buffer;
2109 Buffer vmbuffer = InvalidBuffer;
2110 bool all_visible_cleared = false;
2111
2112 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2115
2116 AssertHasSnapshotForToast(relation);
2117
2118 /*
2119 * Fill in tuple header fields and toast the tuple if necessary.
2120 *
2121 * Note: below this point, heaptup is the data we actually intend to store
2122 * into the relation; tup is the caller's original untoasted data.
2123 */
2124 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2125
2126 /*
2127 * Find buffer to insert this tuple into. If the page is all visible,
2128 * this will also pin the requisite visibility map page.
2129 */
2130 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2131 InvalidBuffer, options, bistate,
2132 &vmbuffer, NULL,
2133 0);
2134
2135 /*
2136 * We're about to do the actual insert -- but check for conflict first, to
2137 * avoid possibly having to roll back work we've just done.
2138 *
2139 * This is safe without a recheck as long as there is no possibility of
2140 * another process scanning the page between this check and the insert
2141 * being visible to the scan (i.e., an exclusive buffer content lock is
2142 * continuously held from this point until the tuple insert is visible).
2143 *
2144 * For a heap insert, we only need to check for table-level SSI locks. Our
2145 * new tuple can't possibly conflict with existing tuple locks, and heap
2146 * page locks are only consolidated versions of tuple locks; they do not
2147 * lock "gaps" as index page locks do. So we don't need to specify a
2148 * buffer when making the call, which makes for a faster check.
2149 */
2151
2152 /* NO EREPORT(ERROR) from here till changes are logged */
2154
2155 RelationPutHeapTuple(relation, buffer, heaptup,
2157
2158 if (PageIsAllVisible(BufferGetPage(buffer)))
2159 {
2160 all_visible_cleared = true;
2162 visibilitymap_clear(relation,
2163 ItemPointerGetBlockNumber(&(heaptup->t_self)),
2164 vmbuffer, VISIBILITYMAP_VALID_BITS);
2165 }
2166
2167 /*
2168 * XXX Should we set PageSetPrunable on this page ?
2169 *
2170 * The inserting transaction may eventually abort thus making this tuple
2171 * DEAD and hence available for pruning. Though we don't want to optimize
2172 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2173 * aborted tuple will never be pruned until next vacuum is triggered.
2174 *
2175 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2176 */
2177
2178 MarkBufferDirty(buffer);
2179
2180 /* XLOG stuff */
2181 if (RelationNeedsWAL(relation))
2182 {
2183 xl_heap_insert xlrec;
2184 xl_heap_header xlhdr;
2185 XLogRecPtr recptr;
2186 Page page = BufferGetPage(buffer);
2187 uint8 info = XLOG_HEAP_INSERT;
2188 int bufflags = 0;
2189
2190 /*
2191 * If this is a catalog, we need to transmit combo CIDs to properly
2192 * decode, so log that as well.
2193 */
2195 log_heap_new_cid(relation, heaptup);
2196
2197 /*
2198 * If this is the single and first tuple on page, we can reinit the
2199 * page instead of restoring the whole thing. Set flag, and hide
2200 * buffer references from XLogInsert.
2201 */
2204 {
2205 info |= XLOG_HEAP_INIT_PAGE;
2206 bufflags |= REGBUF_WILL_INIT;
2207 }
2208
2209 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2210 xlrec.flags = 0;
2211 if (all_visible_cleared)
2216
2217 /*
2218 * For logical decoding, we need the tuple even if we're doing a full
2219 * page write, so make sure it's included even if we take a full-page
2220 * image. (XXX We could alternatively store a pointer into the FPW).
2221 */
2222 if (RelationIsLogicallyLogged(relation) &&
2224 {
2226 bufflags |= REGBUF_KEEP_DATA;
2227
2228 if (IsToastRelation(relation))
2230 }
2231
2234
2235 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2236 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2237 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2238
2239 /*
2240 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2241 * write the whole page to the xlog, we don't need to store
2242 * xl_heap_header in the xlog.
2243 */
2244 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2246 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2248 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2249 heaptup->t_len - SizeofHeapTupleHeader);
2250
2251 /* filtering by origin on a row level is much more efficient */
2253
2254 recptr = XLogInsert(RM_HEAP_ID, info);
2255
2256 PageSetLSN(page, recptr);
2257 }
2258
2260
2261 UnlockReleaseBuffer(buffer);
2262 if (vmbuffer != InvalidBuffer)
2263 ReleaseBuffer(vmbuffer);
2264
2265 /*
2266 * If tuple is cacheable, mark it for invalidation from the caches in case
2267 * we abort. Note it is OK to do this after releasing the buffer, because
2268 * the heaptup data structure is all in local memory, not in the shared
2269 * buffer.
2270 */
2271 CacheInvalidateHeapTuple(relation, heaptup, NULL);
2272
2273 /* Note: speculative insertions are counted too, even if aborted later */
2274 pgstat_count_heap_insert(relation, 1);
2275
2276 /*
2277 * If heaptup is a private copy, release it. Don't forget to copy t_self
2278 * back to the caller's image, too.
2279 */
2280 if (heaptup != tup)
2281 {
2282 tup->t_self = heaptup->t_self;
2283 heap_freetuple(heaptup);
2284 }
2285}
uint8_t uint8
Definition: c.h:539
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2294
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:40
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:39
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:33
#define SizeOfHeapInsert
Definition: heapam_xlog.h:168
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:500
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:577
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:711
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:521
OffsetNumber offnum
Definition: heapam_xlog.h:162
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:36
#define REGBUF_WILL_INIT
Definition: xloginsert.h:34

References Assert(), AssertHasSnapshotForToast(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
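
For orientation, here is a minimal caller-side sketch of driving heap_insert() for a single row, loosely modelled on what simple_heap_insert() and the table-AM layer do. It is not taken from the PostgreSQL sources: the helper name insert_one_row_demo is invented for this example, and it assumes a normal backend transaction context in which heap_form_tuple(), GetCurrentCommandId() and heap_freetuple() are available.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "storage/itemptr.h"
#include "utils/rel.h"

/*
 * Illustrative sketch only: form a tuple from datums and insert it.
 * No bulk-insert state and no HEAP_INSERT_* options are used here.
 */
static void
insert_one_row_demo(Relation rel, Datum *values, bool *isnull)
{
    TupleDesc   tupdesc = RelationGetDescr(rel);
    HeapTuple   tup = heap_form_tuple(tupdesc, values, isnull);
    CommandId   cid = GetCurrentCommandId(true);

    heap_insert(rel, tup, cid, 0, NULL);

    /* heap_insert() has filled in tup->t_self with the assigned TID. */
    elog(DEBUG1, "inserted tuple at block %u, offset %u",
         ItemPointerGetBlockNumber(&tup->t_self),
         ItemPointerGetOffsetNumber(&tup->t_self));

    heap_freetuple(tup);
}

Real callers such as heapam_tuple_insert() go through tuple table slots instead of bare datums, and may additionally pass a BulkInsertState and HEAP_INSERT_* option flags.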

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
TM_FailureData tmfd 
)

Definition at line 4605 of file heapam.c.

4609{
4610 TM_Result result;
4611 ItemPointer tid = &(tuple->t_self);
4612 ItemId lp;
4613 Page page;
4614 Buffer vmbuffer = InvalidBuffer;
4615 BlockNumber block;
4616 TransactionId xid,
4617 xmax;
4618 uint16 old_infomask,
4619 new_infomask,
4620 new_infomask2;
4621 bool first_time = true;
4622 bool skip_tuple_lock = false;
4623 bool have_tuple_lock = false;
4624 bool cleared_all_frozen = false;
4625
4626 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4627 block = ItemPointerGetBlockNumber(tid);
4628
4629 /*
4630 * Before locking the buffer, pin the visibility map page if it appears to
4631 * be necessary. Since we haven't got the lock yet, someone else might be
4632 * in the middle of changing this, so we'll need to recheck after we have
4633 * the lock.
4634 */
4635 if (PageIsAllVisible(BufferGetPage(*buffer)))
4636 visibilitymap_pin(relation, block, &vmbuffer);
4637
4639
4640 page = BufferGetPage(*buffer);
4643
4644 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4645 tuple->t_len = ItemIdGetLength(lp);
4646 tuple->t_tableOid = RelationGetRelid(relation);
4647
4648l3:
4649 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4650
4651 if (result == TM_Invisible)
4652 {
4653 /*
4654 * This is possible, but only when locking a tuple for ON CONFLICT
4655 * UPDATE. We return this value here rather than throwing an error in
4656 * order to give that case the opportunity to throw a more specific
4657 * error.
4658 */
4659 result = TM_Invisible;
4660 goto out_locked;
4661 }
4662 else if (result == TM_BeingModified ||
4663 result == TM_Updated ||
4664 result == TM_Deleted)
4665 {
4666 TransactionId xwait;
4667 uint16 infomask;
4668 uint16 infomask2;
4669 bool require_sleep;
4670 ItemPointerData t_ctid;
4671
4672 /* must copy state data before unlocking buffer */
4673 xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4674 infomask = tuple->t_data->t_infomask;
4675 infomask2 = tuple->t_data->t_infomask2;
4676 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4677
4679
4680 /*
4681 * If any subtransaction of the current top transaction already holds
4682 * a lock as strong as or stronger than what we're requesting, we
4683 * effectively hold the desired lock already. We *must* succeed
4684 * without trying to take the tuple lock, else we will deadlock
4685 * against anyone wanting to acquire a stronger lock.
4686 *
4687 * Note we only do this the first time we loop on the HTSU result;
4688 * there is no point in testing in subsequent passes, because
4689 * evidently our own transaction cannot have acquired a new lock after
4690 * the first time we checked.
4691 */
4692 if (first_time)
4693 {
4694 first_time = false;
4695
4696 if (infomask & HEAP_XMAX_IS_MULTI)
4697 {
4698 int i;
4699 int nmembers;
4700 MultiXactMember *members;
4701
4702 /*
4703 * We don't need to allow old multixacts here; if that had
4704 * been the case, HeapTupleSatisfiesUpdate would have returned
4705 * MayBeUpdated and we wouldn't be here.
4706 */
4707 nmembers =
4708 GetMultiXactIdMembers(xwait, &members, false,
4709 HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4710
4711 for (i = 0; i < nmembers; i++)
4712 {
4713 /* only consider members of our own transaction */
4714 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4715 continue;
4716
4717 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4718 {
4719 pfree(members);
4720 result = TM_Ok;
4721 goto out_unlocked;
4722 }
4723 else
4724 {
4725 /*
4726 * Disable acquisition of the heavyweight tuple lock.
4727 * Otherwise, when promoting a weaker lock, we might
4728 * deadlock with another locker that has acquired the
4729 * heavyweight tuple lock and is waiting for our
4730 * transaction to finish.
4731 *
4732 * Note that in this case we still need to wait for
4733 * the multixact if required, to avoid acquiring
4734 * conflicting locks.
4735 */
4736 skip_tuple_lock = true;
4737 }
4738 }
4739
4740 if (members)
4741 pfree(members);
4742 }
4744 {
4745 switch (mode)
4746 {
4747 case LockTupleKeyShare:
4749 HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4750 HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4751 result = TM_Ok;
4752 goto out_unlocked;
4753 case LockTupleShare:
4754 if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4755 HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4756 {
4757 result = TM_Ok;
4758 goto out_unlocked;
4759 }
4760 break;
4762 if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4763 {
4764 result = TM_Ok;
4765 goto out_unlocked;
4766 }
4767 break;
4768 case LockTupleExclusive:
4769 if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4770 infomask2 & HEAP_KEYS_UPDATED)
4771 {
4772 result = TM_Ok;
4773 goto out_unlocked;
4774 }
4775 break;
4776 }
4777 }
4778 }
4779
4780 /*
4781 * Initially assume that we will have to wait for the locking
4782 * transaction(s) to finish. We check various cases below in which
4783 * this can be turned off.
4784 */
4785 require_sleep = true;
4786 if (mode == LockTupleKeyShare)
4787 {
4788 /*
4789 * If we're requesting KeyShare, and there's no update present, we
4790 * don't need to wait. Even if there is an update, we can still
4791 * continue if the key hasn't been modified.
4792 *
4793 * However, if there are updates, we need to walk the update chain
4794 * to mark future versions of the row as locked, too. That way,
4795 * if somebody deletes that future version, we're protected
4796 * against the key going away. This locking of future versions
4797 * could block momentarily, if a concurrent transaction is
4798 * deleting a key; or it could return a value to the effect that
4799 * the transaction deleting the key has already committed. So we
4800 * do this before re-locking the buffer; otherwise this would be
4801 * prone to deadlocks.
4802 *
4803 * Note that the TID we're locking was grabbed before we unlocked
4804 * the buffer. For it to change while we're not looking, the
4805 * other properties we're testing for below after re-locking the
4806 * buffer would also change, in which case we would restart this
4807 * loop above.
4808 */
4809 if (!(infomask2 & HEAP_KEYS_UPDATED))
4810 {
4811 bool updated;
4812
4813 updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4814
4815 /*
4816 * If there are updates, follow the update chain; bail out if
4817 * that cannot be done.
4818 */
4819 if (follow_updates && updated)
4820 {
4821 TM_Result res;
4822
4823 res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4825 mode);
4826 if (res != TM_Ok)
4827 {
4828 result = res;
4829 /* recovery code expects to have buffer lock held */
4831 goto failed;
4832 }
4833 }
4834
4836
4837 /*
4838 * Make sure it's still an appropriate lock, else start over.
4839 * Also, if it wasn't updated before we released the lock, but
4840 * is updated now, we start over too; the reason is that we
4841 * now need to follow the update chain to lock the new
4842 * versions.
4843 */
4844 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4845 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4846 !updated))
4847 goto l3;
4848
4849 /* Things look okay, so we can skip sleeping */
4850 require_sleep = false;
4851
4852 /*
4853 * Note we allow Xmax to change here; other updaters/lockers
4854 * could have modified it before we grabbed the buffer lock.
4855 * However, this is not a problem, because with the recheck we
4856 * just did we ensure that they still don't conflict with the
4857 * lock we want.
4858 */
4859 }
4860 }
4861 else if (mode == LockTupleShare)
4862 {
4863 /*
4864 * If we're requesting Share, we can similarly avoid sleeping if
4865 * there's no update and no exclusive lock present.
4866 */
4867 if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4868 !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4869 {
4871
4872 /*
4873 * Make sure it's still an appropriate lock, else start over.
4874 * See above about allowing xmax to change.
4875 */
4878 goto l3;
4879 require_sleep = false;
4880 }
4881 }
4882 else if (mode == LockTupleNoKeyExclusive)
4883 {
4884 /*
4885 * If we're requesting NoKeyExclusive, we might also be able to
4886 * avoid sleeping; just ensure that there is no conflicting lock
4887 * already acquired.
4888 */
4889 if (infomask & HEAP_XMAX_IS_MULTI)
4890 {
4891 if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4892 mode, NULL))
4893 {
4894 /*
4895 * No conflict, but if the xmax changed under us in the
4896 * meantime, start over.
4897 */
4899 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4901 xwait))
4902 goto l3;
4903
4904 /* otherwise, we're good */
4905 require_sleep = false;
4906 }
4907 }
4908 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4909 {
4911
4912 /* if the xmax changed in the meantime, start over */
4913 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4915 xwait))
4916 goto l3;
4917 /* otherwise, we're good */
4918 require_sleep = false;
4919 }
4920 }
4921
4922 /*
4923 * As a check independent from those above, we can also avoid sleeping
4924 * if the current transaction is the sole locker of the tuple. Note
4925 * that the strength of the lock already held is irrelevant; this is
4926 * not about recording the lock in Xmax (which will be done regardless
4927 * of this optimization, below). Also, note that the cases where we
4928 * hold a lock stronger than we are requesting are already handled
4929 * above by not doing anything.
4930 *
4931 * Note we only deal with the non-multixact case here; MultiXactIdWait
4932 * is well equipped to deal with this situation on its own.
4933 */
4934 if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4936 {
4937 /* ... but if the xmax changed in the meantime, start over */
4939 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4941 xwait))
4942 goto l3;
4944 require_sleep = false;
4945 }
4946
4947 /*
4948 * Time to sleep on the other transaction/multixact, if necessary.
4949 *
4950 * If the other transaction is an update/delete that's already
4951 * committed, then sleeping cannot possibly do any good: if we're
4952 * required to sleep, get out to raise an error instead.
4953 *
4954 * By here, we either have already acquired the buffer exclusive lock,
4955 * or we must wait for the locking transaction or multixact; so below
4956 * we ensure that we grab buffer lock after the sleep.
4957 */
4958 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4959 {
4961 goto failed;
4962 }
4963 else if (require_sleep)
4964 {
4965 /*
4966 * Acquire tuple lock to establish our priority for the tuple, or
4967 * die trying. LockTuple will release us when we are next-in-line
4968 * for the tuple. We must do this even if we are share-locking,
4969 * but not if we already have a weaker lock on the tuple.
4970 *
4971 * If we are forced to "start over" below, we keep the tuple lock;
4972 * this arranges that we stay at the head of the line while
4973 * rechecking tuple state.
4974 */
4975 if (!skip_tuple_lock &&
4976 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4977 &have_tuple_lock))
4978 {
4979 /*
4980 * This can only happen if wait_policy is Skip and the lock
4981 * couldn't be obtained.
4982 */
4983 result = TM_WouldBlock;
4984 /* recovery code expects to have buffer lock held */
4986 goto failed;
4987 }
4988
4989 if (infomask & HEAP_XMAX_IS_MULTI)
4990 {
4992
4993 /* We only ever lock tuples, never update them */
4994 if (status >= MultiXactStatusNoKeyUpdate)
4995 elog(ERROR, "invalid lock mode in heap_lock_tuple");
4996
4997 /* wait for multixact to end, or die trying */
4998 switch (wait_policy)
4999 {
5000 case LockWaitBlock:
5001 MultiXactIdWait((MultiXactId) xwait, status, infomask,
5002 relation, &tuple->t_self, XLTW_Lock, NULL);
5003 break;
5004 case LockWaitSkip:
5006 status, infomask, relation,
5007 NULL, false))
5008 {
5009 result = TM_WouldBlock;
5010 /* recovery code expects to have buffer lock held */
5012 goto failed;
5013 }
5014 break;
5015 case LockWaitError:
5017 status, infomask, relation,
5018 NULL, log_lock_failures))
5019 ereport(ERROR,
5020 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
5021 errmsg("could not obtain lock on row in relation \"%s\"",
5022 RelationGetRelationName(relation))));
5023
5024 break;
5025 }
5026
5027 /*
5028 * Of course, the multixact might not be done here: if we're
5029 * requesting a light lock mode, other transactions with light
5030 * locks could still be alive, as well as locks owned by our
5031 * own xact or other subxacts of this backend. We need to
5032 * preserve the surviving MultiXact members. Note that it
5033 * isn't absolutely necessary in the latter case, but doing so
5034 * is simpler.
5035 */
5036 }
5037 else
5038 {
5039 /* wait for regular transaction to end, or die trying */
5040 switch (wait_policy)
5041 {
5042 case LockWaitBlock:
5043 XactLockTableWait(xwait, relation, &tuple->t_self,
5044 XLTW_Lock);
5045 break;
5046 case LockWaitSkip:
5047 if (!ConditionalXactLockTableWait(xwait, false))
5048 {
5049 result = TM_WouldBlock;
5050 /* recovery code expects to have buffer lock held */
5052 goto failed;
5053 }
5054 break;
5055 case LockWaitError:
5057 ereport(ERROR,
5058 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
5059 errmsg("could not obtain lock on row in relation \"%s\"",
5060 RelationGetRelationName(relation))));
5061 break;
5062 }
5063 }
5064
5065 /* if there are updates, follow the update chain */
5066 if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
5067 {
5068 TM_Result res;
5069
5070 res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
5072 mode);
5073 if (res != TM_Ok)
5074 {
5075 result = res;
5076 /* recovery code expects to have buffer lock held */
5078 goto failed;
5079 }
5080 }
5081
5083
5084 /*
5085 * xwait is done, but if xwait had just locked the tuple then some
5086 * other xact could update this tuple before we get to this point.
5087 * Check for xmax change, and start over if so.
5088 */
5089 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
5091 xwait))
5092 goto l3;
5093
5094 if (!(infomask & HEAP_XMAX_IS_MULTI))
5095 {
5096 /*
5097 * Otherwise check if it committed or aborted. Note we cannot
5098 * be here if the tuple was only locked by somebody who didn't
5099 * conflict with us; that would have been handled above. So
5100 * that transaction must necessarily be gone by now. But
5101 * don't check for this in the multixact case, because some
5102 * locker transactions might still be running.
5103 */
5104 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5105 }
5106 }
5107
5108 /* By here, we're certain that we hold buffer exclusive lock again */
5109
5110 /*
5111 * We may lock if previous xmax aborted, or if it committed but only
5112 * locked the tuple without updating it; or if we didn't have to wait
5113 * at all for whatever reason.
5114 */
5115 if (!require_sleep ||
5116 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5119 result = TM_Ok;
5120 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5121 result = TM_Updated;
5122 else
5123 result = TM_Deleted;
5124 }
5125
5126failed:
5127 if (result != TM_Ok)
5128 {
5129 Assert(result == TM_SelfModified || result == TM_Updated ||
5130 result == TM_Deleted || result == TM_WouldBlock);
5131
5132 /*
5133 * When locking a tuple under LockWaitSkip semantics and we fail with
5134 * TM_WouldBlock above, it's possible for concurrent transactions to
5135 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5136 * this assert is slightly different from the equivalent one in
5137 * heap_delete and heap_update.
5138 */
5139 Assert((result == TM_WouldBlock) ||
5140 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5141 Assert(result != TM_Updated ||
5142 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5143 tmfd->ctid = tuple->t_data->t_ctid;
5144 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5145 if (result == TM_SelfModified)
5146 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5147 else
5148 tmfd->cmax = InvalidCommandId;
5149 goto out_locked;
5150 }
5151
5152 /*
5153 * If we didn't pin the visibility map page and the page has become all
5154 * visible while we were busy locking the buffer, or during some
5155 * subsequent window during which we had it unlocked, we'll have to unlock
5156 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5157 * unfortunate, especially since we'll now have to recheck whether the
5158 * tuple has been locked or updated under us, but hopefully it won't
5159 * happen very often.
5160 */
5161 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5162 {
5164 visibilitymap_pin(relation, block, &vmbuffer);
5166 goto l3;
5167 }
5168
5169 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5170 old_infomask = tuple->t_data->t_infomask;
5171
5172 /*
5173 * If this is the first possibly-multixact-able operation in the current
5174 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5175 * certain that the transaction will never become a member of any older
5176 * MultiXactIds than that. (We have to do this even if we end up just
5177 * using our own TransactionId below, since some other backend could
5178 * incorporate our XID into a MultiXact immediately afterwards.)
5179 */
5181
5182 /*
5183 * Compute the new xmax and infomask to store into the tuple. Note we do
5184 * not modify the tuple just yet, because that would leave it in the wrong
5185 * state if multixact.c elogs.
5186 */
5187 compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
5188 GetCurrentTransactionId(), mode, false,
5189 &xid, &new_infomask, &new_infomask2);
5190
5192
5193 /*
5194 * Store transaction information of xact locking the tuple.
5195 *
5196 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5197 * possibly generating a useless combo CID. Moreover, if we're locking a
5198 * previously updated tuple, it's important to preserve the Cmax.
5199 *
5200 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5201 * we would break the HOT chain.
5202 */
5203 tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
5204 tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
5205 tuple->t_data->t_infomask |= new_infomask;
5206 tuple->t_data->t_infomask2 |= new_infomask2;
5207 if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5209 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5210
5211 /*
5212 * Make sure there is no forward chain link in t_ctid. Note that in the
5213 * cases where the tuple has been updated, we must not overwrite t_ctid,
5214 * because it was set by the updater. Moreover, if the tuple has been
5215 * updated, we need to follow the update chain to lock the new versions of
5216 * the tuple as well.
5217 */
5218 if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5219 tuple->t_data->t_ctid = *tid;
5220
5221 /* Clear only the all-frozen bit on visibility map if needed */
5222 if (PageIsAllVisible(page) &&
5223 visibilitymap_clear(relation, block, vmbuffer,
5225 cleared_all_frozen = true;
5226
5227
5228 MarkBufferDirty(*buffer);
5229
5230 /*
5231 * XLOG stuff. You might think that we don't need an XLOG record because
5232 * there is no state change worth restoring after a crash. You would be
5233 * wrong however: we have just written either a TransactionId or a
5234 * MultiXactId that may never have been seen on disk before, and we need
5235 * to make sure that there are XLOG entries covering those ID numbers.
5236 * Else the same IDs might be re-used after a crash, which would be
5237 * disastrous if this page made it to disk before the crash. Essentially
5238 * we have to enforce the WAL log-before-data rule even in this case.
5239 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5240 * entries for everything anyway.)
5241 */
5242 if (RelationNeedsWAL(relation))
5243 {
5244 xl_heap_lock xlrec;
5245 XLogRecPtr recptr;
5246
5249
5250 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5251 xlrec.xmax = xid;
5252 xlrec.infobits_set = compute_infobits(new_infomask,
5253 tuple->t_data->t_infomask2);
5254 xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
5256
5257 /* we don't decode row locks atm, so no need to log the origin */
5258
5259 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
5260
5261 PageSetLSN(page, recptr);
5262 }
5263
5265
5266 result = TM_Ok;
5267
5268out_locked:
5270
5271out_unlocked:
5272 if (BufferIsValid(vmbuffer))
5273 ReleaseBuffer(vmbuffer);
5274
5275 /*
5276 * Don't update the visibility map here. Locking a tuple doesn't change
5277 * visibility info.
5278 */
5279
5280 /*
5281 * Now that we have successfully marked the tuple as locked, we can
5282 * release the lmgr tuple lock, if we had it.
5283 */
5284 if (have_tuple_lock)
5285 UnlockTupleTuplock(relation, tid, mode);
5286
5287 return result;
5288}
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:213
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition: heapam.c:7821
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, const ItemPointerData *ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:6069
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4558
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:401
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:39
#define SizeOfHeapLock
Definition: heapam_xlog.h:412
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:289
static bool HEAP_XMAX_IS_SHR_LOCKED(uint16 infomask)
Definition: htup_details.h:263
static bool HEAP_XMAX_IS_EXCL_LOCKED(uint16 infomask)
Definition: htup_details.h:269
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition: lmgr.c:739
@ XLTW_Lock
Definition: lmgr.h:29
bool log_lock_failures
Definition: lock.c:54
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1334
static PgChecksumMode mode
Definition: pg_checksums.c:56
uint8 infobits_set
Definition: heapam_xlog.h:408
OffsetNumber offnum
Definition: heapam_xlog.h:407
TransactionId xmax
Definition: heapam_xlog.h:406
@ TM_WouldBlock
Definition: tableam.h:103
#define VISIBILITYMAP_ALL_FROZEN

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failures, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
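
As a rough illustration of the calling convention (this sketch is modelled on heapam_tuple_lock() but is not copied from it; the helper name and the choice of an exclusive lock mode are assumptions): the caller supplies the target TID in tuple->t_self, and on return it must release the buffer that heap_lock_tuple() has pinned.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "nodes/lockoptions.h"
#include "storage/bufmgr.h"
#include "storage/itemptr.h"

/*
 * Illustrative sketch only: lock the row version at *tid with the strongest
 * tuple lock (as SELECT ... FOR UPDATE would), waiting for any conflicting
 * transaction to finish.
 */
static TM_Result
lock_row_for_update_demo(Relation rel, ItemPointer tid)
{
    HeapTupleData   tuple;
    TM_FailureData  tmfd;
    Buffer          buffer;
    TM_Result       res;

    tuple.t_self = *tid;        /* heap_lock_tuple() starts from t_self */

    res = heap_lock_tuple(rel, &tuple,
                          GetCurrentCommandId(true),
                          LockTupleExclusive,
                          LockWaitBlock,
                          true,     /* follow the update chain, locking newer versions too */
                          &buffer, &tmfd);

    /* The buffer comes back pinned (but unlocked); the caller releases it. */
    ReleaseBuffer(buffer);

    /* On TM_Updated, TM_Deleted or TM_SelfModified, tmfd describes the conflict. */
    return res;
}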

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2374 of file heapam.c.

2376{
2378 HeapTuple *heaptuples;
2379 int i;
2380 int ndone;
2381 PGAlignedBlock scratch;
2382 Page page;
2383 Buffer vmbuffer = InvalidBuffer;
2384 bool needwal;
2385 Size saveFreeSpace;
2386 bool need_tuple_data = RelationIsLogicallyLogged(relation);
2387 bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2388 bool starting_with_empty_page = false;
2389 int npages = 0;
2390 int npages_used = 0;
2391
2392 /* currently not needed (thus unsupported) for heap_multi_insert() */
2394
2395 AssertHasSnapshotForToast(relation);
2396
2397 needwal = RelationNeedsWAL(relation);
2398 saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2400
2401 /* Toast and set header data in all the slots */
2402 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2403 for (i = 0; i < ntuples; i++)
2404 {
2405 HeapTuple tuple;
2406
2407 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2408 slots[i]->tts_tableOid = RelationGetRelid(relation);
2409 tuple->t_tableOid = slots[i]->tts_tableOid;
2410 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2411 options);
2412 }
2413
2414 /*
2415 * We're about to do the actual inserts -- but check for conflict first,
2416 * to minimize the possibility of having to roll back work we've just
2417 * done.
2418 *
2419 * A check here does not definitively prevent a serialization anomaly;
2420 * that check MUST be done at least past the point of acquiring an
2421 * exclusive buffer content lock on every buffer that will be affected,
2422 * and MAY be done after all inserts are reflected in the buffers and
2423 * those locks are released; otherwise there is a race condition. Since
2424 * multiple buffers can be locked and unlocked in the loop below, and it
2425 * would not be feasible to identify and lock all of those buffers before
2426 * the loop, we must do a final check at the end.
2427 *
2428 * The check here could be omitted with no loss of correctness; it is
2429 * present strictly as an optimization.
2430 *
2431 * For heap inserts, we only need to check for table-level SSI locks. Our
2432 * new tuples can't possibly conflict with existing tuple locks, and heap
2433 * page locks are only consolidated versions of tuple locks; they do not
2434 * lock "gaps" as index page locks do. So we don't need to specify a
2435 * buffer when making the call, which makes for a faster check.
2436 */
2438
2439 ndone = 0;
2440 while (ndone < ntuples)
2441 {
2442 Buffer buffer;
2443 bool all_visible_cleared = false;
2444 bool all_frozen_set = false;
2445 int nthispage;
2446
2448
2449 /*
2450 * Compute number of pages needed to fit the to-be-inserted tuples in
2451 * the worst case. This will be used to determine how much to extend
2452 * the relation by in RelationGetBufferForTuple(), if needed. If we
2453 * filled a prior page from scratch, we can just update our last
2454 * computation, but if we started with a partially filled page,
2455 * recompute from scratch, since the number of potentially required pages
2456 * can vary due to tuples needing to fit onto the page, page headers
2457 * etc.
2458 */
2459 if (ndone == 0 || !starting_with_empty_page)
2460 {
2461 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2462 saveFreeSpace);
2463 npages_used = 0;
2464 }
2465 else
2466 npages_used++;
2467
2468 /*
2469 * Find buffer where at least the next tuple will fit. If the page is
2470 * all-visible, this will also pin the requisite visibility map page.
2471 *
2472 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2473 * empty page. See all_frozen_set below.
2474 */
2475 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2476 InvalidBuffer, options, bistate,
2477 &vmbuffer, NULL,
2478 npages - npages_used);
2479 page = BufferGetPage(buffer);
2480
2481 starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2482
2483 if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2484 {
2485 all_frozen_set = true;
2486 /* Lock the vmbuffer before entering the critical section */
2488 }
2489
2490 /* NO EREPORT(ERROR) from here till changes are logged */
2492
2493 /*
2494 * RelationGetBufferForTuple has ensured that the first tuple fits.
2495 * Put that on the page, and then as many other tuples as fit.
2496 */
2497 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2498
2499 /*
2500 * For logical decoding we need combo CIDs to properly decode the
2501 * catalog.
2502 */
2503 if (needwal && need_cids)
2504 log_heap_new_cid(relation, heaptuples[ndone]);
2505
2506 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2507 {
2508 HeapTuple heaptup = heaptuples[ndone + nthispage];
2509
2510 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2511 break;
2512
2513 RelationPutHeapTuple(relation, buffer, heaptup, false);
2514
2515 /*
2516 * For logical decoding we need combo CIDs to properly decode the
2517 * catalog.
2518 */
2519 if (needwal && need_cids)
2520 log_heap_new_cid(relation, heaptup);
2521 }
2522
2523 /*
2524 * If the page is all visible, need to clear that, unless we're only
2525 * going to add further frozen rows to it.
2526 *
2527 * If we're only adding already frozen rows to a previously empty
2528 * page, mark it as all-frozen and update the visibility map. We're
2529 * already holding a pin on the vmbuffer.
2530 */
2532 {
2533 all_visible_cleared = true;
2534 PageClearAllVisible(page);
2535 visibilitymap_clear(relation,
2536 BufferGetBlockNumber(buffer),
2537 vmbuffer, VISIBILITYMAP_VALID_BITS);
2538 }
2539 else if (all_frozen_set)
2540 {
2541 PageSetAllVisible(page);
2543 vmbuffer,
2546 relation->rd_locator);
2547 }
2548
2549 /*
2550 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2551 */
2552
2553 MarkBufferDirty(buffer);
2554
2555 /* XLOG stuff */
2556 if (needwal)
2557 {
2558 XLogRecPtr recptr;
2559 xl_heap_multi_insert *xlrec;
2561 char *tupledata;
2562 int totaldatalen;
2563 char *scratchptr = scratch.data;
2564 bool init;
2565 int bufflags = 0;
2566
2567 /*
2568 * If the page was previously empty, we can reinit the page
2569 * instead of restoring the whole thing.
2570 */
2571 init = starting_with_empty_page;
2572
2573 /* allocate xl_heap_multi_insert struct from the scratch area */
2574 xlrec = (xl_heap_multi_insert *) scratchptr;
2575 scratchptr += SizeOfHeapMultiInsert;
2576
2577 /*
2578 * Allocate offsets array. Unless we're reinitializing the page,
2579 * in that case the tuples are stored in order starting at
2580 * FirstOffsetNumber and we don't need to store the offsets
2581 * explicitly.
2582 */
2583 if (!init)
2584 scratchptr += nthispage * sizeof(OffsetNumber);
2585
2586 /* the rest of the scratch space is used for tuple data */
2587 tupledata = scratchptr;
2588
2589 /* check that the mutually exclusive flags are not both set */
2590 Assert(!(all_visible_cleared && all_frozen_set));
2591
2592 xlrec->flags = 0;
2593 if (all_visible_cleared)
2595
2596 /*
2597 * We don't have to worry about including a conflict xid in the
2598 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
2599 * visibility rules.
2600 */
2601 if (all_frozen_set)
2603
2604 xlrec->ntuples = nthispage;
2605
2606 /*
2607 * Write out an xl_multi_insert_tuple and the tuple data itself
2608 * for each tuple.
2609 */
2610 for (i = 0; i < nthispage; i++)
2611 {
2612 HeapTuple heaptup = heaptuples[ndone + i];
2613 xl_multi_insert_tuple *tuphdr;
2614 int datalen;
2615
2616 if (!init)
2617 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2618 /* xl_multi_insert_tuple needs two-byte alignment. */
2619 tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2620 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2621
2622 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2623 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2624 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2625
2626 /* write bitmap [+ padding] [+ oid] + data */
2627 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2628 memcpy(scratchptr,
2629 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2630 datalen);
2631 tuphdr->datalen = datalen;
2632 scratchptr += datalen;
2633 }
2634 totaldatalen = scratchptr - tupledata;
2635 Assert((scratchptr - scratch.data) < BLCKSZ);
2636
2637 if (need_tuple_data)
2639
2640 /*
2641 * Signal that this is the last xl_heap_multi_insert record
2642 * emitted by this call to heap_multi_insert(). Needed for logical
2643 * decoding so it knows when to cleanup temporary data.
2644 */
2645 if (ndone + nthispage == ntuples)
2647
2648 if (init)
2649 {
2650 info |= XLOG_HEAP_INIT_PAGE;
2651 bufflags |= REGBUF_WILL_INIT;
2652 }
2653
2654 /*
2655 * If we're doing logical decoding, include the new tuple data
2656 * even if we take a full-page image of the page.
2657 */
2658 if (need_tuple_data)
2659 bufflags |= REGBUF_KEEP_DATA;
2660
2662 XLogRegisterData(xlrec, tupledata - scratch.data);
2663 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2664 if (all_frozen_set)
2665 XLogRegisterBuffer(1, vmbuffer, 0);
2666
2667 XLogRegisterBufData(0, tupledata, totaldatalen);
2668
2669 /* filtering by origin on a row level is much more efficient */
2671
2672 recptr = XLogInsert(RM_HEAP2_ID, info);
2673
2674 PageSetLSN(page, recptr);
2675 if (all_frozen_set)
2676 {
2677 Assert(BufferIsDirty(vmbuffer));
2678 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2679 }
2680 }
2681
2683
2684 if (all_frozen_set)
2685 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2686
2687 UnlockReleaseBuffer(buffer);
2688 ndone += nthispage;
2689
2690 /*
2691 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2692 * likely that we'll insert into subsequent heap pages that are likely
2693 * to use the same vm page.
2694 */
2695 }
2696
2697 /* We're done with inserting all tuples, so release the last vmbuffer. */
2698 if (vmbuffer != InvalidBuffer)
2699 ReleaseBuffer(vmbuffer);
2700
2701 /*
2702 * We're done with the actual inserts. Check for conflicts again, to
2703 * ensure that all rw-conflicts in to these inserts are detected. Without
2704 * this final check, a sequential scan of the heap may have locked the
2705 * table after the "before" check, missing one opportunity to detect the
2706 * conflict, and then scanned the table before the new tuples were there,
2707 * missing the other chance to detect the conflict.
2708 *
2709 * For heap inserts, we only need to check for table-level SSI locks. Our
2710 * new tuples can't possibly conflict with existing tuple locks, and heap
2711 * page locks are only consolidated versions of tuple locks; they do not
2712 * lock "gaps" as index page locks do. So we don't need to specify a
2713 * buffer when making the call.
2714 */
2716
2717 /*
2718 * If tuples are cacheable, mark them for invalidation from the caches in
2719 * case we abort. Note it is OK to do this after releasing the buffer,
2720 * because the heaptuples data structure is all in local memory, not in
2721 * the shared buffer.
2722 */
2723 if (IsCatalogRelation(relation))
2724 {
2725 for (i = 0; i < ntuples; i++)
2726 CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2727 }
2728
2729 /* copy t_self fields back to the caller's slots */
2730 for (i = 0; i < ntuples; i++)
2731 slots[i]->tts_tid = heaptuples[i]->t_self;
2732
2733 pgstat_count_heap_insert(relation, ntuples);
2734}
bool BufferIsDirty(Buffer buffer)
Definition: bufmgr.c:2911
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:990
static void PageSetAllVisible(Page page)
Definition: bufpage.h:433
#define MAXALIGN(LEN)
Definition: c.h:813
#define SHORTALIGN(LEN)
Definition: c.h:809
size_t Size
Definition: c.h:613
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1833
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2342
#define HEAP_INSERT_FROZEN
Definition: heapam.h:38
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:188
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:79
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:199
int init
Definition: isn.c:79
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:390
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:361
RelFileLocator rd_locator
Definition: rel.h:57
Oid tts_tableOid
Definition: tuptable.h:129
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:185
uint8 visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_ALL_VISIBLE

References Assert(), AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsDirty(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, IsCatalogRelation(), ItemPointerGetOffsetNumber(), LockBuffer(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), RelationData::rd_locator, REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_set_vmbits(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
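
A hedged sketch of batch insertion through heap_multi_insert(), roughly following the pattern of its catalog and COPY callers. The helper name is invented for this example, and the use of MakeSingleTupleTableSlot()/ExecStoreHeapTuple() together with a BulkInsertState is an assumption about how a caller would typically prepare the slots, not a prescription from this page.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

/*
 * Illustrative sketch only: wrap already-formed heap tuples in heap-tuple
 * slots and insert them in one batched call, reusing a BulkInsertState so
 * that buffer pins and relation extension are amortized across the batch.
 */
static void
multi_insert_demo(Relation rel, HeapTuple *tuples, int ntuples)
{
    TupleTableSlot **slots = palloc(ntuples * sizeof(TupleTableSlot *));
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
    {
        slots[i] = MakeSingleTupleTableSlot(RelationGetDescr(rel),
                                            &TTSOpsHeapTuple);
        ExecStoreHeapTuple(tuples[i], slots[i], false);
    }

    /* One call may span several heap pages; WAL is emitted per filled page. */
    heap_multi_insert(rel, slots, ntuples, cid, 0, bistate);

    for (int i = 0; i < ntuples; i++)
        ExecDropSingleTupleTableSlot(slots[i]);
    FreeBulkInsertState(bistate);
    pfree(slots);
}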

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( PruneFreezeParams params,
PruneFreezeResult presult,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)

Definition at line 809 of file pruneheap.c.

814{
815 Buffer buffer = params->buffer;
816 Page page = BufferGetPage(buffer);
817 PruneState prstate;
818 bool do_freeze;
819 bool do_prune;
820 bool do_hint_prune;
821 bool did_tuple_hint_fpi;
822 int64 fpi_before = pgWalUsage.wal_fpi;
823
824 /* Initialize prstate */
825 prune_freeze_setup(params,
826 new_relfrozen_xid, new_relmin_mxid,
827 presult, &prstate);
828
829 /*
830 * Examine all line pointers and tuple visibility information to determine
831 * which line pointers should change state and which tuples may be frozen.
832 * Prepare queue of state changes to later be executed in a critical
833 * section.
834 */
836 buffer, &prstate, off_loc);
837
838 /*
839 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
840 * checking tuple visibility information in prune_freeze_plan() may have
841 * caused an FPI to be emitted.
842 */
843 did_tuple_hint_fpi = fpi_before != pgWalUsage.wal_fpi;
844
845 do_prune = prstate.nredirected > 0 ||
846 prstate.ndead > 0 ||
847 prstate.nunused > 0;
848
849 /*
850 * Even if we don't prune anything, if we found a new value for the
851 * pd_prune_xid field or the page was marked full, we will update the hint
852 * bit.
853 */
854 do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
855 PageIsFull(page);
856
857 /*
858 * Decide if we want to go ahead with freezing according to the freeze
859 * plans we prepared, or not.
860 */
861 do_freeze = heap_page_will_freeze(params->relation, buffer,
862 did_tuple_hint_fpi,
863 do_prune,
864 do_hint_prune,
865 &prstate);
866
867 /*
868 * While scanning the line pointers, we did not clear
869 * all_visible/all_frozen when encountering LP_DEAD items because we
870 * wanted the decision whether or not to freeze the page to be unaffected
871 * by the short-term presence of LP_DEAD items. These LP_DEAD items are
872 * effectively assumed to be LP_UNUSED items in the making. It doesn't
873 * matter which vacuum heap pass (initial pass or final pass) ends up
874 * setting the page all-frozen, as long as the ongoing VACUUM does it.
875 *
876 * Now that we finished determining whether or not to freeze the page,
877 * update all_visible and all_frozen so that they reflect the true state
878 * of the page for setting PD_ALL_VISIBLE and VM bits.
879 */
880 if (prstate.lpdead_items > 0)
881 prstate.all_visible = prstate.all_frozen = false;
882
883 Assert(!prstate.all_frozen || prstate.all_visible);
884
885 /* Any error while applying the changes is critical */
887
888 if (do_hint_prune)
889 {
890 /*
891 * Update the page's pd_prune_xid field to either zero, or the lowest
892 * XID of any soon-prunable tuple.
893 */
894 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
895
896 /*
897 * Also clear the "page is full" flag, since there's no point in
898 * repeating the prune/defrag process until something else happens to
899 * the page.
900 */
901 PageClearFull(page);
902
903 /*
904 * If that's all we had to do to the page, this is a non-WAL-logged
905 * hint. If we are going to freeze or prune the page, we will mark
906 * the buffer dirty below.
907 */
908 if (!do_freeze && !do_prune)
909 MarkBufferDirtyHint(buffer, true);
910 }
911
912 if (do_prune || do_freeze)
913 {
914 /* Apply the planned item changes and repair page fragmentation. */
915 if (do_prune)
916 {
917 heap_page_prune_execute(buffer, false,
918 prstate.redirected, prstate.nredirected,
919 prstate.nowdead, prstate.ndead,
920 prstate.nowunused, prstate.nunused);
921 }
922
923 if (do_freeze)
924 heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
925
926 MarkBufferDirty(buffer);
927
928 /*
929 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
930 */
931 if (RelationNeedsWAL(params->relation))
932 {
933 /*
934 * The snapshotConflictHorizon for the whole record should be the
935 * most conservative of all the horizons calculated for any of the
936 * possible modifications. If this record will prune tuples, any
937 * transactions on the standby older than the youngest xmax of the
938 * most recently removed tuple this record will prune will
939 * conflict. If this record will freeze tuples, any transactions
940 * on the standby with xids older than the youngest tuple this
941 * record will freeze will conflict.
942 */
943 TransactionId conflict_xid;
944
946 prstate.latest_xid_removed))
947 conflict_xid = prstate.frz_conflict_horizon;
948 else
949 conflict_xid = prstate.latest_xid_removed;
950
951 log_heap_prune_and_freeze(params->relation, buffer,
952 InvalidBuffer, /* vmbuffer */
953 0, /* vmflags */
954 conflict_xid,
955 true, params->reason,
956 prstate.frozen, prstate.nfrozen,
957 prstate.redirected, prstate.nredirected,
958 prstate.nowdead, prstate.ndead,
959 prstate.nowunused, prstate.nunused);
960 }
961 }
962
964
965 /* Copy information back for caller */
966 presult->ndeleted = prstate.ndeleted;
967 presult->nnewlpdead = prstate.ndead;
968 presult->nfrozen = prstate.nfrozen;
969 presult->live_tuples = prstate.live_tuples;
971 presult->all_visible = prstate.all_visible;
972 presult->all_frozen = prstate.all_frozen;
973 presult->hastup = prstate.hastup;
974
975 /*
976 * For callers planning to update the visibility map, the conflict horizon
977 * for that record must be the newest xmin on the page. However, if the
978 * page is completely frozen, there can be no conflict and the
979 * vm_conflict_horizon should remain InvalidTransactionId. This includes
980 * the case that we just froze all the tuples; the prune-freeze record
981 * included the conflict XID already so the caller doesn't need it.
982 */
983 if (presult->all_frozen)
985 else
987
988 presult->lpdead_items = prstate.lpdead_items;
989 /* the presult->deadoffsets array was already filled in */
990
991 if (prstate.attempt_freeze)
992 {
993 if (presult->nfrozen > 0)
994 {
995 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
996 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
997 }
998 else
999 {
1000 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1001 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1002 }
1003 }
1004}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:5430
static void PageClearFull(Page page)
Definition: bufpage.h:422
static bool PageIsFull(const PageData *page)
Definition: bufpage.h:412
int64_t int64
Definition: c.h:538
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7406
WalUsage pgWalUsage
Definition: instrument.c:22
static bool heap_page_will_freeze(Relation relation, Buffer buffer, bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition: pruneheap.c:653
static void prune_freeze_plan(Oid reloid, Buffer buffer, PruneState *prstate, OffsetNumber *off_loc)
Definition: pruneheap.c:448
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2157
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, const PruneFreezeResult *presult, PruneState *prstate)
Definition: pruneheap.c:327
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1661
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:220
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:208
MultiXactId FreezePageRelminMxid
Definition: heapam.h:209
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:219
PruneReason reason
Definition: heapam.h:245
Buffer buffer
Definition: heapam.h:239
Relation relation
Definition: heapam.h:238
int recently_dead_tuples
Definition: heapam.h:285
TransactionId vm_conflict_horizon
Definition: heapam.h:300
bool all_visible
Definition: heapam.h:298
HeapPageFreeze pagefrz
Definition: pruneheap.c:104
bool all_visible
Definition: pruneheap.c:154
int ndead
Definition: pruneheap.c:56
TransactionId new_prune_xid
Definition: pruneheap.c:53
bool attempt_freeze
Definition: pruneheap.c:46
bool hastup
Definition: pruneheap.c:123
int recently_dead_tuples
Definition: pruneheap.c:120
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
TransactionId frz_conflict_horizon
Definition: pruneheap.c:137
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:62
int live_tuples
Definition: pruneheap.c:119
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:156
bool all_frozen
Definition: pruneheap.c:155
HeapTupleFreeze frozen[MaxHeapTuplesPerPage]
Definition: pruneheap.c:63
int lpdead_items
Definition: pruneheap.c:129
int nfrozen
Definition: pruneheap.c:58
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:60
int ndeleted
Definition: pruneheap.c:116
int nredirected
Definition: pruneheap.c:55
TransactionId latest_xid_removed
Definition: pruneheap.c:54
int nunused
Definition: pruneheap.c:57
int64 wal_fpi
Definition: instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.h:297

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert(), PruneState::attempt_freeze, PruneFreezeParams::buffer, BufferGetPage(), END_CRIT_SECTION, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::frz_conflict_horizon, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), heap_page_will_freeze(), InvalidBuffer, InvalidTransactionId, PruneState::latest_xid_removed, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nunused, PageClearFull(), PruneState::pagefrz, PageIsFull(), pgWalUsage, prune_freeze_plan(), prune_freeze_setup(), PruneFreezeParams::reason, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, PruneFreezeParams::relation, RelationGetRelid, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdFollows(), PruneState::visibility_cutoff_xid, PruneFreezeResult::vm_conflict_horizon, and WalUsage::wal_fpi.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 1661 of file pruneheap.c.

1665{
1666 Page page = BufferGetPage(buffer);
1667 OffsetNumber *offnum;
1668 HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1669
1670 /* Shouldn't be called unless there's something to do */
1671 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1672
1673 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1674 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1675
1676 /* Update all redirected line pointers */
1677 offnum = redirected;
1678 for (int i = 0; i < nredirected; i++)
1679 {
1680 OffsetNumber fromoff = *offnum++;
1681 OffsetNumber tooff = *offnum++;
1682 ItemId fromlp = PageGetItemId(page, fromoff);
1684
1685#ifdef USE_ASSERT_CHECKING
1686
1687 /*
1688 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1689 * must be the first item from a HOT chain. If the item has tuple
1690 * storage then it can't be a heap-only tuple. Otherwise we are just
1691 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1692 * has been pruned at least once before now.
1693 */
1694 if (!ItemIdIsRedirected(fromlp))
1695 {
1696 Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1697
1698 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1700 }
1701 else
1702 {
1703 /* We shouldn't need to redundantly set the redirect */
1704 Assert(ItemIdGetRedirect(fromlp) != tooff);
1705 }
1706
1707 /*
1708 * The item that we're about to set as an LP_REDIRECT (the 'from'
1709 * item) will point to an existing item (the 'to' item) that is
1710 * already a heap-only tuple. There can be at most one LP_REDIRECT
1711 * item per HOT chain.
1712 *
1713 * We need to keep around an LP_REDIRECT item (after original
1714 * non-heap-only root tuple gets pruned away) so that it's always
1715 * possible for VACUUM to easily figure out what TID to delete from
1716 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1717 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1718 * tuple can.
1719 *
1720 * This check may miss problems, e.g. the target of a redirect could
1721 * be marked as unused subsequently. The page_verify_redirects() check
1722 * below will catch such problems.
1723 */
1724 tolp = PageGetItemId(page, tooff);
1725 Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1726 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1728#endif
1729
1730 ItemIdSetRedirect(fromlp, tooff);
1731 }
1732
1733 /* Update all now-dead line pointers */
1734 offnum = nowdead;
1735 for (int i = 0; i < ndead; i++)
1736 {
1737 OffsetNumber off = *offnum++;
1738 ItemId lp = PageGetItemId(page, off);
1739
1740#ifdef USE_ASSERT_CHECKING
1741
1742 /*
1743 * An LP_DEAD line pointer must be left behind when the original item
1744 * (which is dead to everybody) could still be referenced by a TID in
1745 * an index. This should never be necessary with any individual
1746 * heap-only tuple item, though. (It's not clear how much of a problem
1747 * that would be, but there is no reason to allow it.)
1748 */
1749 if (ItemIdHasStorage(lp))
1750 {
1752 htup = (HeapTupleHeader) PageGetItem(page, lp);
1754 }
1755 else
1756 {
1757 /* Whole HOT chain becomes dead */
1759 }
1760#endif
1761
1762 ItemIdSetDead(lp);
1763 }
1764
1765 /* Update all now-unused line pointers */
1766 offnum = nowunused;
1767 for (int i = 0; i < nunused; i++)
1768 {
1769 OffsetNumber off = *offnum++;
1770 ItemId lp = PageGetItemId(page, off);
1771
1772#ifdef USE_ASSERT_CHECKING
1773
1774 if (lp_truncate_only)
1775 {
1776 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1778 }
1779 else
1780 {
1781 /*
1782 * When heap_page_prune_and_freeze() was called, mark_unused_now
1783 * may have been passed as true, which allows would-be LP_DEAD
1784 * items to be made LP_UNUSED instead. This is only possible if
1785 * the relation has no indexes. If there are any dead items, then
1786 * mark_unused_now was not true and every item being marked
1787 * LP_UNUSED must refer to a heap-only tuple.
1788 */
1789 if (ndead > 0)
1790 {
1792 htup = (HeapTupleHeader) PageGetItem(page, lp);
1794 }
1795 else
1796 Assert(ItemIdIsUsed(lp));
1797 }
1798
1799#endif
1800
1801 ItemIdSetUnused(lp);
1802 }
1803
1804 if (lp_truncate_only)
1805 PageTruncateLinePointerArray(page);
1806 else
1807 {
1808 /*
1809 * Finally, repair any fragmentation, and update the page's hint bit
1810 * about whether it has free pointers.
1811 */
1812 PageRepairFragmentation(page);
1813
1814 /*
1815 * Now that the page has been modified, assert that redirect items
1816 * still point to valid targets.
1817 */
1818 page_verify_redirects(page);
1819 }
1820}
void PageRepairFragmentation(Page page)
Definition: bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:228
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1837

References Assert(), BufferGetPage(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
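
As an orientation aid (not from the PostgreSQL sources): the redirected array is consumed as from/to offset pairs, so it carries 2 * nredirected entries, while nowdead and nowunused are flat offset lists. A hypothetical call, assuming "buffer" is the exclusively locked target page and the caller is inside a critical section:

/* Redirect the HOT-chain root at offset 1 to the heap-only tuple at
 * offset 2, and mark offset 3 LP_DEAD; nothing becomes LP_UNUSED. */
OffsetNumber redirected[] = {1, 2};		/* one from/to pair */
OffsetNumber nowdead[] = {3};

heap_page_prune_execute(buffer, false,	/* lp_truncate_only */
						redirected, 1,
						nowdead, 1,
						NULL, 0);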

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 209 of file pruneheap.c.

210{
211 Page page = BufferGetPage(buffer);
212 TransactionId prune_xid;
213 GlobalVisState *vistest;
214 Size minfree;
215
216 /*
217 * We can't write WAL in recovery mode, so there's no point trying to
218 * clean the page. The primary will likely issue a cleaning WAL record
219 * soon anyway, so this is no particular loss.
220 */
221 if (RecoveryInProgress())
222 return;
223
224 /*
225 * First check whether there's any chance there's something to prune;
226 * determining the appropriate horizon is a waste if there's no prune_xid
227 * (i.e. no updates/deletes left potentially dead tuples around).
228 */
229 prune_xid = ((PageHeader) page)->pd_prune_xid;
230 if (!TransactionIdIsValid(prune_xid))
231 return;
232
233 /*
234 * Check whether prune_xid indicates that there may be dead rows that can
235 * be cleaned up.
236 */
237 vistest = GlobalVisTestFor(relation);
238
239 if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
240 return;
241
242 /*
243 * We prune when a previous UPDATE failed to find enough space on the page
244 * for a new tuple version, or when free space falls below the relation's
245 * fill-factor target (but not less than 10%).
246 *
247 * Checking free space here is questionable since we aren't holding any
248 * lock on the buffer; in the worst case we could get a bogus answer. It's
249 * unlikely to be *seriously* wrong, though, since reading either pd_lower
250 * or pd_upper is probably atomic. Avoiding taking a lock seems more
251 * important than sometimes getting a wrong answer in what is after all
252 * just a heuristic estimate.
253 */
254 minfree = RelationGetTargetPageFreeSpace(relation,
255 HEAP_DEFAULT_FILLFACTOR);
256 minfree = Max(minfree, BLCKSZ / 10);
257
258 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
259 {
260 /* OK, try to get exclusive buffer lock */
261 if (!ConditionalLockBufferForCleanup(buffer))
262 return;
263
264 /*
265 * Now that we have buffer lock, get accurate information about the
266 * page's free space, and recheck the heuristic about whether to
267 * prune.
268 */
269 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
270 {
271 OffsetNumber dummy_off_loc;
272 PruneFreezeResult presult;
273
274 /*
275 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
276 * regardless of whether or not the relation has indexes, since we
277 * cannot safely determine that during on-access pruning with the
278 * current implementation.
279 */
280 PruneFreezeParams params = {
281 .relation = relation,
282 .buffer = buffer,
283 .reason = PRUNE_ON_ACCESS,
284 .options = 0,
285 .vistest = vistest,
286 .cutoffs = NULL,
287 };
288
289 heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
290 NULL, NULL);
291
292 /*
293 * Report the number of tuples reclaimed to pgstats. This is
294 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
295 *
296 * We derive the number of dead tuples like this to avoid totally
297 * forgetting about items that were set to LP_DEAD, since they
298 * still need to be cleaned up by VACUUM. We only want to count
299 * heap-only tuples that just became LP_UNUSED in our report,
300 * which don't.
301 *
302 * VACUUM doesn't have to compensate in the same way when it
303 * tracks ndeleted, since it will set the same LP_DEAD items to
304 * LP_UNUSED separately.
305 */
306 if (presult.ndeleted > presult.nnewlpdead)
307 pgstat_update_heap_dead_tuples(relation,
308 presult.ndeleted - presult.nnewlpdead);
309 }
310
311 /* And release buffer lock */
312 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
313
314 /*
315 * We avoid reuse of any free space created on the page by unrelated
316 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
317 * free space should be reused by UPDATEs to *this* page.
318 */
319 }
320}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5857
#define Max(x, y)
Definition: c.h:1000
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4226
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:809
bool RecoveryInProgress(void)
Definition: xlog.c:6406

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PruneFreezeResult::ndeleted, PruneFreezeResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), PruneFreezeParams::relation, RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().
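
For illustration, a caller holding only a buffer pin can attempt opportunistic pruning before taking the content lock, following the same pattern as heap_prepare_pagescan(). This is a sketch; the function and variable names are hypothetical:

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

static void
read_page_with_opportunistic_prune(Relation rel, BlockNumber blkno)
{
	Buffer		buf = ReadBuffer(rel, blkno);

	/* No-op unless the page looks full or short on free space */
	heap_page_prune_opt(rel, buf);

	LockBuffer(buf, BUFFER_LOCK_SHARE);
	/* ... examine tuples while holding the shared content lock ... */
	UnlockReleaseBuffer(buf);
}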

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7353 of file heapam.c.

7355{
7356 Page page = BufferGetPage(buffer);
7357
7358 for (int i = 0; i < ntuples; i++)
7359 {
7360 HeapTupleFreeze *frz = tuples + i;
7361 ItemId itemid = PageGetItemId(page, frz->offset);
7362 HeapTupleHeader htup;
7363
7364 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7365
7366 /* Deliberately avoid relying on tuple hint bits here */
7368 {
7370
7372 if (unlikely(!TransactionIdDidCommit(xmin)))
7373 ereport(ERROR,
7375 errmsg_internal("uncommitted xmin %u needs to be frozen",
7376 xmin)));
7377 }
7378
7379 /*
7380 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7381 * left behind by transactions that were in progress during a crash,
7382 * so we can only check that xmax didn't commit
7383 */
7385 {
7387
7390 ereport(ERROR,
7392 errmsg_internal("cannot freeze committed xmax %u",
7393 xmax)));
7394 }
7395 }
7396}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:138
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:137
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
Definition: htup_details.h:350
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:318
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
uint8 checkflags
Definition: heapam.h:150
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert(), BufferGetPage(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_will_freeze().
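
To show where this fits: VACUUM-style code runs these checks on the freeze plans built by heap_prepare_freeze_tuple() right before applying them. A sketch, assuming "buffer", "frozen" and "nfrozen" come from the surrounding pruning/freezing code and that WAL logging is handled by the caller:

if (nfrozen > 0)
{
	/* ereport(ERROR) here indicates on-disk corruption; nothing is frozen */
	heap_pre_freeze_checks(buffer, frozen, nfrozen);

	START_CRIT_SECTION();
	heap_freeze_prepared_tuples(buffer, frozen, nfrozen);
	MarkBufferDirty(buffer);
	/* ... emit the prune/freeze WAL record ... */
	END_CRIT_SECTION();
}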

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool *  totally_frozen 
)

Definition at line 7080 of file heapam.c.

7084{
7085 bool xmin_already_frozen = false,
7086 xmax_already_frozen = false;
7087 bool freeze_xmin = false,
7088 replace_xvac = false,
7089 replace_xmax = false,
7090 freeze_xmax = false;
7091 TransactionId xid;
7092
7093 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7094 frz->t_infomask2 = tuple->t_infomask2;
7095 frz->t_infomask = tuple->t_infomask;
7096 frz->frzflags = 0;
7097 frz->checkflags = 0;
7098
7099 /*
7100 * Process xmin, while keeping track of whether it's already frozen, or
7101 * will become frozen iff our freeze plan is executed by caller (could be
7102 * neither).
7103 */
7104 xid = HeapTupleHeaderGetXmin(tuple);
7105 if (!TransactionIdIsNormal(xid))
7106 xmin_already_frozen = true;
7107 else
7108 {
7109 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7110 ereport(ERROR,
7112 errmsg_internal("found xmin %u from before relfrozenxid %u",
7113 xid, cutoffs->relfrozenxid)));
7114
7115 /* Will set freeze_xmin flags in freeze plan below */
7116 freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
7117
7118 /* Verify that xmin committed if and when freeze plan is executed */
7119 if (freeze_xmin)
7120 frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
7121 }
7122
7123 /*
7124 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7125 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7126 */
7127 xid = HeapTupleHeaderGetXvac(tuple);
7128 if (TransactionIdIsNormal(xid))
7129 {
7131 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7132
7133 /*
7134 * For Xvac, we always freeze proactively. This allows totally_frozen
7135 * tracking to ignore xvac.
7136 */
7137 replace_xvac = pagefrz->freeze_required = true;
7138
7139 /* Will set replace_xvac flags in freeze plan below */
7140 }
7141
7142 /* Now process xmax */
7143 xid = frz->xmax;
7144 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7145 {
7146 /* Raw xmax is a MultiXactId */
7147 TransactionId newxmax;
7148 uint16 flags;
7149
7150 /*
7151 * We will either remove xmax completely (in the "freeze_xmax" path),
7152 * process xmax by replacing it (in the "replace_xmax" path), or
7153 * perform no-op xmax processing. The only constraint is that the
7154 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7155 */
7156 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7157 &flags, pagefrz);
7158
7159 if (flags & FRM_NOOP)
7160 {
7161 /*
7162 * xmax is a MultiXactId, and nothing about it changes for now.
7163 * This is the only case where 'freeze_required' won't have been
7164 * set for us by FreezeMultiXactId, as well as the only case where
7165 * neither freeze_xmax nor replace_xmax are set (given a multi).
7166 *
7167 * This is a no-op, but the call to FreezeMultiXactId might have
7168 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7169 * for us (the "freeze page" variants, specifically). That'll
7170 * make it safe for our caller to freeze the page later on, while
7171 * leaving this particular xmax undisturbed.
7172 *
7173 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7174 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7175 * job. A call to heap_tuple_should_freeze for this same tuple
7176 * will take place below if 'freeze_required' isn't set already.
7177 * (This repeats work from FreezeMultiXactId, but allows "no
7178 * freeze" tracker maintenance to happen in only one place.)
7179 */
7180 Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
7181 Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
7182 }
7183 else if (flags & FRM_RETURN_IS_XID)
7184 {
7185 /*
7186 * xmax will become an updater Xid (original MultiXact's updater
7187 * member Xid will be carried forward as a simple Xid in Xmax).
7188 */
7189 Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
7190
7191 /*
7192 * NB -- some of these transformations are only valid because we
7193 * know the return Xid is a tuple updater (i.e. not merely a
7194 * locker.) Also note that the only reason we don't explicitly
7195 * worry about HEAP_KEYS_UPDATED is because it lives in
7196 * t_infomask2 rather than t_infomask.
7197 */
7198 frz->t_infomask &= ~HEAP_XMAX_BITS;
7199 frz->xmax = newxmax;
7200 if (flags & FRM_MARK_COMMITTED)
7201 frz->t_infomask |= HEAP_XMAX_COMMITTED;
7202 replace_xmax = true;
7203 }
7204 else if (flags & FRM_RETURN_IS_MULTI)
7205 {
7206 uint16 newbits;
7207 uint16 newbits2;
7208
7209 /*
7210 * xmax is an old MultiXactId that we have to replace with a new
7211 * MultiXactId, to carry forward two or more original member XIDs.
7212 */
7213 Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
7214
7215 /*
7216 * We can't use GetMultiXactIdHintBits directly on the new multi
7217 * here; that routine initializes the masks to all zeroes, which
7218 * would lose other bits we need. Doing it this way ensures all
7219 * unrelated bits remain untouched.
7220 */
7221 frz->t_infomask &= ~HEAP_XMAX_BITS;
7222 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7223 GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
7224 frz->t_infomask |= newbits;
7225 frz->t_infomask2 |= newbits2;
7226 frz->xmax = newxmax;
7227 replace_xmax = true;
7228 }
7229 else
7230 {
7231 /*
7232 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7233 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7234 */
7235 Assert(flags & FRM_INVALIDATE_XMAX);
7236 Assert(!TransactionIdIsValid(newxmax));
7237
7238 /* Will set freeze_xmax flags in freeze plan below */
7239 freeze_xmax = true;
7240 }
7241
7242 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7243 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7244 }
7245 else if (TransactionIdIsNormal(xid))
7246 {
7247 /* Raw xmax is normal XID */
7248 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7249 ereport(ERROR,
7251 errmsg_internal("found xmax %u from before relfrozenxid %u",
7252 xid, cutoffs->relfrozenxid)));
7253
7254 /* Will set freeze_xmax flags in freeze plan below */
7255 freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
7256
7257 /*
7258 * Verify that xmax aborted if and when freeze plan is executed,
7259 * provided it's from an update. (A lock-only xmax can be removed
7260 * independent of this, since the lock is released at xact end.)
7261 */
7262 if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
7263 frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7264 }
7265 else if (!TransactionIdIsValid(xid))
7266 {
7267 /* Raw xmax is InvalidTransactionId XID */
7268 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7269 xmax_already_frozen = true;
7270 }
7271 else
7272 ereport(ERROR,
7274 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7275 xid, tuple->t_infomask)));
7276
7277 if (freeze_xmin)
7278 {
7279 Assert(!xmin_already_frozen);
7280
7281 frz->t_infomask |= HEAP_XMIN_FROZEN;
7282 }
7283 if (replace_xvac)
7284 {
7285 /*
7286 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7287 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7288 * transaction succeeded.
7289 */
7290 Assert(pagefrz->freeze_required);
7291 if (tuple->t_infomask & HEAP_MOVED_OFF)
7292 frz->frzflags |= XLH_INVALID_XVAC;
7293 else
7294 frz->frzflags |= XLH_FREEZE_XVAC;
7295 }
7296 if (replace_xmax)
7297 {
7298 Assert(!xmax_already_frozen && !freeze_xmax);
7299 Assert(pagefrz->freeze_required);
7300
7301 /* Already set replace_xmax flags in freeze plan earlier */
7302 }
7303 if (freeze_xmax)
7304 {
7305 Assert(!xmax_already_frozen && !replace_xmax);
7306
7307 frz->xmax = InvalidTransactionId;
7308
7309 /*
7310 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7311 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7312 * Also get rid of the HEAP_KEYS_UPDATED bit.
7313 */
7314 frz->t_infomask &= ~HEAP_XMAX_BITS;
7315 frz->t_infomask |= HEAP_XMAX_INVALID;
7316 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7317 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7318 }
7319
7320 /*
7321 * Determine if this tuple is already totally frozen, or will become
7322 * totally frozen (provided caller executes freeze plans for the page)
7323 */
7324 *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
7325 (freeze_xmax || xmax_already_frozen));
7326
7327 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7328 xmax_already_frozen))
7329 {
7330 /*
7331 * So far no previous tuple from the page made freezing mandatory.
7332 * Does this tuple force caller to freeze the entire page?
7333 */
7334 pagefrz->freeze_required =
7335 heap_tuple_should_freeze(tuple, cutoffs,
7336 &pagefrz->NoFreezePageRelfrozenXid,
7337 &pagefrz->NoFreezePageRelminMxid);
7338 }
7339
7340 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7341 return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
7342}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:7472
#define FRM_RETURN_IS_XID
Definition: heapam.c:6679
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6730
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7891
#define FRM_MARK_COMMITTED
Definition: heapam.c:6681
#define FRM_NOOP
Definition: heapam.c:6677
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:6680
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:6678
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
Definition: htup_details.h:442
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3269
#define MultiXactIdIsValid(multi)
Definition: multixact.h:29
bool freeze_required
Definition: heapam.h:182
TransactionId OldestXmin
Definition: vacuum.h:279
MultiXactId OldestMxact
Definition: vacuum.h:280
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.h:282

References Assert(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
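
The caller-side loop typically looks like the sketch below (assumptions: "page" and "maxoff" describe the locked page, "cutoffs" was filled in by VACUUM, and "pagefrz" was initialized with the relation's current relfrozenxid/relminmxid trackers):

HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
int			nfrozen = 0;
bool		totally_frozen;

for (OffsetNumber offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
{
	ItemId		itemid = PageGetItemId(page, offnum);
	HeapTupleHeader htup;

	if (!ItemIdIsNormal(itemid))
		continue;
	htup = (HeapTupleHeader) PageGetItem(page, itemid);

	/* true means frozen[nfrozen] now holds a usable freeze plan */
	if (heap_prepare_freeze_tuple(htup, &cutoffs, &pagefrz,
								  &frozen[nfrozen], &totally_frozen))
		frozen[nfrozen++].offset = offnum;
}

The collected plans are then validated with heap_pre_freeze_checks() and applied with heap_freeze_prepared_tuples().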

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)

Definition at line 567 of file heapam.c.

568{
569 HeapScanDesc scan = (HeapScanDesc) sscan;
570 Buffer buffer = scan->rs_cbuf;
571 BlockNumber block = scan->rs_cblock;
572 Snapshot snapshot;
573 Page page;
574 int lines;
575 bool all_visible;
576 bool check_serializable;
577
578 Assert(BufferGetBlockNumber(buffer) == block);
579
580 /* ensure we're not accidentally being used when not in pagemode */
581 Assert(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE);
582 snapshot = scan->rs_base.rs_snapshot;
583
584 /*
585 * Prune and repair fragmentation for the whole page, if possible.
586 */
587 heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
588
589 /*
590 * We must hold share lock on the buffer content while examining tuple
591 * visibility. Afterwards, however, the tuples we have found to be
592 * visible are guaranteed good as long as we hold the buffer pin.
593 */
594 LockBuffer(buffer, BUFFER_LOCK_SHARE);
595
596 page = BufferGetPage(buffer);
597 lines = PageGetMaxOffsetNumber(page);
598
599 /*
600 * If the all-visible flag indicates that all tuples on the page are
601 * visible to everyone, we can skip the per-tuple visibility tests.
602 *
603 * Note: In hot standby, a tuple that's already visible to all
604 * transactions on the primary might still be invisible to a read-only
605 * transaction in the standby. We partly handle this problem by tracking
606 * the minimum xmin of visible tuples as the cut-off XID while marking a
607 * page all-visible on the primary and WAL log that along with the
608 * visibility map SET operation. In hot standby, we wait for (or abort)
609 * all transactions that potentially might not see one or more tuples on
610 * the page. That's how index-only scans work fine in hot standby. A
611 * crucial difference between index-only scans and heap scans is that the
612 * index-only scan completely relies on the visibility map, whereas a heap
613 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
614 * the page-level flag can be trusted in the same way, because it might
615 * get propagated somehow without being explicitly WAL-logged, e.g. via a
616 * full page write. Until we can prove that beyond doubt, let's check each
617 * tuple for visibility the hard way.
618 */
619 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
620 check_serializable =
621 CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
622
623 /*
624 * We call page_collect_tuples() with constant arguments, to get the
625 * compiler to constant fold the constant arguments. Separate calls with
626 * constant arguments, rather than variables, are needed on several
627 * compilers to actually perform constant folding.
628 */
629 if (likely(all_visible))
630 {
631 if (likely(!check_serializable))
632 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
633 block, lines, true, false);
634 else
635 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
636 block, lines, true, true);
637 }
638 else
639 {
640 if (likely(!check_serializable))
641 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
642 block, lines, false, false);
643 else
644 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
645 block, lines, false, true);
646 }
647
648 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
649}
#define likely(x)
Definition: c.h:406
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition: heapam.c:517
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition: predicate.c:3991
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:209
uint32 rs_ntuples
Definition: heapam.h:99
BlockNumber rs_cblock
Definition: heapam.h:69
bool takenDuringRecovery
Definition: snapshot.h:180

References Assert(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1269 of file heapam.c.

1271{
1272 HeapScanDesc scan = (HeapScanDesc) sscan;
1273
1274 if (set_params)
1275 {
1276 if (allow_strat)
1277 scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1278 else
1279 scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1280
1281 if (allow_sync)
1282 scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1283 else
1284 scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1285
1286 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1287 IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1288 scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1289 else
1290 scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1291 }
1292
1293 /*
1294 * unpin scan buffers
1295 */
1296 if (BufferIsValid(scan->rs_cbuf))
1297 {
1298 ReleaseBuffer(scan->rs_cbuf);
1299 scan->rs_cbuf = InvalidBuffer;
1300 }
1301
1302 /*
1303 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1304 * additional data vs a normal HeapScan
1305 */
1306
1307 /*
1308 * The read stream is reset on rescan. This must be done before
1309 * initscan(), as some state referred to by read_stream_reset() is reset
1310 * in initscan().
1311 */
1312 if (scan->rs_read_stream)
1314
1315 /*
1316 * reinitialize scan descriptor
1317 */
1318 initscan(scan, key, true);
1319}
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:1044
@ SO_ALLOW_STRAT
Definition: tableam.h:58
@ SO_ALLOW_SYNC
Definition: tableam.h:60

References BufferIsValid(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)

Definition at line 1440 of file heapam.c.

1442{
1443 HeapScanDesc scan = (HeapScanDesc) sscan;
1444 BlockNumber startBlk;
1445 BlockNumber numBlks;
1446 ItemPointerData highestItem;
1447 ItemPointerData lowestItem;
1448
1449 /*
1450 * For relations without any pages, we can simply leave the TID range
1451 * unset. There will be no tuples to scan, therefore no tuples outside
1452 * the given TID range.
1453 */
1454 if (scan->rs_nblocks == 0)
1455 return;
1456
1457 /*
1458 * Set up some ItemPointers which point to the first and last possible
1459 * tuples in the heap.
1460 */
1461 ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
1462 ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
1463
1464 /*
1465 * If the given maximum TID is below the highest possible TID in the
1466 * relation, then restrict the range to that, otherwise we scan to the end
1467 * of the relation.
1468 */
1469 if (ItemPointerCompare(maxtid, &highestItem) < 0)
1470 ItemPointerCopy(maxtid, &highestItem);
1471
1472 /*
1473 * If the given minimum TID is above the lowest possible TID in the
1474 * relation, then restrict the range to only scan for TIDs above that.
1475 */
1476 if (ItemPointerCompare(mintid, &lowestItem) > 0)
1477 ItemPointerCopy(mintid, &lowestItem);
1478
1479 /*
1480 * Check for an empty range and protect from would-be negative results
1481 * from the numBlks calculation below.
1482 */
1483 if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
1484 {
1485 /* Set an empty range of blocks to scan */
1486 heap_setscanlimits(sscan, 0, 0);
1487 return;
1488 }
1489
1490 /*
1491 * Calculate the first block and the number of blocks we must scan. We
1492 * could be more aggressive here and perform some more validation to try
1493 * and further narrow the scope of blocks to scan by checking if the
1494 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1495 * advance startBlk by one. Likewise, if highestItem has an offset of 0
1496 * we could scan one fewer blocks. However, such an optimization does not
1497 * seem worth troubling over, currently.
1498 */
1499 startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
1500
1501 numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
1502 ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
1503
1504 /* Set the start block and number of blocks to scan */
1505 heap_setscanlimits(sscan, startBlk, numBlks);
1506
1507 /* Finally, set the TID range in sscan */
1508 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1509 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1510}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:495
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
#define MaxOffsetNumber
Definition: off.h:28
BlockNumber rs_nblocks
Definition: heapam.h:61

References FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, HeapScanDescData::rs_nblocks, TableScanDescData::st, and TableScanDescData::tidrange.
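
In practice this is reached through the table AM's scan_set_tidrange callback, but a direct heapam-level sketch of a TID range scan looks like this ("rel", "snapshot" and "slot" are assumed to be set up by the caller):

ItemPointerData mintid;
ItemPointerData maxtid;
TableScanDesc	scan;

ItemPointerSet(&mintid, 0, FirstOffsetNumber);		/* ctid (0,1) */
ItemPointerSet(&maxtid, 9, MaxOffsetNumber);		/* end of block 9 */

scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
					  SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);
heap_set_tidrange(scan, &mintid, &maxtid);

while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
{
	/* ... process one tuple within the requested ctid range ... */
}
heap_endscan(scan);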

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)

Definition at line 495 of file heapam.c.

496{
497 HeapScanDesc scan = (HeapScanDesc) sscan;
498
499 Assert(!scan->rs_inited); /* else too late to change */
500 /* else rs_startblock is significant */
501 Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
502
503 /* Check startBlk is valid (but allow case of zero blocks...) */
504 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
505
506 scan->rs_startblock = startBlk;
507 scan->rs_numblocks = numBlks;
508}
bool rs_inited
Definition: heapam.h:67
BlockNumber rs_startblock
Definition: heapam.h:62
BlockNumber rs_numblocks
Definition: heapam.h:63

References Assert(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
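
For example, a heapam_index_build_range_scan()-style caller bounds a sequential scan to a block range before the first tuple is fetched. A sketch, assuming the scan was started without SO_ALLOW_SYNC (as the assertion in this function requires) and that "rel", "snapshot" and "slot" are supplied by the caller:

TableScanDesc	scan;

/* Scan only blocks 100..199 of rel */
scan = heap_beginscan(rel, snapshot, 0, NULL, NULL, SO_TYPE_SEQSCAN);
heap_setscanlimits(scan, 100, 100);

while (heap_getnextslot(scan, ForwardScanDirection, slot))
{
	/* ... */
}
heap_endscan(scan);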

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)

Definition at line 7836 of file heapam.c.

7837{
7838 TransactionId xid;
7839
7840 /*
7841 * If xmin is a normal transaction ID, this tuple is definitely not
7842 * frozen.
7843 */
7844 xid = HeapTupleHeaderGetXmin(tuple);
7845 if (TransactionIdIsNormal(xid))
7846 return true;
7847
7848 /*
7849 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7850 */
7851 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7852 {
7853 MultiXactId multi;
7854
7855 multi = HeapTupleHeaderGetRawXmax(tuple);
7856 if (MultiXactIdIsValid(multi))
7857 return true;
7858 }
7859 else
7860 {
7861 xid = HeapTupleHeaderGetRawXmax(tuple);
7862 if (TransactionIdIsNormal(xid))
7863 return true;
7864 }
7865
7866 if (tuple->t_infomask & HEAP_MOVED)
7867 {
7868 xid = HeapTupleHeaderGetXvac(tuple);
7869 if (TransactionIdIsNormal(xid))
7870 return true;
7871 }
7872
7873 return false;
7874}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_would_be_all_visible().
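
A sketch of the typical use, deciding whether a page whose tuples are all visible can also be marked all-frozen in the visibility map ("page" and "maxoff" are assumed to describe a locked heap page):

bool		all_frozen = true;

for (OffsetNumber offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
{
	ItemId		itemid = PageGetItemId(page, offnum);

	if (!ItemIdIsNormal(itemid))
		continue;

	if (heap_tuple_needs_eventual_freeze((HeapTupleHeader) PageGetItem(page, itemid)))
	{
		all_frozen = false;		/* some xmin/xmax/xvac still needs freezing */
		break;
	}
}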

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)

Definition at line 7891 of file heapam.c.

7895{
7896 TransactionId xid;
7897 MultiXactId multi;
7898 bool freeze = false;
7899
7900 /* First deal with xmin */
7901 xid = HeapTupleHeaderGetXmin(tuple);
7902 if (TransactionIdIsNormal(xid))
7903 {
7905 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7906 *NoFreezePageRelfrozenXid = xid;
7907 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7908 freeze = true;
7909 }
7910
7911 /* Now deal with xmax */
7913 multi = InvalidMultiXactId;
7914 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7915 multi = HeapTupleHeaderGetRawXmax(tuple);
7916 else
7917 xid = HeapTupleHeaderGetRawXmax(tuple);
7918
7919 if (TransactionIdIsNormal(xid))
7920 {
7922 /* xmax is a non-permanent XID */
7923 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7924 *NoFreezePageRelfrozenXid = xid;
7925 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7926 freeze = true;
7927 }
7928 else if (!MultiXactIdIsValid(multi))
7929 {
7930 /* xmax is a permanent XID or invalid MultiXactId/XID */
7931 }
7932 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7933 {
7934 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7935 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7936 *NoFreezePageRelminMxid = multi;
7937 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7938 freeze = true;
7939 }
7940 else
7941 {
7942 /* xmax is a MultiXactId that may have an updater XID */
7943 MultiXactMember *members;
7944 int nmembers;
7945
7947 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7948 *NoFreezePageRelminMxid = multi;
7949 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
7950 freeze = true;
7951
7952 /* need to check whether any member of the mxact is old */
7953 nmembers = GetMultiXactIdMembers(multi, &members, false,
7955
7956 for (int i = 0; i < nmembers; i++)
7957 {
7958 xid = members[i].xid;
7960 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7961 *NoFreezePageRelfrozenXid = xid;
7962 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7963 freeze = true;
7964 }
7965 if (nmembers > 0)
7966 pfree(members);
7967 }
7968
7969 if (tuple->t_infomask & HEAP_MOVED)
7970 {
7971 xid = HeapTupleHeaderGetXvac(tuple);
7972 if (TransactionIdIsNormal(xid))
7973 {
7975 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7976 *NoFreezePageRelfrozenXid = xid;
7977 /* heap_prepare_freeze_tuple forces xvac freezing */
7978 freeze = true;
7979 }
7980 }
7981
7982 return freeze;
7983}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
Definition: htup_details.h:251
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3283
#define InvalidMultiXactId
Definition: multixact.h:25
TransactionId xid
Definition: multixact.h:59

References Assert(), VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
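
A sketch of how a "no freeze" caller such as lazy_scan_noprune() uses it, seeding the page-level trackers from OldestXmin/OldestMxact and letting each call ratchet them back ("cutoffs" and "htup" are assumed to be supplied by the caller):

TransactionId NoFreezePageRelfrozenXid = cutoffs.OldestXmin;
MultiXactId NoFreezePageRelminMxid = cutoffs.OldestMxact;
bool		should_freeze;

should_freeze = heap_tuple_should_freeze(htup, &cutoffs,
										 &NoFreezePageRelfrozenXid,
										 &NoFreezePageRelminMxid);

/*
 * should_freeze is true when some XID/MXID in the tuple is older than
 * FreezeLimit/MultiXactCutoff.  Either way, the trackers now hold values
 * that remain safe for relfrozenxid/relminmxid if the page is left
 * unfrozen.
 */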

◆ heap_update()

TM_Result heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)

Definition at line 3273 of file heapam.c.

3277{
3278 TM_Result result;
3280 Bitmapset *hot_attrs;
3281 Bitmapset *sum_attrs;
3282 Bitmapset *key_attrs;
3283 Bitmapset *id_attrs;
3284 Bitmapset *interesting_attrs;
3285 Bitmapset *modified_attrs;
3286 ItemId lp;
3287 HeapTupleData oldtup;
3288 HeapTuple heaptup;
3289 HeapTuple old_key_tuple = NULL;
3290 bool old_key_copied = false;
3291 Page page;
3292 BlockNumber block;
3293 MultiXactStatus mxact_status;
3294 Buffer buffer,
3295 newbuf,
3296 vmbuffer = InvalidBuffer,
3297 vmbuffer_new = InvalidBuffer;
3298 bool need_toast;
3299 Size newtupsize,
3300 pagefree;
3301 bool have_tuple_lock = false;
3302 bool iscombo;
3303 bool use_hot_update = false;
3304 bool summarized_update = false;
3305 bool key_intact;
3306 bool all_visible_cleared = false;
3307 bool all_visible_cleared_new = false;
3308 bool checked_lockers;
3309 bool locker_remains;
3310 bool id_has_external = false;
3311 TransactionId xmax_new_tuple,
3312 xmax_old_tuple;
3313 uint16 infomask_old_tuple,
3314 infomask2_old_tuple,
3315 infomask_new_tuple,
3316 infomask2_new_tuple;
3317
3319
3320 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3323
3324 AssertHasSnapshotForToast(relation);
3325
3326 /*
3327 * Forbid this during a parallel operation, lest it allocate a combo CID.
3328 * Other workers might need that combo CID for visibility checks, and we
3329 * have no provision for broadcasting it to them.
3330 */
3331 if (IsInParallelMode())
3332 ereport(ERROR,
3333 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
3334 errmsg("cannot update tuples during a parallel operation")));
3335
3336#ifdef USE_ASSERT_CHECKING
3337 check_lock_if_inplace_updateable_rel(relation, otid, newtup);
3338#endif
3339
3340 /*
3341 * Fetch the list of attributes to be checked for various operations.
3342 *
3343 * For HOT considerations, this is wasted effort if we fail to update or
3344 * have to put the new tuple on a different page. But we must compute the
3345 * list before obtaining buffer lock --- in the worst case, if we are
3346 * doing an update on one of the relevant system catalogs, we could
3347 * deadlock if we try to fetch the list later. In any case, the relcache
3348 * caches the data so this is usually pretty cheap.
3349 *
3350 * We also need columns used by the replica identity and columns that are
3351 * considered the "key" of rows in the table.
3352 *
3353 * Note that we get copies of each bitmap, so we need not worry about
3354 * relcache flush happening midway through.
3355 */
3356 hot_attrs = RelationGetIndexAttrBitmap(relation,
3358 sum_attrs = RelationGetIndexAttrBitmap(relation,
3361 id_attrs = RelationGetIndexAttrBitmap(relation,
3363 interesting_attrs = NULL;
3364 interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
3365 interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
3366 interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
3367 interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
3368
3369 block = ItemPointerGetBlockNumber(otid);
3370 INJECTION_POINT("heap_update-before-pin", NULL);
3371 buffer = ReadBuffer(relation, block);
3372 page = BufferGetPage(buffer);
3373
3374 /*
3375 * Before locking the buffer, pin the visibility map page if it appears to
3376 * be necessary. Since we haven't got the lock yet, someone else might be
3377 * in the middle of changing this, so we'll need to recheck after we have
3378 * the lock.
3379 */
3380 if (PageIsAllVisible(page))
3381 visibilitymap_pin(relation, block, &vmbuffer);
3382
3384
3385 lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
3386
3387 /*
3388 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3389 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3390 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3391 * of which indicates concurrent pruning.
3392 *
3393 * Failing with TM_Updated would be most accurate. However, unlike other
3394 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3395 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3396 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3397 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3398 * TM_Updated and TM_Deleted affects only the wording of error messages.
3399 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3400 * the specification of when tmfd->ctid is valid. Second, it creates
3401 * error log evidence that we took this branch.
3402 *
3403 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3404 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3405 * unrelated row, we'll fail with "duplicate key value violates unique".
3406 * XXX if otid is the live, newer version of the newtup row, we'll discard
3407 * changes originating in versions of this catalog row after the version
3408 * the caller got from syscache. See syscache-update-pruned.spec.
3409 */
3410 if (!ItemIdIsNormal(lp))
3411 {
3413
3414 UnlockReleaseBuffer(buffer);
3415 Assert(!have_tuple_lock);
3416 if (vmbuffer != InvalidBuffer)
3417 ReleaseBuffer(vmbuffer);
3418 tmfd->ctid = *otid;
3419 tmfd->xmax = InvalidTransactionId;
3420 tmfd->cmax = InvalidCommandId;
3421 *update_indexes = TU_None;
3422
3423 bms_free(hot_attrs);
3424 bms_free(sum_attrs);
3425 bms_free(key_attrs);
3426 bms_free(id_attrs);
3427 /* modified_attrs not yet initialized */
3428 bms_free(interesting_attrs);
3429 return TM_Deleted;
3430 }
3431
3432 /*
3433 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3434 * properly.
3435 */
3436 oldtup.t_tableOid = RelationGetRelid(relation);
3437 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3438 oldtup.t_len = ItemIdGetLength(lp);
3439 oldtup.t_self = *otid;
3440
3441 /* the new tuple is ready, except for this: */
3442 newtup->t_tableOid = RelationGetRelid(relation);
3443
3444 /*
3445 * Determine columns modified by the update. Additionally, identify
3446 * whether any of the unmodified replica identity key attributes in the
3447 * old tuple is externally stored or not. This is required because for
3448 * such attributes the flattened value won't be WAL logged as part of the
3449 * new tuple so we must include it as part of the old_key_tuple. See
3450 * ExtractReplicaIdentity.
3451 */
3452 modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
3453 id_attrs, &oldtup,
3454 newtup, &id_has_external);
3455
3456 /*
3457 * If we're not updating any "key" column, we can grab a weaker lock type.
3458 * This allows for more concurrency when we are running simultaneously
3459 * with foreign key checks.
3460 *
3461 * Note that if a column gets detoasted while executing the update, but
3462 * the value ends up being the same, this test will fail and we will use
3463 * the stronger lock. This is acceptable; the important case to optimize
3464 * is updates that don't manipulate key columns, not those that
3465 * serendipitously arrive at the same key values.
3466 */
3467 if (!bms_overlap(modified_attrs, key_attrs))
3468 {
3469 *lockmode = LockTupleNoKeyExclusive;
3470 mxact_status = MultiXactStatusNoKeyUpdate;
3471 key_intact = true;
3472
3473 /*
3474 * If this is the first possibly-multixact-able operation in the
3475 * current transaction, set my per-backend OldestMemberMXactId
3476 * setting. We can be certain that the transaction will never become a
3477 * member of any older MultiXactIds than that. (We have to do this
3478 * even if we end up just using our own TransactionId below, since
3479 * some other backend could incorporate our XID into a MultiXact
3480 * immediately afterwards.)
3481 */
3483 }
3484 else
3485 {
3486 *lockmode = LockTupleExclusive;
3487 mxact_status = MultiXactStatusUpdate;
3488 key_intact = false;
3489 }
3490
3491 /*
3492 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3493 * otid may very well point at newtup->t_self, which we will overwrite
3494 * with the new tuple's location, so there's great risk of confusion if we
3495 * use otid anymore.
3496 */
3497
3498l2:
3499 checked_lockers = false;
3500 locker_remains = false;
3501 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3502
3503 /* see below about the "no wait" case */
3504 Assert(result != TM_BeingModified || wait);
3505
3506 if (result == TM_Invisible)
3507 {
3508 UnlockReleaseBuffer(buffer);
3509 ereport(ERROR,
3510 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3511 errmsg("attempted to update invisible tuple")));
3512 }
3513 else if (result == TM_BeingModified && wait)
3514 {
3515 TransactionId xwait;
3516 uint16 infomask;
3517 bool can_continue = false;
3518
3519 /*
3520 * XXX note that we don't consider the "no wait" case here. This
3521 * isn't a problem currently because no caller uses that case, but it
3522 * should be fixed if such a caller is introduced. It wasn't a
3523 * problem previously because this code would always wait, but now
3524 * that some tuple locks do not conflict with one of the lock modes we
3525 * use, it is possible that this case is interesting to handle
3526 * specially.
3527 *
3528 * This may cause failures with third-party code that calls
3529 * heap_update directly.
3530 */
3531
3532 /* must copy state data before unlocking buffer */
3533 xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3534 infomask = oldtup.t_data->t_infomask;
3535
3536 /*
3537 * Now we have to do something about the existing locker. If it's a
3538 * multi, sleep on it; we might be awakened before it is completely
3539 * gone (or even not sleep at all in some cases); we need to preserve
3540 * it as locker, unless it is gone completely.
3541 *
3542 * If it's not a multi, we need to check for sleeping conditions
3543 * before actually going to sleep. If the update doesn't conflict
3544 * with the locks, we just continue without sleeping (but making sure
3545 * it is preserved).
3546 *
3547 * Before sleeping, we need to acquire tuple lock to establish our
3548 * priority for the tuple (see heap_lock_tuple). LockTuple will
3549 * release us when we are next-in-line for the tuple. Note we must
3550 * not acquire the tuple lock until we're sure we're going to sleep;
3551 * otherwise we're open for race conditions with other transactions
3552 * holding the tuple lock which sleep on us.
3553 *
3554 * If we are forced to "start over" below, we keep the tuple lock;
3555 * this arranges that we stay at the head of the line while rechecking
3556 * tuple state.
3557 */
3558 if (infomask & HEAP_XMAX_IS_MULTI)
3559 {
3560 TransactionId update_xact;
3561 int remain;
3562 bool current_is_member = false;
3563
3564 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
3565 *lockmode, &current_is_member))
3566 {
3568
3569 /*
3570 * Acquire the lock, if necessary (but skip it when we're
3571 * requesting a lock and already have one; avoids deadlock).
3572 */
3573 if (!current_is_member)
3574 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3575 LockWaitBlock, &have_tuple_lock);
3576
3577 /* wait for multixact */
3578 MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
3579 relation, &oldtup.t_self, XLTW_Update,
3580 &remain);
3581 checked_lockers = true;
3582 locker_remains = remain != 0;
3584
3585 /*
3586 * If xwait had just locked the tuple then some other xact
3587 * could update this tuple before we get to this point. Check
3588 * for xmax change, and start over if so.
3589 */
3591 infomask) ||
3593 xwait))
3594 goto l2;
3595 }
3596
3597 /*
3598 * Note that the multixact may not be done by now. It could have
3599 * surviving members; our own xact or other subxacts of this
3600 * backend, and also any other concurrent transaction that locked
3601 * the tuple with LockTupleKeyShare if we only got
3602 * LockTupleNoKeyExclusive. If this is the case, we have to be
3603 * careful to mark the updated tuple with the surviving members in
3604 * Xmax.
3605 *
3606 * Note that there could have been another update in the
3607 * MultiXact. In that case, we need to check whether it committed
3608 * or aborted. If it aborted we are safe to update it again;
3609 * otherwise there is an update conflict, and we have to return
3610 * TableTuple{Deleted, Updated} below.
3611 *
3612 * In the LockTupleExclusive case, we still need to preserve the
3613 * surviving members: those would include the tuple locks we had
3614 * before this one, which are important to keep in case this
3615 * subxact aborts.
3616 */
3618 update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
3619 else
3620 update_xact = InvalidTransactionId;
3621
3622 /*
3623 * There was no UPDATE in the MultiXact; or it aborted. No
3624 * TransactionIdIsInProgress() call needed here, since we called
3625 * MultiXactIdWait() above.
3626 */
3627 if (!TransactionIdIsValid(update_xact) ||
3628 TransactionIdDidAbort(update_xact))
3629 can_continue = true;
3630 }
3632 {
3633 /*
3634 * The only locker is ourselves; we can avoid grabbing the tuple
3635 * lock here, but must preserve our locking information.
3636 */
3637 checked_lockers = true;
3638 locker_remains = true;
3639 can_continue = true;
3640 }
3641 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
3642 {
3643 /*
3644 * If it's just a key-share locker, and we're not changing the key
3645 * columns, we don't need to wait for it to end; but we need to
3646 * preserve it as locker.
3647 */
3648 checked_lockers = true;
3649 locker_remains = true;
3650 can_continue = true;
3651 }
3652 else
3653 {
3654 /*
3655 * Wait for regular transaction to end; but first, acquire tuple
3656 * lock.
3657 */
3659 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3660 LockWaitBlock, &have_tuple_lock);
3661 XactLockTableWait(xwait, relation, &oldtup.t_self,
3662 XLTW_Update);
3663 checked_lockers = true;
3665
3666 /*
3667 * xwait is done, but if xwait had just locked the tuple then some
3668 * other xact could update this tuple before we get to this point.
3669 * Check for xmax change, and start over if so.
3670 */
3671 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3672 !TransactionIdEquals(xwait,
3674 goto l2;
3675
3676 /* Otherwise check if it committed or aborted */
3677 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3678 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3679 can_continue = true;
3680 }
3681
3682 if (can_continue)
3683 result = TM_Ok;
3684 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3685 result = TM_Updated;
3686 else
3687 result = TM_Deleted;
3688 }
3689
3690 /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3691 if (result != TM_Ok)
3692 {
3693 Assert(result == TM_SelfModified ||
3694 result == TM_Updated ||
3695 result == TM_Deleted ||
3696 result == TM_BeingModified);
3698 Assert(result != TM_Updated ||
3699 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3700 }
3701
3702 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3703 {
3704 /* Perform additional check for transaction-snapshot mode RI updates */
3705 if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
3706 result = TM_Updated;
3707 }
3708
3709 if (result != TM_Ok)
3710 {
3711 tmfd->ctid = oldtup.t_data->t_ctid;
3712 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3713 if (result == TM_SelfModified)
3714 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3715 else
3716 tmfd->cmax = InvalidCommandId;
3717 UnlockReleaseBuffer(buffer);
3718 if (have_tuple_lock)
3719 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3720 if (vmbuffer != InvalidBuffer)
3721 ReleaseBuffer(vmbuffer);
3722 *update_indexes = TU_None;
3723
3724 bms_free(hot_attrs);
3725 bms_free(sum_attrs);
3726 bms_free(key_attrs);
3727 bms_free(id_attrs);
3728 bms_free(modified_attrs);
3729 bms_free(interesting_attrs);
3730 return result;
3731 }
3732
3733 /*
3734 * If we didn't pin the visibility map page and the page has become all
3735 * visible while we were busy locking the buffer, or during some
3736 * subsequent window during which we had it unlocked, we'll have to unlock
3737 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3738 * bit unfortunate, especially since we'll now have to recheck whether the
3739 * tuple has been locked or updated under us, but hopefully it won't
3740 * happen very often.
3741 */
3742 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3743 {
3745 visibilitymap_pin(relation, block, &vmbuffer);
3747 goto l2;
3748 }
3749
3750 /* Fill in transaction status data */
3751
3752 /*
3753 * If the tuple we're updating is locked, we need to preserve the locking
3754 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3755 */
3757 oldtup.t_data->t_infomask,
3758 oldtup.t_data->t_infomask2,
3759 xid, *lockmode, true,
3760 &xmax_old_tuple, &infomask_old_tuple,
3761 &infomask2_old_tuple);
3762
3763 /*
3764 * And also prepare an Xmax value for the new copy of the tuple. If there
3765 * was no xmax previously, or there was one but all lockers are now gone,
3766 * then use InvalidTransactionId; otherwise, get the xmax from the old
3767 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3768 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3769 */
3770 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3772 (checked_lockers && !locker_remains))
3773 xmax_new_tuple = InvalidTransactionId;
3774 else
3775 xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3776
3777 if (!TransactionIdIsValid(xmax_new_tuple))
3778 {
3779 infomask_new_tuple = HEAP_XMAX_INVALID;
3780 infomask2_new_tuple = 0;
3781 }
3782 else
3783 {
3784 /*
3785 * If we found a valid Xmax for the new tuple, then the infomask bits
3786 * to use on the new tuple depend on what was there on the old one.
3787 * Note that since we're doing an update, the only possibility is that
3788 * the lockers had FOR KEY SHARE lock.
3789 */
3790 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3791 {
3792 GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
3793 &infomask2_new_tuple);
3794 }
3795 else
3796 {
3797 infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
3798 infomask2_new_tuple = 0;
3799 }
3800 }
3801
3802 /*
3803 * Prepare the new tuple with the appropriate initial values of Xmin and
3804 * Xmax, as well as initial infomask bits as computed above.
3805 */
3806 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3807 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3808 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3809 HeapTupleHeaderSetCmin(newtup->t_data, cid);
3810 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3811 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3812 HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
3813
3814 /*
3815 * Replace cid with a combo CID if necessary. Note that we already put
3816 * the plain cid into the new tuple.
3817 */
3818 HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
3819
3820 /*
3821 * If the toaster needs to be activated, OR if the new tuple will not fit
3822 * on the same page as the old, then we need to release the content lock
3823 * (but not the pin!) on the old tuple's buffer while we are off doing
3824 * TOAST and/or table-file-extension work. We must mark the old tuple to
3825 * show that it's locked, else other processes may try to update it
3826 * themselves.
3827 *
3828 * We need to invoke the toaster if there are already any out-of-line
3829 * toasted values present, or if the new tuple is over-threshold.
3830 */
3831 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3832 relation->rd_rel->relkind != RELKIND_MATVIEW)
3833 {
3834 /* toast table entries should never be recursively toasted */
3835 Assert(!HeapTupleHasExternal(&oldtup));
3836 Assert(!HeapTupleHasExternal(newtup));
3837 need_toast = false;
3838 }
3839 else
3840 need_toast = (HeapTupleHasExternal(&oldtup) ||
3841 HeapTupleHasExternal(newtup) ||
3842 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3843
3844 pagefree = PageGetHeapFreeSpace(page);
3845
3846 newtupsize = MAXALIGN(newtup->t_len);
3847
3848 if (need_toast || newtupsize > pagefree)
3849 {
3850 TransactionId xmax_lock_old_tuple;
3851 uint16 infomask_lock_old_tuple,
3852 infomask2_lock_old_tuple;
3853 bool cleared_all_frozen = false;
3854
3855 /*
3856 * To prevent concurrent sessions from updating the tuple, we have to
3857 * temporarily mark it locked, while we release the page-level lock.
3858 *
3859 * To satisfy the rule that any xid potentially appearing in a buffer
3860 * written out to disk must first be covered by WAL, we unfortunately have to WAL log this
3861 * temporary modification. We can reuse xl_heap_lock for this
3862 * purpose. If we crash/error before following through with the
3863 * actual update, xmax will be of an aborted transaction, allowing
3864 * other sessions to proceed.
3865 */
3866
3867 /*
3868 * Compute xmax / infomask appropriate for locking the tuple. This has
3869 * to be done separately from the combo that's going to be used for
3870 * updating, because the potentially created multixact would otherwise
3871 * be wrong.
3872 */
3873 compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3874 oldtup.t_data->t_infomask,
3875 oldtup.t_data->t_infomask2,
3876 xid, *lockmode, false,
3877 &xmax_lock_old_tuple, &infomask_lock_old_tuple,
3878 &infomask2_lock_old_tuple);
3879
3880 Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
3881
3882 START_CRIT_SECTION();
3883 
3884 /* Clear obsolete visibility flags ... */
3885 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3886 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3887 HeapTupleClearHotUpdated(&oldtup);
3888 /* ... and store info about transaction updating this tuple */
3889 Assert(TransactionIdIsValid(xmax_lock_old_tuple));
3890 HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
3891 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3892 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3893 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3894
3895 /* temporarily make it look not-updated, but locked */
3896 oldtup.t_data->t_ctid = oldtup.t_self;
3897
3898 /*
3899 * Clear all-frozen bit on visibility map if needed. We could
3900 * immediately reset ALL_VISIBLE, but given that the WAL logging
3901 * overhead would be unchanged, that doesn't seem necessarily
3902 * worthwhile.
3903 */
3904 if (PageIsAllVisible(page) &&
3905 visibilitymap_clear(relation, block, vmbuffer,
3906 VISIBILITYMAP_ALL_FROZEN))
3907 cleared_all_frozen = true;
3908
3909 MarkBufferDirty(buffer);
3910
3911 if (RelationNeedsWAL(relation))
3912 {
3913 xl_heap_lock xlrec;
3914 XLogRecPtr recptr;
3915
3916 XLogBeginInsert();
3917 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3918 
3919 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3920 xlrec.xmax = xmax_lock_old_tuple;
3921 xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3922 oldtup.t_data->t_infomask2);
3923 xlrec.flags =
3924 cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
3925 XLogRegisterData(&xlrec, SizeOfHeapLock);
3926 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
3927 PageSetLSN(page, recptr);
3928 }
3929
3930 END_CRIT_SECTION();
3931 
3932 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3933 
3934 /*
3935 * Let the toaster do its thing, if needed.
3936 *
3937 * Note: below this point, heaptup is the data we actually intend to
3938 * store into the relation; newtup is the caller's original untoasted
3939 * data.
3940 */
3941 if (need_toast)
3942 {
3943 /* Note we always use WAL and FSM during updates */
3944 heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
3945 newtupsize = MAXALIGN(heaptup->t_len);
3946 }
3947 else
3948 heaptup = newtup;
3949
3950 /*
3951 * Now, do we need a new page for the tuple, or not? This is a bit
3952 * tricky since someone else could have added tuples to the page while
3953 * we weren't looking. We have to recheck the available space after
3954 * reacquiring the buffer lock. But don't bother to do that if the
3955 * former amount of free space is still not enough; it's unlikely
3956 * there's more free now than before.
3957 *
3958 * What's more, if we need to get a new page, we will need to acquire
3959 * buffer locks on both old and new pages. To avoid deadlock against
3960 * some other backend trying to get the same two locks in the other
3961 * order, we must be consistent about the order we get the locks in.
3962 * We use the rule "lock the lower-numbered page of the relation
3963 * first". To implement this, we must do RelationGetBufferForTuple
3964 * while not holding the lock on the old page, and we must rely on it
3965 * to get the locks on both pages in the correct order.
3966 *
3967 * Another consideration is that we need visibility map page pin(s) if
3968 * we will have to clear the all-visible flag on either page. If we
3969 * call RelationGetBufferForTuple, we rely on it to acquire any such
3970 * pins; but if we don't, we have to handle that here. Hence we need
3971 * a loop.
3972 */
3973 for (;;)
3974 {
3975 if (newtupsize > pagefree)
3976 {
3977 /* It doesn't fit, must use RelationGetBufferForTuple. */
3978 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
3979 buffer, 0, NULL,
3980 &vmbuffer_new, &vmbuffer,
3981 0);
3982 /* We're all done. */
3983 break;
3984 }
3985 /* Acquire VM page pin if needed and we don't have it. */
3986 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3987 visibilitymap_pin(relation, block, &vmbuffer);
3988 /* Re-acquire the lock on the old tuple's page. */
3989 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3990 /* Re-check using the up-to-date free space */
3991 pagefree = PageGetHeapFreeSpace(page);
3992 if (newtupsize > pagefree ||
3993 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
3994 {
3995 /*
3996 * Rats, it doesn't fit anymore, or somebody just now set the
3997 * all-visible flag. We must now unlock and loop to avoid
3998 * deadlock. Fortunately, this path should seldom be taken.
3999 */
4000 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4001 }
4002 else
4003 {
4004 /* We're all done. */
4005 newbuf = buffer;
4006 break;
4007 }
4008 }
4009 }
4010 else
4011 {
4012 /* No TOAST work needed, and it'll fit on same page */
4013 newbuf = buffer;
4014 heaptup = newtup;
4015 }
4016
4017 /*
4018 * We're about to do the actual update -- check for conflict first, to
4019 * avoid possibly having to roll back work we've just done.
4020 *
4021 * This is safe without a recheck as long as there is no possibility of
4022 * another process scanning the pages between this check and the update
4023 * being visible to the scan (i.e., exclusive buffer content lock(s) are
4024 * continuously held from this point until the tuple update is visible).
4025 *
4026 * For the new tuple the only check needed is at the relation level, but
4027 * since both tuples are in the same relation and the check for oldtup
4028 * will include checking the relation level, there is no benefit to a
4029 * separate check for the new tuple.
4030 */
4031 CheckForSerializableConflictIn(relation, &oldtup.t_self,
4032 BufferGetBlockNumber(buffer));
4033
4034 /*
4035 * At this point newbuf and buffer are both pinned and locked, and newbuf
4036 * has enough space for the new tuple. If they are the same buffer, only
4037 * one pin is held.
4038 */
4039
4040 if (newbuf == buffer)
4041 {
4042 /*
4043 * Since the new tuple is going into the same page, we might be able
4044 * to do a HOT update. Check if any of the index columns have been
4045 * changed.
4046 */
4047 if (!bms_overlap(modified_attrs, hot_attrs))
4048 {
4049 use_hot_update = true;
4050
4051 /*
4052 * If none of the columns that are used in hot-blocking indexes
4053 * were updated, we can apply HOT, but we do still need to check
4054 * if we need to update the summarizing indexes, and update those
4055 * indexes if the columns were updated, or we may fail to detect
4056 * e.g. value bound changes in BRIN minmax indexes.
4057 */
4058 if (bms_overlap(modified_attrs, sum_attrs))
4059 summarized_update = true;
4060 }
4061 }
4062 else
4063 {
4064 /* Set a hint that the old page could use prune/defrag */
4065 PageSetFull(page);
4066 }
4067
4068 /*
4069 * Compute replica identity tuple before entering the critical section so
4070 * we don't PANIC upon a memory allocation failure.
4071 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4072 * logged. Pass old key required as true only if the replica identity key
4073 * columns are modified or it has external data.
4074 */
4075 old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
4076 bms_overlap(modified_attrs, id_attrs) ||
4077 id_has_external,
4078 &old_key_copied);
4079
4080 /* NO EREPORT(ERROR) from here till changes are logged */
4081 START_CRIT_SECTION();
4082 
4083 /*
4084 * If this transaction commits, the old tuple will become DEAD sooner or
4085 * later. Set flag that this page is a candidate for pruning once our xid
4086 * falls below the OldestXmin horizon. If the transaction finally aborts,
4087 * the subsequent page pruning will be a no-op and the hint will be
4088 * cleared.
4089 *
4090 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4091 * there would be a prunable tuple in the newbuf; but for now we choose
4092 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4093 * sync if this decision changes.
4094 */
4095 PageSetPrunable(page, xid);
4096
4097 if (use_hot_update)
4098 {
4099 /* Mark the old tuple as HOT-updated */
4100 HeapTupleSetHotUpdated(&oldtup);
4101 /* And mark the new tuple as heap-only */
4102 HeapTupleSetHeapOnly(heaptup);
4103 /* Mark the caller's copy too, in case different from heaptup */
4104 HeapTupleSetHeapOnly(newtup);
4105 }
4106 else
4107 {
4108 /* Make sure tuples are correctly marked as not-HOT */
4109 HeapTupleClearHotUpdated(&oldtup);
4110 HeapTupleClearHeapOnly(heaptup);
4111 HeapTupleClearHeapOnly(newtup);
4112 }
4113
4114 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4115
4116
4117 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4118 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
4119 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4120 /* ... and store info about transaction updating this tuple */
4121 Assert(TransactionIdIsValid(xmax_old_tuple));
4122 HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
4123 oldtup.t_data->t_infomask |= infomask_old_tuple;
4124 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4125 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
4126
4127 /* record address of new tuple in t_ctid of old one */
4128 oldtup.t_data->t_ctid = heaptup->t_self;
4129
4130 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4131 if (PageIsAllVisible(BufferGetPage(buffer)))
4132 {
4133 all_visible_cleared = true;
4134 PageClearAllVisible(BufferGetPage(buffer));
4135 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4136 vmbuffer, VISIBILITYMAP_VALID_BITS);
4137 }
4138 if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
4139 {
4140 all_visible_cleared_new = true;
4141 PageClearAllVisible(BufferGetPage(newbuf));
4142 visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
4143 vmbuffer_new, VISIBILITYMAP_VALID_BITS);
4144 }
4145
4146 if (newbuf != buffer)
4147 MarkBufferDirty(newbuf);
4148 MarkBufferDirty(buffer);
4149
4150 /* XLOG stuff */
4151 if (RelationNeedsWAL(relation))
4152 {
4153 XLogRecPtr recptr;
4154
4155 /*
4156 * For logical decoding we need combo CIDs to properly decode the
4157 * catalog.
4158 */
4159 if (RelationIsAccessibleInLogicalDecoding(relation))
4160 {
4161 log_heap_new_cid(relation, &oldtup);
4162 log_heap_new_cid(relation, heaptup);
4163 }
4164
4165 recptr = log_heap_update(relation, buffer,
4166 newbuf, &oldtup, heaptup,
4167 old_key_tuple,
4168 all_visible_cleared,
4169 all_visible_cleared_new);
4170 if (newbuf != buffer)
4171 {
4172 PageSetLSN(BufferGetPage(newbuf), recptr);
4173 }
4174 PageSetLSN(BufferGetPage(buffer), recptr);
4175 }
4176
4177 END_CRIT_SECTION();
4178 
4179 if (newbuf != buffer)
4180 LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
4181 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4182 
4183 /*
4184 * Mark old tuple for invalidation from system caches at next command
4185 * boundary, and mark the new tuple for invalidation in case we abort. We
4186 * have to do this before releasing the buffer because oldtup is in the
4187 * buffer. (heaptup is all in local memory, but it's necessary to process
4188 * both tuple versions in one call to inval.c so we can avoid redundant
4189 * sinval messages.)
4190 */
4191 CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
4192
4193 /* Now we can release the buffer(s) */
4194 if (newbuf != buffer)
4195 ReleaseBuffer(newbuf);
4196 ReleaseBuffer(buffer);
4197 if (BufferIsValid(vmbuffer_new))
4198 ReleaseBuffer(vmbuffer_new);
4199 if (BufferIsValid(vmbuffer))
4200 ReleaseBuffer(vmbuffer);
4201
4202 /*
4203 * Release the lmgr tuple lock, if we had it.
4204 */
4205 if (have_tuple_lock)
4206 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4207
4208 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4209
4210 /*
4211 * If heaptup is a private copy, release it. Don't forget to copy t_self
4212 * back to the caller's image, too.
4213 */
4214 if (heaptup != newtup)
4215 {
4216 newtup->t_self = heaptup->t_self;
4217 heap_freetuple(heaptup);
4218 }
4219
4220 /*
4221 * If it is a HOT update, the update may still need to update summarized
4222 * indexes, lest we fail to update those summaries and get incorrect
4223 * results (for example, minmax bounds of the block may change with this
4224 * update).
4225 */
4226 if (use_hot_update)
4227 {
4228 if (summarized_update)
4229 *update_indexes = TU_Summarizing;
4230 else
4231 *update_indexes = TU_None;
4232 }
4233 else
4234 *update_indexes = TU_All;
4235
4236 if (old_key_tuple != NULL && old_key_copied)
4237 heap_freetuple(old_key_tuple);
4238
4239 bms_free(hot_attrs);
4240 bms_free(sum_attrs);
4241 bms_free(key_attrs);
4242 bms_free(id_attrs);
4243 bms_free(modified_attrs);
4244 bms_free(interesting_attrs);
4245
4246 return TM_Ok;
4247}
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:916
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:581
static void PageSetFull(Page page)
Definition: bufpage.h:417
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition: heapam.c:4427
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition: heapam.c:8864
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition: heapam.c:7605
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:780
#define HEAP2_XACT_MASK
Definition: htup_details.h:293
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:197
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
Definition: htup_details.h:422
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:792
#define HEAP_XACT_MASK
Definition: htup_details.h:215
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:774
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:798
#define HEAP_UPDATED
Definition: htup_details.h:210
#define HEAP_XMAX_KEYSHR_LOCK
Definition: htup_details.h:194
#define INJECTION_POINT(name, arg)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition: relcache.c:5303
@ INDEX_ATTR_BITMAP_KEY
Definition: relcache.h:69
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition: relcache.h:72
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition: relcache.h:73
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition: relcache.h:71
bool RelationSupportsSysCache(Oid relid)
Definition: syscache.c:762
@ TU_Summarizing
Definition: tableam.h:119
@ TU_All
Definition: tableam.h:116
@ TU_None
Definition: tableam.h:113
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188

References Assert(), AssertHasSnapshotForToast(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_lock::flags, GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, xl_heap_lock::infobits_set, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, xl_heap_lock::offnum, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
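The snippet below is an illustrative sketch of the caller pattern used by these functions: pass the current command ID, no crosscheck snapshot, and wait = true, then treat any result other than TM_Ok as an error. The wrapper name and the error messages are assumptions made for the example, not code copied from the listed callers.

/* Hypothetical wrapper, loosely modeled on the simple_heap_update() pattern. */
static void
update_tuple_or_die(Relation rel, ItemPointer otid, HeapTuple newtup)
{
    TM_Result   result;
    TM_FailureData tmfd;
    LockTupleMode lockmode;
    TU_UpdateIndexes update_indexes;

    result = heap_update(rel, otid, newtup,
                         GetCurrentCommandId(true), InvalidSnapshot,
                         true,  /* wait for any conflicting updater */
                         &tmfd, &lockmode, &update_indexes);
    switch (result)
    {
        case TM_Ok:
            /* success; the caller still maintains indexes per update_indexes */
            break;
        case TM_SelfModified:
            elog(ERROR, "tuple already updated by self");
            break;
        case TM_Updated:
        case TM_Deleted:
            elog(ERROR, "tuple concurrently updated or deleted");
            break;
        default:
            elog(ERROR, "unrecognized heap_update status: %u", result);
            break;
    }
}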

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)

Definition at line 627 of file vacuumlazy.c.

629{
630 LVRelState *vacrel;
631 bool verbose,
632 instrument,
633 skipwithvm,
634 frozenxid_updated,
635 minmulti_updated;
636 BlockNumber orig_rel_pages,
637 new_rel_pages,
638 new_rel_allvisible,
639 new_rel_allfrozen;
640 PGRUsage ru0;
641 TimestampTz starttime = 0;
642 PgStat_Counter startreadtime = 0,
643 startwritetime = 0;
644 WalUsage startwalusage = pgWalUsage;
645 BufferUsage startbufferusage = pgBufferUsage;
646 ErrorContextCallback errcallback;
647 char **indnames = NULL;
648
649 verbose = (params.options & VACOPT_VERBOSE) != 0;
650 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
651 params.log_vacuum_min_duration >= 0));
652 if (instrument)
653 {
654 pg_rusage_init(&ru0);
655 if (track_io_timing)
656 {
657 startreadtime = pgStatBlockReadTime;
658 startwritetime = pgStatBlockWriteTime;
659 }
660 }
661
662 /* Used for instrumentation and stats report */
663 starttime = GetCurrentTimestamp();
664
665 pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
666 RelationGetRelid(rel));
667
668 /*
669 * Set up error traceback support for ereport() first. The idea is to set
670 * up an error context callback to display additional information on any
671 * error during a vacuum. During different phases of vacuum, we update
672 * the state so that the error context callback always displays the current
673 * information.
674 *
675 * Copy the names of the heap rel into local memory for error reporting
676 * purposes, too. It isn't always safe to assume that we can get the name
677 * of each rel. It's convenient for code in lazy_scan_heap to always use
678 * these temp copies.
679 */
680 vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
681 vacrel->dbname = get_database_name(MyDatabaseId);
682 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
683 vacrel->relname = pstrdup(RelationGetRelationName(rel));
684 vacrel->indname = NULL;
685 vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
686 vacrel->verbose = verbose;
687 errcallback.callback = vacuum_error_callback;
688 errcallback.arg = vacrel;
689 errcallback.previous = error_context_stack;
690 error_context_stack = &errcallback;
691
692 /* Set up high level stuff about rel and its indexes */
693 vacrel->rel = rel;
694 vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
695 &vacrel->indrels);
696 vacrel->bstrategy = bstrategy;
697 if (instrument && vacrel->nindexes > 0)
698 {
699 /* Copy index names used by instrumentation (not error reporting) */
700 indnames = palloc(sizeof(char *) * vacrel->nindexes);
701 for (int i = 0; i < vacrel->nindexes; i++)
702 indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
703 }
704
705 /*
706 * The index_cleanup param either disables index vacuuming and cleanup or
707 * forces it to go ahead when we would otherwise apply the index bypass
708 * optimization. The default is 'auto', which leaves the final decision
709 * up to lazy_vacuum().
710 *
711 * The truncate param allows the user to avoid attempting relation truncation,
712 * though it can't force truncation to happen.
713 */
714 Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
715 Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
716 params.truncate != VACOPTVALUE_AUTO);
717
718 /*
719 * While VacuumFailsafeActive is reset to false before calling this, we
720 * still need to reset it here due to recursive calls.
721 */
722 VacuumFailsafeActive = false;
723 vacrel->consider_bypass_optimization = true;
724 vacrel->do_index_vacuuming = true;
725 vacrel->do_index_cleanup = true;
726 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
727 if (params.index_cleanup == VACOPTVALUE_DISABLED)
728 {
729 /* Force disable index vacuuming up-front */
730 vacrel->do_index_vacuuming = false;
731 vacrel->do_index_cleanup = false;
732 }
733 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
734 {
735 /* Force index vacuuming. Note that failsafe can still bypass. */
736 vacrel->consider_bypass_optimization = false;
737 }
738 else
739 {
740 /* Default/auto, make all decisions dynamically */
741 Assert(params.index_cleanup == VACOPTVALUE_AUTO);
742 }
743
744 /* Initialize page counters explicitly (be tidy) */
745 vacrel->scanned_pages = 0;
746 vacrel->eager_scanned_pages = 0;
747 vacrel->removed_pages = 0;
748 vacrel->new_frozen_tuple_pages = 0;
749 vacrel->lpdead_item_pages = 0;
750 vacrel->missed_dead_pages = 0;
751 vacrel->nonempty_pages = 0;
752 /* dead_items_alloc allocates vacrel->dead_items later on */
753
754 /* Allocate/initialize output statistics state */
755 vacrel->new_rel_tuples = 0;
756 vacrel->new_live_tuples = 0;
757 vacrel->indstats = (IndexBulkDeleteResult **)
758 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
759
760 /* Initialize remaining counters (be tidy) */
761 vacrel->num_index_scans = 0;
762 vacrel->tuples_deleted = 0;
763 vacrel->tuples_frozen = 0;
764 vacrel->lpdead_items = 0;
765 vacrel->live_tuples = 0;
766 vacrel->recently_dead_tuples = 0;
767 vacrel->missed_dead_tuples = 0;
768
769 vacrel->vm_new_visible_pages = 0;
770 vacrel->vm_new_visible_frozen_pages = 0;
771 vacrel->vm_new_frozen_pages = 0;
772
773 /*
774 * Get cutoffs that determine which deleted tuples are considered DEAD,
775 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
776 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
777 * happen in this order to ensure that the OldestXmin cutoff field works
778 * as an upper bound on the XIDs stored in the pages we'll actually scan
779 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
780 *
781 * Next acquire vistest, a related cutoff that's used in pruning. We use
782 * vistest in combination with OldestXmin to ensure that
783 * heap_page_prune_and_freeze() always removes any deleted tuple whose
784 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
785 * whether a tuple should be frozen or removed. (In the future we might
786 * want to teach lazy_scan_prune to recompute vistest from time to time,
787 * to increase the number of dead tuples it can prune away.)
788 */
789 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
790 vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
791 vacrel->vistest = GlobalVisTestFor(rel);
792
793 /* Initialize state used to track oldest extant XID/MXID */
794 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
795 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
796
797 /*
798 * Initialize state related to tracking all-visible page skipping. This is
799 * very important to determine whether or not it is safe to advance the
800 * relfrozenxid/relminmxid.
801 */
802 vacrel->skippedallvis = false;
803 skipwithvm = true;
804 if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
805 {
806 /*
807 * Force aggressive mode, and disable skipping blocks using the
808 * visibility map (even those set all-frozen)
809 */
810 vacrel->aggressive = true;
811 skipwithvm = false;
812 }
813
814 vacrel->skipwithvm = skipwithvm;
815
816 /*
817 * Set up eager scan tracking state. This must happen after determining
818 * whether or not the vacuum must be aggressive, because only normal
819 * vacuums use the eager scan algorithm.
820 */
821 heap_vacuum_eager_scan_setup(vacrel, params);
822
823 if (verbose)
824 {
825 if (vacrel->aggressive)
827 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
828 vacrel->dbname, vacrel->relnamespace,
829 vacrel->relname)));
830 else
832 (errmsg("vacuuming \"%s.%s.%s\"",
833 vacrel->dbname, vacrel->relnamespace,
834 vacrel->relname)));
835 }
836
837 /*
838 * Allocate dead_items memory using dead_items_alloc. This handles
839 * parallel VACUUM initialization as part of allocating shared memory
840 * space used for dead_items. (But do a failsafe precheck first, to
841 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
842 * is already dangerously old.)
843 */
844 lazy_check_wraparound_failsafe(vacrel);
845 dead_items_alloc(vacrel, params.nworkers);
846
847 /*
848 * Call lazy_scan_heap to perform all required heap pruning, index
849 * vacuuming, and heap vacuuming (plus related processing)
850 */
851 lazy_scan_heap(vacrel);
852
853 /*
854 * Free resources managed by dead_items_alloc. This ends parallel mode in
855 * passing when necessary.
856 */
857 dead_items_cleanup(vacrel);
858 Assert(!IsInParallelMode());
859 
860 /*
861 * Update pg_class entries for each of rel's indexes where appropriate.
862 *
863 * Unlike the later update to rel's pg_class entry, this is not critical.
864 * Maintains relpages/reltuples statistics used by the planner only.
865 */
866 if (vacrel->do_index_cleanup)
867 update_relstats_all_indexes(vacrel);
868 
869 /* Done with rel's indexes */
870 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
871
872 /* Optionally truncate rel */
873 if (should_attempt_truncation(vacrel))
874 lazy_truncate_heap(vacrel);
875
876 /* Pop the error context stack */
877 error_context_stack = errcallback.previous;
878
879 /* Report that we are now doing final cleanup */
880 pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
881 PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
882 
883 /*
884 * Prepare to update rel's pg_class entry.
885 *
886 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
887 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
888 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
889 */
890 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
891 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
892 vacrel->cutoffs.relfrozenxid,
893 vacrel->NewRelfrozenXid));
894 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
895 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
896 vacrel->cutoffs.relminmxid,
897 vacrel->NewRelminMxid));
898 if (vacrel->skippedallvis)
899 {
900 /*
901 * Must keep original relfrozenxid in a non-aggressive VACUUM that
902 * chose to skip an all-visible page range. The state that tracks new
903 * values will have missed unfrozen XIDs from the pages we skipped.
904 */
905 Assert(!vacrel->aggressive);
906 vacrel->NewRelfrozenXid = InvalidTransactionId;
907 vacrel->NewRelminMxid = InvalidMultiXactId;
908 }
909
910 /*
911 * For safety, clamp relallvisible to be not more than what we're setting
912 * pg_class.relpages to
913 */
914 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
915 visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
916 if (new_rel_allvisible > new_rel_pages)
917 new_rel_allvisible = new_rel_pages;
918
919 /*
920 * An all-frozen block _must_ be all-visible. As such, clamp the count of
921 * all-frozen blocks to the count of all-visible blocks. This matches the
922 * clamping of relallvisible above.
923 */
924 if (new_rel_allfrozen > new_rel_allvisible)
925 new_rel_allfrozen = new_rel_allvisible;
926
927 /*
928 * Now actually update rel's pg_class entry.
929 *
930 * In principle new_live_tuples could be -1 indicating that we (still)
931 * don't know the tuple count. In practice that can't happen, since we
932 * scan every page that isn't skipped using the visibility map.
933 */
934 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
935 new_rel_allvisible, new_rel_allfrozen,
936 vacrel->nindexes > 0,
937 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
938 &frozenxid_updated, &minmulti_updated, false);
939
940 /*
941 * Report results to the cumulative stats system, too.
942 *
943 * Deliberately avoid telling the stats system about LP_DEAD items that
944 * remain in the table due to VACUUM bypassing index and heap vacuuming.
945 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
946 * It seems like a good idea to err on the side of not vacuuming again too
947 * soon in cases where the failsafe prevented significant amounts of heap
948 * vacuuming.
949 */
950 pgstat_report_vacuum(RelationGetRelid(rel),
951 rel->rd_rel->relisshared,
952 Max(vacrel->new_live_tuples, 0),
953 vacrel->recently_dead_tuples +
954 vacrel->missed_dead_tuples,
955 starttime);
956 pgstat_progress_end_command();
957 
958 if (instrument)
959 {
960 TimestampTz endtime = GetCurrentTimestamp();
961 
962 if (verbose || params.log_vacuum_min_duration == 0 ||
963 TimestampDifferenceExceeds(starttime, endtime,
964 params.log_vacuum_min_duration))
965 {
966 long secs_dur;
967 int usecs_dur;
968 WalUsage walusage;
969 BufferUsage bufferusage;
970 StringInfoData buf;
971 char *msgfmt;
972 int32 diff;
973 double read_rate = 0,
974 write_rate = 0;
975 int64 total_blks_hit;
976 int64 total_blks_read;
977 int64 total_blks_dirtied;
978
979 TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
980 memset(&walusage, 0, sizeof(WalUsage));
981 WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
982 memset(&bufferusage, 0, sizeof(BufferUsage));
983 BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
984
985 total_blks_hit = bufferusage.shared_blks_hit +
986 bufferusage.local_blks_hit;
987 total_blks_read = bufferusage.shared_blks_read +
988 bufferusage.local_blks_read;
989 total_blks_dirtied = bufferusage.shared_blks_dirtied +
990 bufferusage.local_blks_dirtied;
991
992 initStringInfo(&buf);
993 if (verbose)
994 {
995 /*
996 * Aggressiveness already reported earlier, in dedicated
997 * VACUUM VERBOSE ereport
998 */
999 Assert(!params.is_wraparound);
1000 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1001 }
1002 else if (params.is_wraparound)
1003 {
1004 /*
1005 * While it's possible for a VACUUM to be both is_wraparound
1006 * and !aggressive, that's just a corner-case -- is_wraparound
1007 * implies aggressive. Produce distinct output for the corner
1008 * case all the same, just in case.
1009 */
1010 if (vacrel->aggressive)
1011 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1012 else
1013 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1014 }
1015 else
1016 {
1017 if (vacrel->aggressive)
1018 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1019 else
1020 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1021 }
1022 appendStringInfo(&buf, msgfmt,
1023 vacrel->dbname,
1024 vacrel->relnamespace,
1025 vacrel->relname,
1026 vacrel->num_index_scans);
1027 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1028 vacrel->removed_pages,
1029 new_rel_pages,
1030 vacrel->scanned_pages,
1031 orig_rel_pages == 0 ? 100.0 :
1032 100.0 * vacrel->scanned_pages /
1033 orig_rel_pages,
1034 vacrel->eager_scanned_pages);
1036 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1037 vacrel->tuples_deleted,
1038 (int64) vacrel->new_rel_tuples,
1039 vacrel->recently_dead_tuples);
1040 if (vacrel->missed_dead_tuples > 0)
1042 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1043 vacrel->missed_dead_tuples,
1044 vacrel->missed_dead_pages);
1045 diff = (int32) (ReadNextTransactionId() -
1046 vacrel->cutoffs.OldestXmin);
1048 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1049 vacrel->cutoffs.OldestXmin, diff);
1050 if (frozenxid_updated)
1051 {
1052 diff = (int32) (vacrel->NewRelfrozenXid -
1053 vacrel->cutoffs.relfrozenxid);
1055 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1056 vacrel->NewRelfrozenXid, diff);
1057 }
1058 if (minmulti_updated)
1059 {
1060 diff = (int32) (vacrel->NewRelminMxid -
1061 vacrel->cutoffs.relminmxid);
1063 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1064 vacrel->NewRelminMxid, diff);
1065 }
1066 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1067 vacrel->new_frozen_tuple_pages,
1068 orig_rel_pages == 0 ? 100.0 :
1069 100.0 * vacrel->new_frozen_tuple_pages /
1070 orig_rel_pages,
1071 vacrel->tuples_frozen);
1072
1074 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1075 vacrel->vm_new_visible_pages,
1077 vacrel->vm_new_frozen_pages,
1078 vacrel->vm_new_frozen_pages);
1079 if (vacrel->do_index_vacuuming)
1080 {
1081 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1082 appendStringInfoString(&buf, _("index scan not needed: "));
1083 else
1084 appendStringInfoString(&buf, _("index scan needed: "));
1085
1086 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1087 }
1088 else
1089 {
1091 appendStringInfoString(&buf, _("index scan bypassed: "));
1092 else
1093 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1094
1095 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1096 }
1097 appendStringInfo(&buf, msgfmt,
1098 vacrel->lpdead_item_pages,
1099 orig_rel_pages == 0 ? 100.0 :
1100 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1101 vacrel->lpdead_items);
1102 for (int i = 0; i < vacrel->nindexes; i++)
1103 {
1104 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1105
1106 if (!istat)
1107 continue;
1108
1110 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1111 indnames[i],
1112 istat->num_pages,
1113 istat->pages_newly_deleted,
1114 istat->pages_deleted,
1115 istat->pages_free);
1116 }
1117 if (track_cost_delay_timing)
1118 {
1119 /*
1120 * We bypass the changecount mechanism because this value is
1121 * only updated by the calling process. We also rely on the
1122 * above call to pgstat_progress_end_command() to not clear
1123 * the st_progress_param array.
1124 */
1125 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1126 (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1127 }
1128 if (track_io_timing)
1129 {
1130 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1131 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1132
1133 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1134 read_ms, write_ms);
1135 }
1136 if (secs_dur > 0 || usecs_dur > 0)
1137 {
1138 read_rate = (double) BLCKSZ * total_blks_read /
1139 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1140 write_rate = (double) BLCKSZ * total_blks_dirtied /
1141 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1142 }
1143 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1144 read_rate, write_rate);
1146 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1147 total_blks_hit,
1148 total_blks_read,
1149 total_blks_dirtied);
1151 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1152 walusage.wal_records,
1153 walusage.wal_fpi,
1154 walusage.wal_bytes,
1155 walusage.wal_fpi_bytes,
1156 walusage.wal_buffers_full);
1157 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1158
1159 ereport(verbose ? INFO : LOG,
1160 (errmsg_internal("%s", buf.data)));
1161 pfree(buf.data);
1162 }
1163 }
1164
1165 /* Cleanup index statistics and index names */
1166 for (int i = 0; i < vacrel->nindexes; i++)
1167 {
1168 if (vacrel->indstats[i])
1169 pfree(vacrel->indstats[i]);
1170
1171 if (instrument)
1172 pfree(indnames[i]);
1173 }
1174}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1721
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition: bufmgr.c:147
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:294
int32_t int32
Definition: c.h:537
int64 TimestampTz
Definition: timestamp.h:39
ErrorContextCallback * error_context_stack
Definition: elog.c:95
#define _(x)
Definition: elog.c:91
#define LOG
Definition: elog.h:31
#define INFO
Definition: elog.h:34
int verbose
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:288
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_database_name(Oid dbid)
Definition: lsyscache.c:1259
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3533
char * pstrdup(const char *in)
Definition: mcxt.c:1759
void * palloc0(Size size)
Definition: mcxt.c:1395
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:383
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:67
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:39
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition: progress.h:31
#define RelationGetNamespace(relation)
Definition: rel.h:556
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
struct ErrorContextCallback * previous
Definition: elog.h:297
void(* callback)(void *arg)
Definition: elog.h:298
BlockNumber pages_deleted
Definition: genam.h:109
BlockNumber pages_newly_deleted
Definition: genam.h:108
BlockNumber pages_free
Definition: genam.h:110
BlockNumber num_pages
Definition: genam.h:104
bool verbose
Definition: vacuumlazy.c:297
BlockNumber vm_new_frozen_pages
Definition: vacuumlazy.c:336
int nindexes
Definition: vacuumlazy.c:263
int64 tuples_deleted
Definition: vacuumlazy.c:351
BlockNumber nonempty_pages
Definition: vacuumlazy.c:340
bool do_rel_truncate
Definition: vacuumlazy.c:279
BlockNumber scanned_pages
Definition: vacuumlazy.c:313
bool aggressive
Definition: vacuumlazy.c:270
BlockNumber new_frozen_tuple_pages
Definition: vacuumlazy.c:322
GlobalVisState * vistest
Definition: vacuumlazy.c:283
BlockNumber removed_pages
Definition: vacuumlazy.c:321
int num_index_scans
Definition: vacuumlazy.c:349
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:346
double new_live_tuples
Definition: vacuumlazy.c:344
double new_rel_tuples
Definition: vacuumlazy.c:343
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:285
Relation rel
Definition: vacuumlazy.c:261
bool consider_bypass_optimization
Definition: vacuumlazy.c:274
BlockNumber rel_pages
Definition: vacuumlazy.c:312
int64 recently_dead_tuples
Definition: vacuumlazy.c:355
int64 tuples_frozen
Definition: vacuumlazy.c:352
char * dbname
Definition: vacuumlazy.c:290
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:339
char * relnamespace
Definition: vacuumlazy.c:291
int64 live_tuples
Definition: vacuumlazy.c:354
int64 lpdead_items
Definition: vacuumlazy.c:353
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:266
bool skippedallvis
Definition: vacuumlazy.c:287
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:338
BlockNumber eager_scanned_pages
Definition: vacuumlazy.c:319
Relation * indrels
Definition: vacuumlazy.c:262
bool skipwithvm
Definition: vacuumlazy.c:272
bool do_index_cleanup
Definition: vacuumlazy.c:278
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:286
int64 missed_dead_tuples
Definition: vacuumlazy.c:356
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:282
BlockNumber vm_new_visible_pages
Definition: vacuumlazy.c:325
char * relname
Definition: vacuumlazy.c:292
VacErrPhase phase
Definition: vacuumlazy.c:296
char * indname
Definition: vacuumlazy.c:293
BlockNumber vm_new_visible_frozen_pages
Definition: vacuumlazy.c:333
bool do_index_vacuuming
Definition: vacuumlazy.c:277
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
int nworkers
Definition: vacuum.h:251
VacOptValue truncate
Definition: vacuum.h:236
bits32 options
Definition: vacuum.h:219
int log_vacuum_min_duration
Definition: vacuum.h:227
bool is_wraparound
Definition: vacuum.h:226
VacOptValue index_cleanup
Definition: vacuum.h:235
int64 wal_buffers_full
Definition: instrument.h:57
uint64 wal_bytes
Definition: instrument.h:55
uint64 wal_fpi_bytes
Definition: instrument.h:56
int64 wal_records
Definition: instrument.h:53
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:377
bool track_cost_delay_timing
Definition: vacuum.c:82
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2360
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2403
bool VacuumFailsafeActive
Definition: vacuum.c:110
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1424
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1098
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3626
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3832
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition: vacuumlazy.c:500
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3867
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:3244
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:3224
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:225
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:1213
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2994
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:3517
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, LVRelState::aggressive, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert(), LVRelState::bstrategy, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, LVRelState::consider_bypass_optimization, LVRelState::cutoffs, LVRelState::dbname, dead_items_alloc(), dead_items_cleanup(), LVRelState::do_index_cleanup, LVRelState::do_index_vacuuming, LVRelState::do_rel_truncate, LVRelState::eager_scanned_pages, ereport, errmsg(), errmsg_internal(), error_context_stack, VacuumCutoffs::FreezeLimit, get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, LVRelState::indname, LVRelState::indrels, LVRelState::indstats, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), LVRelState::live_tuples, BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, LVRelState::lpdead_item_pages, LVRelState::lpdead_items, Max, LVRelState::missed_dead_pages, LVRelState::missed_dead_tuples, VacuumCutoffs::MultiXactCutoff, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, LVRelState::new_frozen_tuple_pages, LVRelState::new_live_tuples, LVRelState::new_rel_tuples, LVRelState::NewRelfrozenXid, LVRelState::NewRelminMxid, LVRelState::nindexes, NoLock, LVRelState::nonempty_pages, LVRelState::num_index_scans, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc(), palloc0(), pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, LVRelState::phase, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, pstrdup(), RelationData::rd_rel, ReadNextTransactionId(), LVRelState::recently_dead_tuples, LVRelState::rel, LVRelState::rel_pages, RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, LVRelState::relname, LVRelState::relnamespace, LVRelState::removed_pages, RowExclusiveLock, LVRelState::scanned_pages, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), LVRelState::skippedallvis, LVRelState::skipwithvm, PgBackendStatus::st_progress_param, TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, LVRelState::tuples_deleted, LVRelState::tuples_frozen, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, LVRelState::verbose, verbose, visibilitymap_count(), LVRelState::vistest, LVRelState::vm_new_frozen_pages, LVRelState::vm_new_visible_frozen_pages, 
LVRelState::vm_new_visible_pages, WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().
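As a rough, hand-written illustration of the parameter fields this function consults, the sketch below shows how a direct caller might fill in VacuumParams before invoking it. In core PostgreSQL the function is normally reached through vacuum() and table_relation_vacuum() on an already opened and locked relation; the concrete values chosen here are assumptions made for the example, not defaults taken from the source.

/* Illustrative only: rel is assumed to be opened and appropriately locked by the caller. */
VacuumParams params = {0};
BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);

params.options = VACOPT_VACUUM | VACOPT_VERBOSE;   /* verbose => per-phase INFO output */
params.index_cleanup = VACOPTVALUE_AUTO;           /* let lazy_vacuum() decide on index passes */
params.truncate = VACOPTVALUE_ENABLED;             /* permit lazy_truncate_heap() */
params.nworkers = 0;                               /* no parallel index vacuuming */
params.log_vacuum_min_duration = -1;               /* no autovacuum-style log line */
params.is_wraparound = false;

heap_vacuum_rel(rel, params, bstrategy);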

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)

Definition at line 9271 of file heapam.c.

9274{
9275 TransactionId xid;
9276 HTSV_Result htsvResult;
9277
9278 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9279 return;
9280
9281 /*
9282 * Check to see whether the tuple has been written to by a concurrent
9283 * transaction, either to create it not visible to us, or to delete it
9284 * while it is visible to us. The "visible" bool indicates whether the
9285 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9286 * is going on with it.
9287 *
9288 * In the event of a concurrently inserted tuple that also happens to have
9289 * been concurrently updated (by a separate transaction), the xmin of the
9290 * tuple will be used -- not the updater's xid.
9291 */
9292 htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
9293 switch (htsvResult)
9294 {
9295 case HEAPTUPLE_LIVE:
9296 if (visible)
9297 return;
9298 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9299 break;
9300 case HEAPTUPLE_RECENTLY_DEAD:
9301 case HEAPTUPLE_DELETE_IN_PROGRESS:
9302 if (visible)
9303 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9304 else
9305 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9306
9307 if (TransactionIdPrecedes(xid, TransactionXmin))
9308 {
9309 /* This is like the HEAPTUPLE_DEAD case */
9310 Assert(!visible);
9311 return;
9312 }
9313 break;
9314 case HEAPTUPLE_INSERT_IN_PROGRESS:
9315 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9316 break;
9317 case HEAPTUPLE_DEAD:
9318 Assert(!visible);
9319 return;
9320 default:
9321
9322 /*
9323 * The only way to get to this default clause is if a new value is
9324 * added to the enum type without adding it to this switch
9325 * statement. That's a bug, so elog.
9326 */
9327 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9328
9329 /*
9330 * In spite of having all enum values covered and calling elog on
9331 * this default, some compilers think this is a code path which
9332 * allows xid to be used below without initialization. Silence
9333 * that warning.
9334 */
9335 xid = InvalidTransactionId;
9336 }
9337
9338 Assert(TransactionIdIsValid(xid));
9339 Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
9340 
9341 /*
9342 * Find top level xid. Bail out if xid is too early to be a conflict, or
9343 * if it's our own xid.
9344 */
9345 if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
9346 return;
9347 xid = SubTransGetTopmostTransaction(xid);
9348 if (TransactionIdPrecedes(xid, TransactionXmin))
9349 return;
9350
9351 CheckForSerializableConflictOut(relation, xid, snapshot);
9352}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition: predicate.c:4023
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:162
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.h:312
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:442

References Assert(), CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
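A minimal sketch of the call pattern used by these fetch paths (variable names assumed for the example): the MVCC visibility verdict is computed first and then passed in, letting this function decide whether the tuple's xmin or its update xid is the transaction to test for a rw-conflict.

valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);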

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)

Definition at line 1529 of file heapam_visibility.c.

1530{
1531 TransactionId xmax;
1532
1533 /* if there's no valid Xmax, then there's obviously no update either */
1534 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1535 return true;
1536
1537 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1538 return true;
1539
1540 /* invalid xmax means no update */
1541 if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
1542 return true;
1543
1544 /*
1545 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1546 * necessarily have been updated
1547 */
1548 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1549 return false;
1550
1551 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1552 xmax = HeapTupleGetUpdateXid(tuple);
1553
1554 /* not LOCKED_ONLY, so it has to have an xmax */
1555 Assert(TransactionIdIsValid(xmax));
1556 
1557 if (TransactionIdIsCurrentTransactionId(xmax))
1558 return false;
1559 if (TransactionIdIsInProgress(xmax))
1560 return false;
1561 if (TransactionIdDidCommit(xmax))
1562 return false;
1563
1564 /*
1565 * not current, not in progress, not committed -- must have aborted or
1566 * crashed
1567 */
1568 return true;
1569}
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1402

References Assert(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
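A hedged illustration of why callers use this test (hypothetical variable names, not a fragment of the listed callers): a valid xmax only implies that a newer tuple version exists if the xmax was not merely a locker.

if (!(htup->t_data->t_infomask & HEAP_XMAX_INVALID) &&
    !HeapTupleHeaderIsOnlyLocked(htup->t_data))
{
    /* xmax really updated or deleted the tuple; t_ctid may point to a successor */
}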

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
GlobalVisState vistest 
)

Definition at line 1474 of file heapam_visibility.c.

1475{
1476 HeapTupleHeader tuple = htup->t_data;
1477
1478 Assert(ItemPointerIsValid(&htup->t_self));
1479 Assert(htup->t_tableOid != InvalidOid);
1480
1481 /*
1482 * If the inserting transaction is marked invalid, then it aborted, and
1483 * the tuple is definitely dead. If it's marked neither committed nor
1484 * invalid, then we assume it's still alive (since the presumption is that
1485 * all relevant hint bits were just set moments ago).
1486 */
1487 if (!HeapTupleHeaderXminCommitted(tuple))
1488 return HeapTupleHeaderXminInvalid(tuple);
1489
1490 /*
1491 * If the inserting transaction committed, but any deleting transaction
1492 * aborted, the tuple is still alive.
1493 */
1494 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1495 return false;
1496
1497 /*
1498 * If the XMAX is just a lock, the tuple is still alive.
1499 */
1500 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1501 return false;
1502
1503 /*
1504 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1505 * know without checking pg_multixact.
1506 */
1507 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1508 return false;
1509
1510 /* If deleter isn't known to have committed, assume it's still running. */
1511 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1512 return false;
1513
1514 /* Deleter committed, so tuple is dead if the XID is old enough. */
1515 return GlobalVisTestIsRemovableXid(vistest,
1516 HeapTupleHeaderGetRawXmax(tuple));
1517}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
Definition: htup_details.h:343
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
Definition: htup_details.h:337
#define InvalidOid
Definition: postgres_ext.h:37

References Assert(), GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().
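
A hedged sketch of the way heap_hot_search_buffer() uses this function: because it never sets hint bits, only a buffer pin is required, and the caller can still learn whether every tuple it examined is dead to all transactions. The helper below is hypothetical; GlobalVisTestFor() supplies the vistest argument.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "utils/snapmgr.h"

    /* sketch: true only if every tuple in the array is dead to everyone */
    static bool
    all_surely_dead(Relation rel, HeapTuple *tuples, int ntuples)
    {
        GlobalVisState *vistest = GlobalVisTestFor(rel);

        for (int i = 0; i < ntuples; i++)
        {
            if (!HeapTupleIsSurelyDead(tuples[i], vistest))
                return false;
        }
        return true;
    }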

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)

Definition at line 458 of file heapam_visibility.c.

460{
461 HeapTupleHeader tuple = htup->t_data;
462
463 Assert(ItemPointerIsValid(&htup->t_self));
464 Assert(htup->t_tableOid != InvalidOid);
465
466 if (!HeapTupleHeaderXminCommitted(tuple))
467 {
468 if (HeapTupleHeaderXminInvalid(tuple))
469 return TM_Invisible;
470
471 /* Used by pre-9.0 binary upgrades */
472 if (tuple->t_infomask & HEAP_MOVED_OFF)
473 {
474 TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
475
476 if (TransactionIdIsCurrentTransactionId(xvac))
477 return TM_Invisible;
478 if (!TransactionIdIsInProgress(xvac))
479 {
480 if (TransactionIdDidCommit(xvac))
481 {
482 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
483 InvalidTransactionId);
484 return TM_Invisible;
485 }
486 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
487 InvalidTransactionId);
488 }
489 }
490 /* Used by pre-9.0 binary upgrades */
491 else if (tuple->t_infomask & HEAP_MOVED_IN)
492 {
493 TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
494
495 if (!TransactionIdIsCurrentTransactionId(xvac))
496 {
497 if (TransactionIdIsInProgress(xvac))
498 return TM_Invisible;
499 if (TransactionIdDidCommit(xvac))
500 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
501 InvalidTransactionId);
502 else
503 {
504 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
505 InvalidTransactionId);
506 return TM_Invisible;
507 }
508 }
509 }
510 else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
511 {
512 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
513 return TM_Invisible; /* inserted after scan started */
514
515 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
516 return TM_Ok;
517
518 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
519 {
520 TransactionId xmax;
521
522 xmax = HeapTupleHeaderGetRawXmax(tuple);
523
524 /*
525 * Careful here: even though this tuple was created by our own
526 * transaction, it might be locked by other transactions, if
527 * the original version was key-share locked when we updated
528 * it.
529 */
530
531 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
532 {
533 if (MultiXactIdIsRunning(xmax, true))
534 return TM_BeingModified;
535 else
536 return TM_Ok;
537 }
538
539 /*
540 * If the locker is gone, then there is nothing of interest
541 * left in this Xmax; otherwise, report the tuple as
542 * locked/updated.
543 */
544 if (!TransactionIdIsInProgress(xmax))
545 return TM_Ok;
546 return TM_BeingModified;
547 }
548
549 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
550 {
551 TransactionId xmax;
552
553 xmax = HeapTupleGetUpdateXid(tuple);
554
555 /* not LOCKED_ONLY, so it has to have an xmax */
556 Assert(TransactionIdIsValid(xmax));
557
558 /* deleting subtransaction must have aborted */
559 if (!TransactionIdIsCurrentTransactionId(xmax))
560 {
561 if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
562 false))
563 return TM_BeingModified;
564 return TM_Ok;
565 }
566 else
567 {
568 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
569 return TM_SelfModified; /* updated after scan started */
570 else
571 return TM_Invisible; /* updated before scan started */
572 }
573 }
574
575 if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
576 {
577 /* deleting subtransaction must have aborted */
578 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
579 InvalidTransactionId);
580 return TM_Ok;
581 }
582
583 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
584 return TM_SelfModified; /* updated after scan started */
585 else
586 return TM_Invisible; /* updated before scan started */
587 }
588 else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
589 return TM_Invisible;
590 else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
591 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
592 HeapTupleHeaderGetRawXmin(tuple));
593 else
594 {
595 /* it must have aborted or crashed */
596 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
597 InvalidTransactionId);
598 return TM_Invisible;
599 }
600 }
601
602 /* by here, the inserting transaction has committed */
603
604 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
605 return TM_Ok;
606
607 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
608 {
609 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
610 return TM_Ok;
611 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
612 return TM_Updated; /* updated by other */
613 else
614 return TM_Deleted; /* deleted by other */
615 }
616
617 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
618 {
619 TransactionId xmax;
620
621 if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
622 return TM_Ok;
623
624 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
625 {
626 if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
627 return TM_BeingModified;
628
629 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
630 return TM_Ok;
631 }
632
633 xmax = HeapTupleGetUpdateXid(tuple);
634 if (!TransactionIdIsValid(xmax))
635 {
636 if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
637 return TM_BeingModified;
638 }
639
640 /* not LOCKED_ONLY, so it has to have an xmax */
641 Assert(TransactionIdIsValid(xmax));
642
643 if (TransactionIdIsCurrentTransactionId(xmax))
644 {
645 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
646 return TM_SelfModified; /* updated after scan started */
647 else
648 return TM_Invisible; /* updated before scan started */
649 }
650
651 if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
652 return TM_BeingModified;
653
654 if (TransactionIdDidCommit(xmax))
655 {
656 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
657 return TM_Updated;
658 else
659 return TM_Deleted;
660 }
661
662 /*
663 * By here, the update in the Xmax is either aborted or crashed, but
664 * what about the other members?
665 */
666
667 if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
668 {
669 /*
670 * There's no member, even just a locker, alive anymore, so we can
671 * mark the Xmax as invalid.
672 */
673 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
674 InvalidTransactionId);
675 return TM_Ok;
676 }
677 else
678 {
679 /* There are lockers running */
680 return TM_BeingModified;
681 }
682 }
683
684 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
685 {
686 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
687 return TM_BeingModified;
688 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
689 return TM_SelfModified; /* updated after scan started */
690 else
691 return TM_Invisible; /* updated before scan started */
692 }
693
694 if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
695 return TM_BeingModified;
696
697 if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
698 {
699 /* it must have aborted or crashed */
700 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
701 InvalidTransactionId);
702 return TM_Ok;
703 }
704
705 /* xmax transaction committed */
706
707 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
708 {
709 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
710 InvalidTransactionId);
711 return TM_Ok;
712 }
713
714 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
715 HeapTupleHeaderGetRawXmax(tuple));
716 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
717 return TM_Updated; /* updated by other */
718 else
719 return TM_Deleted; /* deleted by other */
720}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition: combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
#define HEAP_XMIN_COMMITTED
Definition: htup_details.h:204
#define HEAP_MOVED_IN
Definition: htup_details.h:212
#define HEAP_XMIN_INVALID
Definition: htup_details.h:205
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:583

References Assert(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetXvac(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().
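
A hedged sketch (not from the PostgreSQL sources) of the caller pattern used by heap_delete() and heap_update(): lock the buffer, evaluate the tuple against the current command, then branch on the TM_Result. The tuple is assumed to have t_data, t_len, t_self and t_tableOid already filled in from a page in the pinned buffer.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/tableam.h"
    #include "access/xact.h"
    #include "storage/bufmgr.h"

    /* sketch: may the current command update or delete this tuple right now? */
    static bool
    tuple_modifiable_now(HeapTuple tup, Buffer buffer)
    {
        TM_Result   res;

        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        res = HeapTupleSatisfiesUpdate(tup, GetCurrentCommandId(true), buffer);
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

        /*
         * TM_Ok means go ahead; the real callers handle TM_BeingModified
         * (wait for the locker), TM_SelfModified, TM_Updated, TM_Deleted and
         * TM_Invisible individually.
         */
        return (res == TM_Ok);
    }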

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)

Definition at line 1171 of file heapam_visibility.c.

1173{
1174 TransactionId dead_after = InvalidTransactionId;
1175 HTSV_Result res;
1176
1177 res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);
1178
1179 if (res == HEAPTUPLE_RECENTLY_DEAD)
1180 {
1181 Assert(TransactionIdIsValid(dead_after));
1182
1183 if (TransactionIdPrecedes(dead_after, OldestXmin))
1184 res = HEAPTUPLE_DEAD;
1185 }
1186 else
1187 Assert(!TransactionIdIsValid(dead_after));
1188
1189 return res;
1190}
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References Assert(), HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), InvalidTransactionId, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_page_would_be_all_visible(), heapam_index_build_range_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_tuple(), HeapCheckForSerializableConflictOut(), lazy_scan_noprune(), statapprox_heap(), tuple_all_visible(), and update_most_recent_deletion_info().
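
A hypothetical counter, loosely modeled on the ANALYZE-style callers above, showing how a tuple on a locked page is classified against a caller-supplied OldestXmin horizon. The buffer must be locked at least in share mode because the function may set hint bits.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "storage/bufmgr.h"

    /* sketch: classify one tuple and bump the caller's counters */
    static void
    count_tuple(HeapTuple tup, TransactionId OldestXmin, Buffer buf,
                int *live, int *dead, int *recently_dead)
    {
        switch (HeapTupleSatisfiesVacuum(tup, OldestXmin, buf))
        {
            case HEAPTUPLE_LIVE:
                (*live)++;
                break;
            case HEAPTUPLE_DEAD:
                (*dead)++;          /* safe to remove */
                break;
            case HEAPTUPLE_RECENTLY_DEAD:
                (*recently_dead)++; /* dead, but maybe still visible to someone */
                break;
            case HEAPTUPLE_INSERT_IN_PROGRESS:
            case HEAPTUPLE_DELETE_IN_PROGRESS:
                (*live)++;          /* treat in-progress as live in this sketch */
                break;
        }
    }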

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId *  dead_after 
)

Definition at line 1205 of file heapam_visibility.c.

1206{
1207 HeapTupleHeader tuple = htup->t_data;
1208
1209 Assert(ItemPointerIsValid(&htup->t_self));
1210 Assert(htup->t_tableOid != InvalidOid);
1211 Assert(dead_after != NULL);
1212
1213 *dead_after = InvalidTransactionId;
1214
1215 /*
1216 * Has inserting transaction committed?
1217 *
1218 * If the inserting transaction aborted, then the tuple was never visible
1219 * to any other transaction, so we can delete it immediately.
1220 */
1221 if (!HeapTupleHeaderXminCommitted(tuple))
1222 {
1223 if (HeapTupleHeaderXminInvalid(tuple))
1224 return HEAPTUPLE_DEAD;
1225 /* Used by pre-9.0 binary upgrades */
1226 else if (tuple->t_infomask & HEAP_MOVED_OFF)
1227 {
1228 TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1229
1230 if (TransactionIdIsCurrentTransactionId(xvac))
1231 return HEAPTUPLE_DELETE_IN_PROGRESS;
1232 if (TransactionIdIsInProgress(xvac))
1233 return HEAPTUPLE_DELETE_IN_PROGRESS;
1234 if (TransactionIdDidCommit(xvac))
1235 {
1236 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1237 InvalidTransactionId);
1238 return HEAPTUPLE_DEAD;
1239 }
1240 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1241 InvalidTransactionId);
1242 }
1243 /* Used by pre-9.0 binary upgrades */
1244 else if (tuple->t_infomask & HEAP_MOVED_IN)
1245 {
1246 TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1247
1248 if (TransactionIdIsCurrentTransactionId(xvac))
1249 return HEAPTUPLE_INSERT_IN_PROGRESS;
1250 if (TransactionIdIsInProgress(xvac))
1251 return HEAPTUPLE_INSERT_IN_PROGRESS;
1252 if (TransactionIdDidCommit(xvac))
1253 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1254 InvalidTransactionId);
1255 else
1256 {
1257 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1258 InvalidTransactionId);
1259 return HEAPTUPLE_DEAD;
1260 }
1261 }
1262 else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
1263 {
1264 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1265 return HEAPTUPLE_INSERT_IN_PROGRESS;
1266 /* only locked? run infomask-only check first, for performance */
1267 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
1268 HeapTupleHeaderIsOnlyLocked(tuple))
1269 return HEAPTUPLE_INSERT_IN_PROGRESS;
1270 /* inserted and then deleted by same xact */
1271 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
1272 return HEAPTUPLE_DELETE_IN_PROGRESS;
1273 /* deleting subtransaction must have aborted */
1274 return HEAPTUPLE_INSERT_IN_PROGRESS;
1275 }
1276 else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
1277 {
1278 /*
1279 * It'd be possible to discern between INSERT/DELETE in progress
1280 * here by looking at xmax - but that doesn't seem beneficial for
1281 * the majority of callers and even detrimental for some. We'd
1282 * rather have callers look at/wait for xmin than xmax. It's
1283 * always correct to return INSERT_IN_PROGRESS because that's
1284 * what's happening from the view of other backends.
1285 */
1286 return HEAPTUPLE_INSERT_IN_PROGRESS;
1287 }
1288 else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
1289 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1290 HeapTupleHeaderGetRawXmin(tuple));
1291 else
1292 {
1293 /*
1294 * Not in Progress, Not Committed, so either Aborted or crashed
1295 */
1296 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1297 InvalidTransactionId);
1298 return HEAPTUPLE_DEAD;
1299 }
1300
1301 /*
1302 * At this point the xmin is known committed, but we might not have
1303 * been able to set the hint bit yet; so we can no longer Assert that
1304 * it's set.
1305 */
1306 }
1307
1308 /*
1309 * Okay, the inserter committed, so it was good at some point. Now what
1310 * about the deleting transaction?
1311 */
1312 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1313 return HEAPTUPLE_LIVE;
1314
1315 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1316 {
1317 /*
1318 * "Deleting" xact really only locked it, so the tuple is live in any
1319 * case. However, we should make sure that either XMAX_COMMITTED or
1320 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1321 * examining the tuple for future xacts.
1322 */
1323 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1324 {
1325 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1326 {
1327 /*
1328 * If it's a pre-pg_upgrade tuple, the multixact cannot
1329 * possibly be running; otherwise have to check.
1330 */
1331 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1332 MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
1333 true))
1334 return HEAPTUPLE_LIVE;
1335 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1336 }
1337 else
1338 {
1339 if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1340 return HEAPTUPLE_LIVE;
1341 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1342 InvalidTransactionId);
1343 }
1344 }
1345
1346 /*
1347 * We don't really care whether xmax did commit, abort or crash. We
1348 * know that xmax did lock the tuple, but it did not and will never
1349 * actually update it.
1350 */
1351
1352 return HEAPTUPLE_LIVE;
1353 }
1354
1355 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1356 {
1357 TransactionId xmax = HeapTupleGetUpdateXid(tuple);
1358
1359 /* already checked above */
1360 Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
1361
1362 /* not LOCKED_ONLY, so it has to have an xmax */
1363 Assert(TransactionIdIsValid(xmax));
1364
1365 if (TransactionIdIsInProgress(xmax))
1366 return HEAPTUPLE_DELETE_IN_PROGRESS;
1367 else if (TransactionIdDidCommit(xmax))
1368 {
1369 /*
1370 * The multixact might still be running due to lockers. Need to
1371 * allow for pruning if below the xid horizon regardless --
1372 * otherwise we could end up with a tuple where the updater has to
1373 * be removed due to the horizon, but is not pruned away. It's
1374 * not a problem to prune that tuple, because any remaining
1375 * lockers will also be present in newer tuple versions.
1376 */
1377 *dead_after = xmax;
1378 return HEAPTUPLE_RECENTLY_DEAD;
1379 }
1380 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1381 {
1382 /*
1383 * Not in Progress, Not Committed, so either Aborted or crashed.
1384 * Mark the Xmax as invalid.
1385 */
1386 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1387 }
1388
1389 return HEAPTUPLE_LIVE;
1390 }
1391
1392 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1393 {
1394 if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1395 return HEAPTUPLE_DELETE_IN_PROGRESS;
1396 else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
1397 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1398 HeapTupleHeaderGetRawXmax(tuple));
1399 else
1400 {
1401 /*
1402 * Not in Progress, Not Committed, so either Aborted or crashed
1403 */
1404 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1405 InvalidTransactionId);
1406 return HEAPTUPLE_LIVE;
1407 }
1408
1409 /*
1410 * At this point the xmax is known committed, but we might not have
1411 * been able to set the hint bit yet; so we can no longer Assert that
1412 * it's set.
1413 */
1414 }
1415
1416 /*
1417 * Deleter committed, allow caller to check if it was recent enough that
1418 * some open transactions could still see the tuple.
1419 */
1420 *dead_after = HeapTupleHeaderGetRawXmax(tuple);
1421 return HEAPTUPLE_RECENTLY_DEAD;
1422}

References Assert(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXvac(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
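
A hedged sketch, loosely following heap_prune_satisfies_vacuum(): the caller keeps the RECENTLY_DEAD/DEAD decision for itself by testing the returned dead_after XID against a GlobalVisState instead of a fixed OldestXmin.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "utils/snapmgr.h"

    /* sketch: resolve the RECENTLY_DEAD case against a GlobalVisState */
    static HTSV_Result
    satisfies_vacuum_with_vistest(HeapTuple tup, Buffer buf, GlobalVisState *vistest)
    {
        TransactionId dead_after = InvalidTransactionId;
        HTSV_Result res;

        res = HeapTupleSatisfiesVacuumHorizon(tup, buf, &dead_after);

        if (res == HEAPTUPLE_RECENTLY_DEAD &&
            GlobalVisTestIsRemovableXid(vistest, dead_after))
            res = HEAPTUPLE_DEAD;

        return res;
    }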

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)

Definition at line 1776 of file heapam_visibility.c.

1777{
1778 switch (snapshot->snapshot_type)
1779 {
1780 case SNAPSHOT_MVCC:
1781 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1782 case SNAPSHOT_SELF:
1783 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1784 case SNAPSHOT_ANY:
1785 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1786 case SNAPSHOT_TOAST:
1787 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1788 case SNAPSHOT_DIRTY:
1789 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1790 case SNAPSHOT_HISTORIC_MVCC:
1791 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1792 case SNAPSHOT_NON_VACUUMABLE:
1793 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1794 }
1795
1796 return false; /* keep compiler quiet */
1797}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition: snapshot.h:70
@ SNAPSHOT_SELF
Definition: snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition: snapshot.h:114
@ SNAPSHOT_MVCC
Definition: snapshot.h:46
@ SNAPSHOT_ANY
Definition: snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:105
@ SNAPSHOT_DIRTY
Definition: snapshot.h:98
SnapshotType snapshot_type
Definition: snapshot.h:140

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), page_collect_tuples(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
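
A hypothetical helper (not part of heapam, roughly the shape of heap_fetch()) showing the usual calling convention: pin and share-lock the page, point a HeapTupleData at the item, and dispatch through HeapTupleSatisfiesVisibility() under the active snapshot.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/htup_details.h"
    #include "storage/bufmgr.h"
    #include "utils/rel.h"
    #include "utils/snapmgr.h"

    /* sketch: is the tuple at "tid" visible to the active snapshot? */
    static bool
    tid_is_visible(Relation rel, ItemPointer tid)
    {
        Buffer        buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
        Page          page;
        ItemId        lp;
        HeapTupleData tup;
        bool          visible = false;

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        page = BufferGetPage(buf);
        lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));

        if (ItemIdIsNormal(lp))
        {
            tup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
            tup.t_len = ItemIdGetLength(lp);
            tup.t_self = *tid;
            tup.t_tableOid = RelationGetRelid(rel);

            visible = HeapTupleSatisfiesVisibility(&tup, GetActiveSnapshot(), buf);
        }

        UnlockReleaseBuffer(buf);
        return visible;
    }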

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)

Definition at line 141 of file heapam_visibility.c.

143{
144 SetHintBits(tuple, buffer, infomask, xid);
145}

References SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
Buffer  vmbuffer,
uint8  vmflags,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze *  frozen,
int  nfrozen,
OffsetNumber *  redirected,
int  nredirected,
OffsetNumber *  dead,
int  ndead,
OffsetNumber *  unused,
int  nunused 
)

Definition at line 2157 of file pruneheap.c.

2166{
2167 xl_heap_prune xlrec;
2168 XLogRecPtr recptr;
2169 uint8 info;
2170 uint8 regbuf_flags_heap;
2171
2172 /* The following local variables hold data registered in the WAL record: */
2173 xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
2174 xlhp_freeze_plans freeze_plans;
2175 xlhp_prune_items redirect_items;
2176 xlhp_prune_items dead_items;
2177 xlhp_prune_items unused_items;
2178 OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
2179 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2180 bool do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
2181
2182 Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
2183
2184 xlrec.flags = 0;
2185 regbuf_flags_heap = REGBUF_STANDARD;
2186
2187 /*
2188 * We can avoid an FPI of the heap page if the only modification we are
2189 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2190 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2191 * heap page with this record's LSN. Recovery skips records <= the stamped
2192 * LSN, so this could lead to skipping an earlier FPI needed to repair a
2193 * torn page.
2194 */
2195 if (!do_prune &&
2196 nfrozen == 0 &&
2197 (!do_set_vm || !XLogHintBitIsNeeded()))
2198 regbuf_flags_heap |= REGBUF_NO_IMAGE;
2199
2200 /*
2201 * Prepare data for the buffer. The arrays are not actually in the
2202 * buffer, but we pretend that they are. When XLogInsert stores a full
2203 * page image, the arrays can be omitted.
2204 */
2205 XLogBeginInsert();
2206 XLogRegisterBuffer(0, buffer, regbuf_flags_heap);
2207
2208 if (do_set_vm)
2209 XLogRegisterBuffer(1, vmbuffer, 0);
2210
2211 if (nfrozen > 0)
2212 {
2213 int nplans;
2214
2215 xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
2216
2217 /*
2218 * Prepare deduplicated representation for use in the WAL record. This
2219 * destructively sorts frozen tuples array in-place.
2220 */
2221 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2222
2223 freeze_plans.nplans = nplans;
2224 XLogRegisterBufData(0, &freeze_plans,
2225 offsetof(xlhp_freeze_plans, plans));
2226 XLogRegisterBufData(0, plans,
2227 sizeof(xlhp_freeze_plan) * nplans);
2228 }
2229 if (nredirected > 0)
2230 {
2231 xlrec.flags |= XLHP_HAS_REDIRECTIONS;
2232
2233 redirect_items.ntargets = nredirected;
2234 XLogRegisterBufData(0, &redirect_items,
2235 offsetof(xlhp_prune_items, data));
2236 XLogRegisterBufData(0, redirected,
2237 sizeof(OffsetNumber[2]) * nredirected);
2238 }
2239 if (ndead > 0)
2240 {
2241 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2242
2243 dead_items.ntargets = ndead;
2244 XLogRegisterBufData(0, &dead_items,
2245 offsetof(xlhp_prune_items, data));
2246 XLogRegisterBufData(0, dead,
2247 sizeof(OffsetNumber) * ndead);
2248 }
2249 if (nunused > 0)
2250 {
2251 xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
2252
2253 unused_items.ntargets = nunused;
2254 XLogRegisterBufData(0, &unused_items,
2255 offsetof(xlhp_prune_items, data));
2256 XLogRegisterBufData(0, unused,
2257 sizeof(OffsetNumber) * nunused);
2258 }
2259 if (nfrozen > 0)
2260 XLogRegisterBufData(0, frz_offsets,
2261 sizeof(OffsetNumber) * nfrozen);
2262
2263 /*
2264 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2265 * flag above.
2266 */
2267 if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
2268 {
2269 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2270 if (vmflags & VISIBILITYMAP_ALL_FROZEN)
2271 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2272 }
2273 if (RelationIsAccessibleInLogicalDecoding(relation))
2274 xlrec.flags |= XLHP_IS_CATALOG_REL;
2275 if (TransactionIdIsValid(conflict_xid))
2276 xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
2277 if (cleanup_lock)
2278 xlrec.flags |= XLHP_CLEANUP_LOCK;
2279 else
2280 {
2281 Assert(nredirected == 0 && ndead == 0);
2282 /* also, any items in 'unused' must've been LP_DEAD previously */
2283 }
2284 XLogRegisterData(&xlrec, SizeOfHeapPrune);
2285 if (TransactionIdIsValid(conflict_xid))
2286 XLogRegisterData(&conflict_xid, sizeof(TransactionId));
2287
2288 switch (reason)
2289 {
2290 case PRUNE_ON_ACCESS:
2291 info = XLOG_HEAP2_PRUNE_ON_ACCESS;
2292 break;
2293 case PRUNE_VACUUM_SCAN:
2294 info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
2295 break;
2296 case PRUNE_VACUUM_CLEANUP:
2297 info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
2298 break;
2299 default:
2300 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2301 break;
2302 }
2303 recptr = XLogInsert(RM_HEAP2_ID, info);
2304
2305 if (do_set_vm)
2306 {
2307 Assert(BufferIsDirty(vmbuffer));
2308 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2309 }
2310
2311 /*
2312 * See comment at the top of the function about regbuf_flags_heap for
2313 * details on when we can advance the page LSN.
2314 */
2315 if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2316 {
2317 Assert(BufferIsDirty(buffer));
2318 PageSetLSN(BufferGetPage(buffer), recptr);
2319 }
2320}
#define XLHP_HAS_CONFLICT_HORIZON
Definition: heapam_xlog.h:316
#define XLHP_HAS_FREEZE_PLANS
Definition: heapam_xlog.h:322
#define XLHP_VM_ALL_VISIBLE
Definition: heapam_xlog.h:339
#define SizeOfHeapPrune
Definition: heapam_xlog.h:295
#define XLHP_HAS_NOW_UNUSED_ITEMS
Definition: heapam_xlog.h:331
#define XLHP_VM_ALL_FROZEN
Definition: heapam_xlog.h:340
#define XLHP_HAS_REDIRECTIONS
Definition: heapam_xlog.h:329
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition: heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition: heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
Definition: heapam_xlog.h:308
#define XLHP_HAS_DEAD_ITEMS
Definition: heapam_xlog.h:330
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition: heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
Definition: heapam_xlog.h:298
const void * data
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: pruneheap.c:2078
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
#define REGBUF_NO_IMAGE
Definition: xloginsert.h:33

References Assert(), BufferGetPage(), BufferIsDirty(), data, elog, ERROR, xl_heap_prune::flags, heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_freeze_plans::nplans, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, VISIBILITYMAP_VALID_BITS, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLHP_VM_ALL_FROZEN, XLHP_VM_ALL_VISIBLE, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogHintBitIsNeeded, XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().
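
A hedged caller-side sketch (not from the PostgreSQL sources): it assumes the page has already been modified and MarkBufferDirty() called inside a critical section, as heap_page_prune_and_freeze() does, and that redirected/nowdead/nowunused arrays, their counts, and conflict_xid were collected by the caller. Nothing is frozen and no visibility-map bits are set in this minimal variant.

    /* sketch only: emit the prune record once the page changes are in place */
    if (RelationNeedsWAL(relation))
        log_heap_prune_and_freeze(relation, buffer,
                                  InvalidBuffer, 0,   /* no visibility-map update */
                                  conflict_xid,
                                  true,               /* held a cleanup lock */
                                  PRUNE_ON_ACCESS,
                                  NULL, 0,            /* nothing frozen */
                                  redirected, nredirected,
                                  nowdead, ndead,
                                  nowunused, nunused);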

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)

Definition at line 2065 of file heapam.c.

2066{
2067 if (bistate->current_buf != InvalidBuffer)
2068 ReleaseBuffer(bistate->current_buf);
2069 bistate->current_buf = InvalidBuffer;
2070
2071 /*
2072 * Despite the name, we also reset bulk relation extension state.
2073 * Otherwise we can end up erroring out due to looking for free space in
2074 * ->next_free of one partition, even though ->next_free was set when
2075 * extending another partition. It could obviously also be bad for
2076 * efficiency to look at existing blocks at offsets from another
2077 * partition, even if we don't error out.
2078 */
2079 bistate->next_free = InvalidBlockNumber;
2080 bistate->last_free = InvalidBlockNumber;
2081}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
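
A hypothetical loader illustrating the COPY-style pattern behind the comment above: one BulkInsertState is reused for the whole load, but its pinned buffer (and bulk-extension state) is dropped whenever the destination relation changes.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/xact.h"

    /* sketch: insert tuples that may alternate between two target heaps */
    static void
    bulk_load(Relation rel_a, Relation rel_b,
              HeapTuple *tuples, bool *goes_to_a, int ntuples)
    {
        BulkInsertState bistate = GetBulkInsertState();
        Relation        prev = NULL;

        for (int i = 0; i < ntuples; i++)
        {
            Relation    dest = goes_to_a[i] ? rel_a : rel_b;

            if (dest != prev)
            {
                /* switching targets: drop the pin and bulk-extend state */
                ReleaseBulkInsertStatePin(bistate);
                prev = dest;
            }

            heap_insert(dest, tuples[i], GetCurrentCommandId(true),
                        0 /* no special options */, bistate);
        }

        FreeBulkInsertState(bistate);
    }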

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB *  tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId *  cmin,
CommandId *  cmax 
)

Definition at line 5560 of file reorderbuffer.c.

5564{
5565 ReorderBufferTupleCidKey key;
5566 ReorderBufferTupleCidEnt *ent;
5567 ForkNumber forkno;
5568 BlockNumber blockno;
5569 bool updated_mapping = false;
5570
5571 /*
5572 * Return unresolved if tuplecid_data is not valid. That's because when
5573 * streaming in-progress transactions we may run into tuples with the CID
5574 * before actually decoding them. Think e.g. about INSERT followed by
5575 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5576 * INSERT. So in such cases, we assume the CID is from the future
5577 * command.
5578 */
5579 if (tuplecid_data == NULL)
5580 return false;
5581
5582 /* be careful about padding */
5583 memset(&key, 0, sizeof(key));
5584
5585 Assert(!BufferIsLocal(buffer));
5586
5587 /*
5588 * get relfilelocator from the buffer, no convenient way to access it
5589 * other than that.
5590 */
5591 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5592
5593 /* tuples can only be in the main fork */
5594 Assert(forkno == MAIN_FORKNUM);
5595 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5596
5597 ItemPointerCopy(&htup->t_self,
5598 &key.tid);
5599
5600restart:
5601 ent = (ReorderBufferTupleCidEnt *)
5602 hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5603
5604 /*
5605 * failed to find a mapping, check whether the table was rewritten and
5606 * apply mapping if so, but only do that once - there can be no new
5607 * mappings while we are in here since we have to hold a lock on the
5608 * relation.
5609 */
5610 if (ent == NULL && !updated_mapping)
5611 {
5612 UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5613 /* now check but don't update for a mapping again */
5614 updated_mapping = true;
5615 goto restart;
5616 }
5617 else if (ent == NULL)
5618 return false;
5619
5620 if (cmin)
5621 *cmin = ent->cmin;
5622 if (cmax)
5623 *cmax = ent->cmax;
5624 return true;
5625}
#define BufferIsLocal(buffer)
Definition: buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:952
@ HASH_FIND
Definition: hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition: snapmgr.c:163

References Assert(), BufferGetTag(), BufferIsLocal, ReorderBufferTupleCidEnt::cmax, ReorderBufferTupleCidEnt::cmin, HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), sort-test::key, MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().
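
A hedged fragment, loosely following HeapTupleSatisfiesHistoricMVCC(): during logical decoding, combo CIDs on catalog tuples cannot be interpreted directly, so cmin/cmax are resolved through the mapping built by the decoder. The htup and buffer variables are assumed to be the caller's state; snapshot is a historic snapshot.

    /* sketch only: resolve cmin/cmax for a catalog tuple being decoded */
    CommandId   cmin,
                cmax;

    if (!ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
                                       htup, buffer, &cmin, &cmax))
        return false;       /* no mapping found: treat the tuple as not visible */

    if (cmin >= snapshot->curcid)
        return false;       /* inserted after the scan started */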

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
const ItemPointerData *  tid 
)

Definition at line 3227 of file heapam.c.

3228{
3229 TM_Result result;
3230 TM_FailureData tmfd;
3231
3232 result = heap_delete(relation, tid,
3233 GetCurrentCommandId(true), InvalidSnapshot,
3234 true /* wait for commit */ ,
3235 &tmfd, false /* changingPart */ );
3236 switch (result)
3237 {
3238 case TM_SelfModified:
3239 /* Tuple was already updated in current command? */
3240 elog(ERROR, "tuple already updated by self");
3241 break;
3242
3243 case TM_Ok:
3244 /* done successfully */
3245 break;
3246
3247 case TM_Updated:
3248 elog(ERROR, "tuple concurrently updated");
3249 break;
3250
3251 case TM_Deleted:
3252 elog(ERROR, "tuple concurrently deleted");
3253 break;
3254
3255 default:
3256 elog(ERROR, "unrecognized heap_delete status: %u", result);
3257 break;
3258 }
3259}
TM_Result heap_delete(Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2804

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
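
A hypothetical helper in the spirit of CatalogTupleDelete(): delete one tuple by TID from a relation that is not expected to be updated concurrently (any concurrent update makes simple_heap_delete() raise an error).

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/table.h"

    /* sketch: delete the tuple at "tid" */
    static void
    delete_by_tid(Oid relid, ItemPointer tid)
    {
        Relation    rel = table_open(relid, RowExclusiveLock);

        simple_heap_delete(rel, tid);

        table_close(rel, RowExclusiveLock);
    }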

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)

Definition at line 2746 of file heapam.c.

2747{
2748 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2749}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:2103

References GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().
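
A hypothetical single-row insert, CatalogTupleInsert()-style but with index maintenance deliberately omitted: build a tuple from the caller's datums and let simple_heap_insert() stamp xmin and fill in t_self.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/htup_details.h"
    #include "utils/rel.h"

    /* sketch: insert one row and return its new TID */
    static ItemPointerData
    insert_one_row(Relation rel, Datum *values, bool *isnull)
    {
        HeapTuple       tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);
        ItemPointerData tid;

        simple_heap_insert(rel, tup);   /* fills tup->t_self */
        tid = tup->t_self;

        heap_freetuple(tup);
        return tid;
    }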

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
const ItemPointerData *  otid,
HeapTuple  tup,
TU_UpdateIndexes *  update_indexes 
)

Definition at line 4517 of file heapam.c.

4519{
4520 TM_Result result;
4521 TM_FailureData tmfd;
4522 LockTupleMode lockmode;
4523
4524 result = heap_update(relation, otid, tup,
4525 GetCurrentCommandId(true), InvalidSnapshot,
4526 true /* wait for commit */ ,
4527 &tmfd, &lockmode, update_indexes);
4528 switch (result)
4529 {
4530 case TM_SelfModified:
4531 /* Tuple was already updated in current command? */
4532 elog(ERROR, "tuple already updated by self");
4533 break;
4534
4535 case TM_Ok:
4536 /* done successfully */
4537 break;
4538
4539 case TM_Updated:
4540 elog(ERROR, "tuple concurrently updated");
4541 break;
4542
4543 case TM_Deleted:
4544 elog(ERROR, "tuple concurrently deleted");
4545 break;
4546
4547 default:
4548 elog(ERROR, "unrecognized heap_update status: %u", result);
4549 break;
4550 }
4551}
TM_Result heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3273

References elog, ERROR, GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
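
A hypothetical wrapper in the spirit of CatalogTupleUpdate(): replace an existing row version in place and report whether new index entries are needed. The new tuple is assumed to have been built by the caller (for example with heap_modify_tuple), and otid points at the version being replaced; TU_None is returned when a HOT update made index maintenance unnecessary.

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/tableam.h"

    /* sketch: update one row and report the required index maintenance */
    static TU_UpdateIndexes
    replace_row(Relation rel, ItemPointer otid, HeapTuple newtup)
    {
        TU_UpdateIndexes update_indexes = TU_None;

        simple_heap_update(rel, otid, newtup, &update_indexes);
        return update_indexes;
    }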