PostgreSQL Source Code git master
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "commands/vacuum.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"

Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeParams
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct GlobalVisState GlobalVisState
 
typedef struct TupleTableSlot TupleTableSlot
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeParams PruneFreezeParams
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, const ItemPointerData *tid)
 
void heap_abort_speculative (Relation relation, const ItemPointerData *tid)
 
TM_Result heap_update (Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, const ItemPointerData *tid)
 
void simple_heap_update (Relation relation, const ItemPointerData *otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 39 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 40 of file heapam.h.
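
The HEAP_INSERT_* macros are bit flags that callers OR together into the "options" argument of heap_insert() or heap_multi_insert(). A minimal, illustrative sketch (not taken from the source; "rel" and "tup" are assumed to be a valid Relation and HeapTuple prepared by the caller, and a NULL BulkInsertState is used for a one-off insert):

/* combine option flags for an insert that skips the FSM and is not
 * decoded logically; illustrative only */
heap_insert(rel, tup, GetCurrentCommandId(true),
            HEAP_INSERT_SKIP_FSM | HEAP_INSERT_NO_LOGICAL, NULL);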

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 44 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 43 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 51 of file heapam.h.

Typedef Documentation

◆ BitmapHeapScanDesc

Definition at line 110 of file heapam.h.

◆ BitmapHeapScanDescData

◆ BulkInsertState

Definition at line 46 of file heapam.h.

◆ GlobalVisState

Definition at line 47 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 102 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeParams

◆ PruneFreezeResult

◆ TupleTableSlot

Definition at line 48 of file heapam.h.

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

125{
126 HEAPTUPLE_DEAD, /* tuple is dead and deletable */
127 HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
128 HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
129 HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
130 HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
HTSV_Result
Definition: heapam.h:125
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_LIVE
Definition: heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:130
@ HEAPTUPLE_DEAD
Definition: heapam.h:126
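
As a hedged illustration of how these values are typically consumed, a vacuum-style caller might branch on the result of HeapTupleSatisfiesVacuum() like this ("tuple", "OldestXmin" and "buf" are assumed to come from the surrounding page scan):

switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
{
    case HEAPTUPLE_DEAD:
        /* dead to everyone: the item can be reclaimed */
        break;
    case HEAPTUPLE_RECENTLY_DEAD:
        /* dead, but possibly still visible to some snapshot: keep it */
        break;
    case HEAPTUPLE_LIVE:
    case HEAPTUPLE_INSERT_IN_PROGRESS:
    case HEAPTUPLE_DELETE_IN_PROGRESS:
        /* must be kept; visibility may change once the xact finishes */
        break;
}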

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 226 of file heapam.h.

227{
228 PRUNE_ON_ACCESS, /* on-access pruning */
229 PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
230 PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
PruneReason
Definition: heapam.h:227
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:230
@ PRUNE_ON_ACCESS
Definition: heapam.h:228
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:229

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 2056 of file heapam.c.

2057{
2058 if (bistate->current_buf != InvalidBuffer)
2059 ReleaseBuffer(bistate->current_buf);
2060 FreeAccessStrategy(bistate->strategy);
2061 pfree(bistate);
2062}
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5461
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:643
void pfree(void *pointer)
Definition: mcxt.c:1616
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), MergePartitionsMoveRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 2039 of file heapam.c.

2040{
2041 BulkInsertState bistate;
2042
2045 bistate->current_buf = InvalidBuffer;
2046 bistate->next_free = InvalidBlockNumber;
2047 bistate->last_free = InvalidBlockNumber;
2048 bistate->already_extended_by = 0;
2049 return bistate;
2050}
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:39
#define palloc_object(type)
Definition: fe_memutils.h:74
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:461
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:46
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc_object, and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), MergePartitionsMoveRows(), and transientrel_startup().
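
As the "Referenced by" list suggests (CopyFrom(), ATRewriteTable(), and friends), a BulkInsertState is created once per bulk load, passed to every insertion so the same buffer and access strategy can be reused, and freed afterwards. A minimal sketch under those assumptions ("rel", "tuples" and "ntuples" are supplied by the caller):

BulkInsertState bistate = GetBulkInsertState();
CommandId       cid = GetCurrentCommandId(true);

for (int i = 0; i < ntuples; i++)
    heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

FreeBulkInsertState(bistate);   /* drops the kept pin and the strategy */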

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
const ItemPointerData *tid 
)

Definition at line 6219 of file heapam.c.

6220{
6222 ItemId lp;
6223 HeapTupleData tp;
6224 Page page;
6225 BlockNumber block;
6226 Buffer buffer;
6227
6229
6230 block = ItemPointerGetBlockNumber(tid);
6231 buffer = ReadBuffer(relation, block);
6232 page = BufferGetPage(buffer);
6233
6235
6236 /*
6237 * Page can't be all visible, we just inserted into it, and are still
6238 * running.
6239 */
6240 Assert(!PageIsAllVisible(page));
6241
6244
6245 tp.t_tableOid = RelationGetRelid(relation);
6246 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6247 tp.t_len = ItemIdGetLength(lp);
6248 tp.t_self = *tid;
6249
6250 /*
6251 * Sanity check that the tuple really is a speculatively inserted tuple,
6252 * inserted by us.
6253 */
6254 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6255 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6256 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6257 elog(ERROR, "attempted to kill a non-speculative tuple");
6259
6260 /*
6261 * No need to check for serializable conflicts here. There is never a
6262 * need for a combo CID, either. No need to extract replica identity, or
6263 * do anything special with infomask bits.
6264 */
6265
6267
6268 /*
6269 * The tuple will become DEAD immediately. Flag that this page is a
6270 * candidate for pruning by setting xmin to TransactionXmin. While not
6271 * immediately prunable, it is the oldest xid we can cheaply determine
6272 * that's safe against wraparound / being older than the table's
6273 * relfrozenxid. To defend against the unlikely case of a new relation
6274 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6275 * if so (vacuum can't subsequently move relfrozenxid to beyond
6276 * TransactionXmin, so there's no race here).
6277 */
6279 {
6280 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6281 TransactionId prune_xid;
6282
6283 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6284 prune_xid = relfrozenxid;
6285 else
6286 prune_xid = TransactionXmin;
6287 PageSetPrunable(page, prune_xid);
6288 }
6289
6290 /* store transaction information of xact deleting the tuple */
6292 tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6293
6294 /*
6295 * Set the tuple header xmin to InvalidTransactionId. This makes the
6296 * tuple immediately invisible everyone. (In particular, to any
6297 * transactions waiting on the speculative token, woken up later.)
6298 */
6300
6301 /* Clear the speculative insertion token too */
6302 tp.t_data->t_ctid = tp.t_self;
6303
6304 MarkBufferDirty(buffer);
6305
6306 /*
6307 * XLOG stuff
6308 *
6309 * The WAL records generated here match heap_delete(). The same recovery
6310 * routines are used.
6311 */
6312 if (RelationNeedsWAL(relation))
6313 {
6314 xl_heap_delete xlrec;
6315 XLogRecPtr recptr;
6316
6317 xlrec.flags = XLH_DELETE_IS_SUPER;
6319 tp.t_data->t_infomask2);
6321 xlrec.xmax = xid;
6322
6326
6327 /* No replica identity & replication origin logged */
6328
6329 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6330
6331 PageSetLSN(page, recptr);
6332 }
6333
6335
6337
6338 if (HeapTupleHasExternal(&tp))
6339 {
6340 Assert(!IsToastRelation(relation));
6341 heap_toast_delete(relation, &tp, true);
6342 }
6343
6344 /*
6345 * Never need to mark tuple for invalidation, since catalogs don't support
6346 * speculative insertion
6347 */
6348
6349 /* Now we can release the buffer */
6350 ReleaseBuffer(buffer);
6351
6352 /* count deletion, as we counted the insertion too */
6353 pgstat_count_heap_delete(relation);
6354}
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition: bufmgr.c:5699
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:3037
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:839
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:436
@ BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:207
@ BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:205
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:428
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:353
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:390
PageData * Page
Definition: bufpage.h:81
#define PageSetPrunable(page, xid)
Definition: bufpage.h:446
uint32 TransactionId
Definition: c.h:672
bool IsToastRelation(Relation relation)
Definition: catalog.c:206
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
Assert(PointerIsAligned(start, uint64))
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2762
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:34
#define SizeOfHeapDelete
Definition: heapam_xlog.h:121
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:105
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
Definition: htup_details.h:762
#define HEAP_XMAX_BITS
Definition: htup_details.h:281
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
Definition: htup_details.h:555
#define HEAP_MOVED
Definition: htup_details.h:213
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
Definition: htup_details.h:461
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:331
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:515
#define RelationNeedsWAL(relation)
Definition: rel.h:638
TransactionId TransactionXmin
Definition: snapmgr.c:159
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
union HeapTupleHeaderData::@56 t_choice
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:115
OffsetNumber offnum
Definition: heapam_xlog.h:116
uint8 infobits_set
Definition: heapam_xlog.h:117
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.h:263
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:455
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:368
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:245
void XLogBeginInsert(void)
Definition: xloginsert.c:152
#define REGBUF_STANDARD
Definition: xloginsert.h:35

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().
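
heap_abort_speculative() is the failure path of the speculative-insertion protocol used by INSERT ... ON CONFLICT; heap_finish_speculative() is the success path. A hedged outline of the control flow only, omitting the speculative-token locking the executor performs ("rel" and "tup" are assumed to exist, and the conflict test is a placeholder):

/* insert the tuple speculatively; its TID ends up in tup->t_self */
heap_insert(rel, tup, GetCurrentCommandId(true),
            HEAP_INSERT_SPECULATIVE, NULL);

if (conflict_detected)                             /* hypothetical check */
    heap_abort_speculative(rel, &tup->t_self);     /* "super-delete" it */
else
    heap_finish_speculative(rel, &tup->t_self);    /* make it a normal tuple */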

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 1118 of file heapam.c.

1122{
1123 HeapScanDesc scan;
1124
1125 /*
1126 * increment relation ref count while scanning relation
1127 *
1128 * This is just to make really sure the relcache entry won't go away while
1129 * the scan has a pointer to it. Caller should be holding the rel open
1130 * anyway, so this is redundant in all normal scenarios...
1131 */
1133
1134 /*
1135 * allocate and initialize scan descriptor
1136 */
1137 if (flags & SO_TYPE_BITMAPSCAN)
1138 {
1140
1141 /*
1142 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1143 * does not have, so no special initializations required here.
1144 */
1145 scan = (HeapScanDesc) bscan;
1146 }
1147 else
1149
1150 scan->rs_base.rs_rd = relation;
1151 scan->rs_base.rs_snapshot = snapshot;
1152 scan->rs_base.rs_nkeys = nkeys;
1153 scan->rs_base.rs_flags = flags;
1154 scan->rs_base.rs_parallel = parallel_scan;
1155 scan->rs_strategy = NULL; /* set in initscan */
1156 scan->rs_cbuf = InvalidBuffer;
1157
1158 /*
1159 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1160 */
1161 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1162 scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1163
1164 /* Check that a historic snapshot is not used for non-catalog tables */
1165 if (snapshot &&
1166 IsHistoricMVCCSnapshot(snapshot) &&
1168 {
1169 ereport(ERROR,
1170 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
1171 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
1172 RelationGetRelationName(relation))));
1173 }
1174
1175 /*
1176 * For seqscan and sample scans in a serializable transaction, acquire a
1177 * predicate lock on the entire relation. This is required not only to
1178 * lock all the matching tuples, but also to conflict with new insertions
1179 * into the table. In an indexscan, we take page locks on the index pages
1180 * covering the range specified in the scan qual, but in a heap scan there
1181 * is nothing more fine-grained to lock. A bitmap scan is a different
1182 * story, there we have already scanned the index and locked the index
1183 * pages covering the predicate. But in that case we still have to lock
1184 * any matching heap tuples. For sample scan we could optimize the locking
1185 * to be at least page-level granularity, but we'd need to add per-tuple
1186 * locking for that.
1187 */
1189 {
1190 /*
1191 * Ensure a missing snapshot is noticed reliably, even if the
1192 * isolation mode means predicate locking isn't performed (and
1193 * therefore the snapshot isn't used here).
1194 */
1195 Assert(snapshot);
1196 PredicateLockRelation(relation, snapshot);
1197 }
1198
1199 /* we only need to set this up once */
1200 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1201
1202 /*
1203 * Allocate memory to keep track of page allocation for parallel workers
1204 * when doing a parallel scan.
1205 */
1206 if (parallel_scan != NULL)
1208 else
1209 scan->rs_parallelworkerdata = NULL;
1210
1211 /*
1212 * we do this here instead of in initscan() because heap_rescan also calls
1213 * initscan() and we don't want to allocate memory again
1214 */
1215 if (nkeys > 0)
1216 scan->rs_base.rs_key = palloc_array(ScanKeyData, nkeys);
1217 else
1218 scan->rs_base.rs_key = NULL;
1219
1220 initscan(scan, key, false);
1221
1222 scan->rs_read_stream = NULL;
1223
1224 /*
1225 * Set up a read stream for sequential scans and TID range scans. This
1226 * should be done after initscan() because initscan() allocates the
1227 * BufferAccessStrategy object passed to the read stream API.
1228 */
1229 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1231 {
1233
1234 if (scan->rs_base.rs_parallel)
1236 else
1238
1239 /* ---
1240 * It is safe to use batchmode as the only locks taken by `cb`
1241 * are never taken while waiting for IO:
1242 * - SyncScanLock is used in the non-parallel case
1243 * - in the parallel case, only spinlocks and atomics are used
1244 * ---
1245 */
1248 scan->rs_strategy,
1249 scan->rs_base.rs_rd,
1251 cb,
1252 scan,
1253 0);
1254 }
1255 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1256 {
1259 scan->rs_strategy,
1260 scan->rs_base.rs_rd,
1263 scan,
1264 sizeof(TBMIterateResult));
1265 }
1266
1267
1268 return (TableScanDesc) scan;
1269}
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define ereport(elevel,...)
Definition: elog.h:150
#define palloc_array(type, count)
Definition: fe_memutils.h:76
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:250
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:290
static BlockNumber bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, void *per_buffer_data)
Definition: heapam.c:315
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:355
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:102
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2574
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:737
#define READ_STREAM_USE_BATCHING
Definition: read_stream.h:64
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.h:77
#define READ_STREAM_DEFAULT
Definition: read_stream.h:21
#define READ_STREAM_SEQUENTIAL
Definition: read_stream.h:36
#define RelationGetRelationName(relation)
Definition: rel.h:549
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:694
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2182
@ MAIN_FORKNUM
Definition: relpath.h:58
#define IsHistoricMVCCSnapshot(snapshot)
Definition: snapmgr.h:59
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:55
BufferAccessStrategy rs_strategy
Definition: heapam.h:73
Buffer rs_cbuf
Definition: heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:95
HeapTupleData rs_ctup
Definition: heapam.h:75
ReadStream * rs_read_stream
Definition: heapam.h:78
TableScanDescData rs_base
Definition: heapam.h:58
Relation rs_rd
Definition: relscan.h:36
uint32 rs_flags
Definition: relscan.h:64
struct ScanKeyData * rs_key
Definition: relscan.h:39
struct SnapshotData * rs_snapshot
Definition: relscan.h:37
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:66
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:53
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:51
@ SO_TYPE_SEQSCAN
Definition: tableam.h:49
@ SO_TYPE_BITMAPSCAN
Definition: tableam.h:50

References Assert(), bitmapheap_stream_read_next(), ereport, errcode(), errmsg(), ERROR, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsHistoricMVCCSnapshot, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc_array, palloc_object, PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelationName, RelationGetRelid, RelationIncrementReferenceCount(), RelationIsAccessibleInLogicalDecoding, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
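
Most callers reach this through the table AM wrapper table_beginscan(), but used directly the scan API follows the usual begin / getnext / end pattern. A minimal sketch, assuming the relation is already open, an active snapshot is available, and only the flags shown are needed:

TableScanDesc scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                                    SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
HeapTuple     tuple;

while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
    /* use tuple; it is only valid until the next heap_getnext() call */
}

heap_endscan(scan);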

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
const ItemPointerData *tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2807 of file heapam.c.

2810{
2811 TM_Result result;
2813 ItemId lp;
2814 HeapTupleData tp;
2815 Page page;
2816 BlockNumber block;
2817 Buffer buffer;
2818 Buffer vmbuffer = InvalidBuffer;
2819 TransactionId new_xmax;
2820 uint16 new_infomask,
2821 new_infomask2;
2822 bool have_tuple_lock = false;
2823 bool iscombo;
2824 bool all_visible_cleared = false;
2825 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2826 bool old_key_copied = false;
2827
2829
2830 AssertHasSnapshotForToast(relation);
2831
2832 /*
2833 * Forbid this during a parallel operation, lest it allocate a combo CID.
2834 * Other workers might need that combo CID for visibility checks, and we
2835 * have no provision for broadcasting it to them.
2836 */
2837 if (IsInParallelMode())
2838 ereport(ERROR,
2839 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2840 errmsg("cannot delete tuples during a parallel operation")));
2841
2842 block = ItemPointerGetBlockNumber(tid);
2843 buffer = ReadBuffer(relation, block);
2844 page = BufferGetPage(buffer);
2845
2846 /*
2847 * Before locking the buffer, pin the visibility map page if it appears to
2848 * be necessary. Since we haven't got the lock yet, someone else might be
2849 * in the middle of changing this, so we'll need to recheck after we have
2850 * the lock.
2851 */
2852 if (PageIsAllVisible(page))
2853 visibilitymap_pin(relation, block, &vmbuffer);
2854
2856
2859
2860 tp.t_tableOid = RelationGetRelid(relation);
2861 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2862 tp.t_len = ItemIdGetLength(lp);
2863 tp.t_self = *tid;
2864
2865l1:
2866
2867 /*
2868 * If we didn't pin the visibility map page and the page has become all
2869 * visible while we were busy locking the buffer, we'll have to unlock and
2870 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2871 * unfortunate, but hopefully shouldn't happen often.
2872 */
2873 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2874 {
2876 visibilitymap_pin(relation, block, &vmbuffer);
2878 }
2879
2880 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2881
2882 if (result == TM_Invisible)
2883 {
2884 UnlockReleaseBuffer(buffer);
2885 ereport(ERROR,
2886 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2887 errmsg("attempted to delete invisible tuple")));
2888 }
2889 else if (result == TM_BeingModified && wait)
2890 {
2891 TransactionId xwait;
2892 uint16 infomask;
2893
2894 /* must copy state data before unlocking buffer */
2896 infomask = tp.t_data->t_infomask;
2897
2898 /*
2899 * Sleep until concurrent transaction ends -- except when there's a
2900 * single locker and it's our own transaction. Note we don't care
2901 * which lock mode the locker has, because we need the strongest one.
2902 *
2903 * Before sleeping, we need to acquire tuple lock to establish our
2904 * priority for the tuple (see heap_lock_tuple). LockTuple will
2905 * release us when we are next-in-line for the tuple.
2906 *
2907 * If we are forced to "start over" below, we keep the tuple lock;
2908 * this arranges that we stay at the head of the line while rechecking
2909 * tuple state.
2910 */
2911 if (infomask & HEAP_XMAX_IS_MULTI)
2912 {
2913 bool current_is_member = false;
2914
2915 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2916 LockTupleExclusive, &current_is_member))
2917 {
2919
2920 /*
2921 * Acquire the lock, if necessary (but skip it when we're
2922 * requesting a lock and already have one; avoids deadlock).
2923 */
2924 if (!current_is_member)
2926 LockWaitBlock, &have_tuple_lock);
2927
2928 /* wait for multixact */
2930 relation, &(tp.t_self), XLTW_Delete,
2931 NULL);
2933
2934 /*
2935 * If xwait had just locked the tuple then some other xact
2936 * could update this tuple before we get to this point. Check
2937 * for xmax change, and start over if so.
2938 *
2939 * We also must start over if we didn't pin the VM page, and
2940 * the page has become all visible.
2941 */
2942 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2943 xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2945 xwait))
2946 goto l1;
2947 }
2948
2949 /*
2950 * You might think the multixact is necessarily done here, but not
2951 * so: it could have surviving members, namely our own xact or
2952 * other subxacts of this backend. It is legal for us to delete
2953 * the tuple in either case, however (the latter case is
2954 * essentially a situation of upgrading our former shared lock to
2955 * exclusive). We don't bother changing the on-disk hint bits
2956 * since we are about to overwrite the xmax altogether.
2957 */
2958 }
2959 else if (!TransactionIdIsCurrentTransactionId(xwait))
2960 {
2961 /*
2962 * Wait for regular transaction to end; but first, acquire tuple
2963 * lock.
2964 */
2967 LockWaitBlock, &have_tuple_lock);
2968 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2970
2971 /*
2972 * xwait is done, but if xwait had just locked the tuple then some
2973 * other xact could update this tuple before we get to this point.
2974 * Check for xmax change, and start over if so.
2975 *
2976 * We also must start over if we didn't pin the VM page, and the
2977 * page has become all visible.
2978 */
2979 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2980 xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2982 xwait))
2983 goto l1;
2984
2985 /* Otherwise check if it committed or aborted */
2986 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2987 }
2988
2989 /*
2990 * We may overwrite if previous xmax aborted, or if it committed but
2991 * only locked the tuple without updating it.
2992 */
2993 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2996 result = TM_Ok;
2997 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2998 result = TM_Updated;
2999 else
3000 result = TM_Deleted;
3001 }
3002
3003 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3004 if (result != TM_Ok)
3005 {
3006 Assert(result == TM_SelfModified ||
3007 result == TM_Updated ||
3008 result == TM_Deleted ||
3009 result == TM_BeingModified);
3011 Assert(result != TM_Updated ||
3013 }
3014
3015 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3016 {
3017 /* Perform additional check for transaction-snapshot mode RI updates */
3018 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
3019 result = TM_Updated;
3020 }
3021
3022 if (result != TM_Ok)
3023 {
3024 tmfd->ctid = tp.t_data->t_ctid;
3026 if (result == TM_SelfModified)
3028 else
3029 tmfd->cmax = InvalidCommandId;
3030 UnlockReleaseBuffer(buffer);
3031 if (have_tuple_lock)
3033 if (vmbuffer != InvalidBuffer)
3034 ReleaseBuffer(vmbuffer);
3035 return result;
3036 }
3037
3038 /*
3039 * We're about to do the actual delete -- check for conflict first, to
3040 * avoid possibly having to roll back work we've just done.
3041 *
3042 * This is safe without a recheck as long as there is no possibility of
3043 * another process scanning the page between this check and the delete
3044 * being visible to the scan (i.e., an exclusive buffer content lock is
3045 * continuously held from this point until the tuple delete is visible).
3046 */
3048
3049 /* replace cid with a combo CID if necessary */
3050 HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
3051
3052 /*
3053 * Compute replica identity tuple before entering the critical section so
3054 * we don't PANIC upon a memory allocation failure.
3055 */
3056 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
3057
3058 /*
3059 * If this is the first possibly-multixact-able operation in the current
3060 * transaction, set my per-backend OldestMemberMXactId setting. We can be
3061 * certain that the transaction will never become a member of any older
3062 * MultiXactIds than that. (We have to do this even if we end up just
3063 * using our own TransactionId below, since some other backend could
3064 * incorporate our XID into a MultiXact immediately afterwards.)
3065 */
3067
3070 xid, LockTupleExclusive, true,
3071 &new_xmax, &new_infomask, &new_infomask2);
3072
3074
3075 /*
3076 * If this transaction commits, the tuple will become DEAD sooner or
3077 * later. Set flag that this page is a candidate for pruning once our xid
3078 * falls below the OldestXmin horizon. If the transaction finally aborts,
3079 * the subsequent page pruning will be a no-op and the hint will be
3080 * cleared.
3081 */
3082 PageSetPrunable(page, xid);
3083
3084 if (PageIsAllVisible(page))
3085 {
3086 all_visible_cleared = true;
3087 PageClearAllVisible(page);
3088 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3089 vmbuffer, VISIBILITYMAP_VALID_BITS);
3090 }
3091
3092 /* store transaction information of xact deleting the tuple */
3094 tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3095 tp.t_data->t_infomask |= new_infomask;
3096 tp.t_data->t_infomask2 |= new_infomask2;
3098 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3099 HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
3100 /* Make sure there is no forward chain link in t_ctid */
3101 tp.t_data->t_ctid = tp.t_self;
3102
3103 /* Signal that this is actually a move into another partition */
3104 if (changingPart)
3106
3107 MarkBufferDirty(buffer);
3108
3109 /*
3110 * XLOG stuff
3111 *
3112 * NB: heap_abort_speculative() uses the same xlog record and replay
3113 * routines.
3114 */
3115 if (RelationNeedsWAL(relation))
3116 {
3117 xl_heap_delete xlrec;
3118 xl_heap_header xlhdr;
3119 XLogRecPtr recptr;
3120
3121 /*
3122 * For logical decode we need combo CIDs to properly decode the
3123 * catalog
3124 */
3126 log_heap_new_cid(relation, &tp);
3127
3128 xlrec.flags = 0;
3129 if (all_visible_cleared)
3131 if (changingPart)
3134 tp.t_data->t_infomask2);
3136 xlrec.xmax = new_xmax;
3137
3138 if (old_key_tuple != NULL)
3139 {
3140 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3142 else
3144 }
3145
3148
3150
3151 /*
3152 * Log replica identity of the deleted tuple if there is one
3153 */
3154 if (old_key_tuple != NULL)
3155 {
3156 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3157 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3158 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3159
3161 XLogRegisterData((char *) old_key_tuple->t_data
3163 old_key_tuple->t_len
3165 }
3166
3167 /* filtering by origin on a row level is much more efficient */
3169
3170 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3171
3172 PageSetLSN(page, recptr);
3173 }
3174
3176
3178
3179 if (vmbuffer != InvalidBuffer)
3180 ReleaseBuffer(vmbuffer);
3181
3182 /*
3183 * If the tuple has toasted out-of-line attributes, we need to delete
3184 * those items too. We have to do this before releasing the buffer
3185 * because we need to look at the contents of the tuple, but it's OK to
3186 * release the content lock on the buffer first.
3187 */
3188 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3189 relation->rd_rel->relkind != RELKIND_MATVIEW)
3190 {
3191 /* toast table entries should never be recursively toasted */
3193 }
3194 else if (HeapTupleHasExternal(&tp))
3195 heap_toast_delete(relation, &tp, false);
3196
3197 /*
3198 * Mark tuple for invalidation from system caches at next command
3199 * boundary. We have to do this before releasing the buffer because we
3200 * need to look at the contents of the tuple.
3201 */
3202 CacheInvalidateHeapTuple(relation, &tp, NULL);
3203
3204 /* Now we can release the buffer */
3205 ReleaseBuffer(buffer);
3206
3207 /*
3208 * Release the lmgr tuple lock, if we had it.
3209 */
3210 if (have_tuple_lock)
3212
3213 pgstat_count_heap_delete(relation);
3214
3215 if (old_key_tuple != NULL && old_key_copied)
3216 heap_freetuple(old_key_tuple);
3217
3218 return TM_Ok;
3219}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4318
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5478
static void PageClearAllVisible(Page page)
Definition: bufpage.h:438
#define InvalidCommandId
Definition: c.h:689
TransactionId MultiXactId
Definition: c.h:682
uint16_t uint16
Definition: c.h:551
void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup)
Definition: combocid.c:118
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7640
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:9105
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:5359
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:5310
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:9186
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7818
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2784
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:167
static void AssertHasSnapshotForToast(Relation rel)
Definition: heapam.c:223
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:2017
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:104
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:102
#define SizeOfHeapHeader
Definition: heapam_xlog.h:157
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:106
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:103
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1435
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
Definition: htup_details.h:226
static void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo)
Definition: htup_details.h:431
static void HeapTupleHeaderClearHotUpdated(HeapTupleHeaderData *tup)
Definition: htup_details.h:549
static TransactionId HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup)
Definition: htup_details.h:377
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
Definition: htup_details.h:397
static void HeapTupleHeaderSetMovedPartitions(HeapTupleHeaderData *tup)
Definition: htup_details.h:486
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:383
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1571
bool ItemPointerEquals(const ItemPointerData *pointer1, const ItemPointerData *pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper)
Definition: lmgr.c:663
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:539
@ MultiXactStatusUpdate
Definition: multixact.h:45
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition: predicate.c:4334
#define InvalidSnapshot
Definition: snapshot.h:119
TransactionId xmax
Definition: tableam.h:150
CommandId cmax
Definition: tableam.h:151
ItemPointerData ctid
Definition: tableam.h:149
uint16 t_infomask
Definition: heapam_xlog.h:153
uint16 t_infomask2
Definition: heapam_xlog.h:152
TM_Result
Definition: tableam.h:73
@ TM_Ok
Definition: tableam.h:78
@ TM_BeingModified
Definition: tableam.h:100
@ TM_Deleted
Definition: tableam.h:93
@ TM_Updated
Definition: tableam.h:90
@ TM_SelfModified
Definition: tableam.h:84
@ TM_Invisible
Definition: tableam.h:81
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:942
bool IsInParallelMode(void)
Definition: xact.c:1090
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:165
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:460

References Assert(), AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
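
simple_heap_delete() is effectively a wrapper that calls this with wait = true and raises an error on anything but TM_Ok; concurrency-aware callers such as heapam_tuple_delete() inspect the TM_Result and TM_FailureData instead. A hedged sketch ("rel" and "tid" are assumed valid, and no crosscheck snapshot is used):

TM_FailureData tmfd;
TM_Result      result;

result = heap_delete(rel, tid, GetCurrentCommandId(true),
                     InvalidSnapshot, true /* wait */ ,
                     &tmfd, false /* changingPart */ );

switch (result)
{
    case TM_Ok:
        break;                  /* deleted */
    case TM_SelfModified:
        /* already deleted by the current command; see tmfd.cmax */
        break;
    case TM_Updated:
    case TM_Deleted:
        /* a concurrent transaction got there first; tmfd.ctid and
         * tmfd.xmax describe the winning version */
        break;
    default:
        elog(ERROR, "unexpected heap_delete result: %d", (int) result);
}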

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1325 of file heapam.c.

1326{
1327 HeapScanDesc scan = (HeapScanDesc) sscan;
1328
1329 /* Note: no locking manipulations needed */
1330
1331 /*
1332 * unpin scan buffers
1333 */
1334 if (BufferIsValid(scan->rs_cbuf))
1335 ReleaseBuffer(scan->rs_cbuf);
1336
1337 /*
1338 * Must free the read stream before freeing the BufferAccessStrategy.
1339 */
1340 if (scan->rs_read_stream)
1342
1343 /*
1344 * decrement relation reference count and free scan descriptor storage
1345 */
1347
1348 if (scan->rs_base.rs_key)
1349 pfree(scan->rs_base.rs_key);
1350
1351 if (scan->rs_strategy != NULL)
1353
1354 if (scan->rs_parallelworkerdata != NULL)
1356
1357 if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1359
1360 pfree(scan);
1361}
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:387
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:1089
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2195
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:866
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:65

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze *frz 
)
inline static

Definition at line 475 of file heapam.h.

476{
477 HeapTupleHeaderSetXmax(tuple, frz->xmax);
478
479 if (frz->frzflags & XLH_FREEZE_XVAC)
481
482 if (frz->frzflags & XLH_INVALID_XVAC)
484
485 tuple->t_infomask = frz->t_infomask;
486 tuple->t_infomask2 = frz->t_infomask2;
487}
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:348
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:347
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:451
uint8 frzflags
Definition: heapam.h:147
uint16 t_infomask2
Definition: heapam.h:145
TransactionId xmax
Definition: heapam.h:144
uint16 t_infomask
Definition: heapam.h:146
#define FrozenTransactionId
Definition: transam.h:33

References FrozenTransactionId, HeapTupleFreeze::frzflags, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer *userbuf,
bool  keep_buf 
)

Definition at line 1623 of file heapam.c.

1628{
1629 ItemPointer tid = &(tuple->t_self);
1630 ItemId lp;
1631 Buffer buffer;
1632 Page page;
1633 OffsetNumber offnum;
1634 bool valid;
1635
1636 /*
1637 * Fetch and pin the appropriate page of the relation.
1638 */
1639 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1640
1641 /*
1642 * Need share lock on buffer to examine tuple commit status.
1643 */
1645 page = BufferGetPage(buffer);
1646
1647 /*
1648 * We'd better check for out-of-range offnum in case of VACUUM since the
1649 * TID was obtained.
1650 */
1651 offnum = ItemPointerGetOffsetNumber(tid);
1652 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1653 {
1655 ReleaseBuffer(buffer);
1656 *userbuf = InvalidBuffer;
1657 tuple->t_data = NULL;
1658 return false;
1659 }
1660
1661 /*
1662 * get the item line pointer corresponding to the requested tid
1663 */
1664 lp = PageGetItemId(page, offnum);
1665
1666 /*
1667 * Must check for deleted tuple.
1668 */
1669 if (!ItemIdIsNormal(lp))
1670 {
1672 ReleaseBuffer(buffer);
1673 *userbuf = InvalidBuffer;
1674 tuple->t_data = NULL;
1675 return false;
1676 }
1677
1678 /*
1679 * fill in *tuple fields
1680 */
1681 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1682 tuple->t_len = ItemIdGetLength(lp);
1683 tuple->t_tableOid = RelationGetRelid(relation);
1684
1685 /*
1686 * check tuple visibility, then release lock
1687 */
1688 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1689
1690 if (valid)
1691 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1693
1694 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1695
1697
1698 if (valid)
1699 {
1700 /*
1701 * All checks passed, so return the tuple as valid. Caller is now
1702 * responsible for releasing the buffer.
1703 */
1704 *userbuf = buffer;
1705
1706 return true;
1707 }
1708
1709 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1710 if (keep_buf)
1711 *userbuf = buffer;
1712 else
1713 {
1714 ReleaseBuffer(buffer);
1715 *userbuf = InvalidBuffer;
1716 tuple->t_data = NULL;
1717 }
1718
1719 return false;
1720}
@ BUFFER_LOCK_SHARE
Definition: bufmgr.h:206
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:371
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:9290
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:324
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2619

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
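
A hedged sketch of fetching a single tuple by TID, following the shape of heapam_fetch_row_version(): the caller fills t_self before the call and, on success, owns the pin on the returned buffer ("rel" and "tid" are assumed to be supplied by the caller):

HeapTupleData tuple;
Buffer        buf;

tuple.t_self = *tid;
if (heap_fetch(rel, GetActiveSnapshot(), &tuple, &buf, false))
{
    /* tuple.t_data points into the pinned buffer; use it, then unpin */
    ReleaseBuffer(buf);
}
else
{
    /* not visible under this snapshot; with keep_buf = false the buffer
     * has already been released and buf is InvalidBuffer */
}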

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
const ItemPointerData *tid 
)

Definition at line 6132 of file heapam.c.

6133{
6134 Buffer buffer;
6135 Page page;
6136 OffsetNumber offnum;
6137 ItemId lp;
6138 HeapTupleHeader htup;
6139
6140 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6142 page = BufferGetPage(buffer);
6143
6144 offnum = ItemPointerGetOffsetNumber(tid);
6145 if (offnum < 1 || offnum > PageGetMaxOffsetNumber(page))
6146 elog(ERROR, "offnum out of range");
6147 lp = PageGetItemId(page, offnum);
6148 if (!ItemIdIsNormal(lp))
6149 elog(ERROR, "invalid lp");
6150
6151 htup = (HeapTupleHeader) PageGetItem(page, lp);
6152
6153 /* NO EREPORT(ERROR) from here till changes are logged */
6154 START_CRIT_SECTION();
6155
6156 Assert(HeapTupleHeaderIsSpeculative(htup));
6157
6158 MarkBufferDirty(buffer);
6159
6160 /*
6161 * Replace the speculative insertion token with a real t_ctid, pointing to
6162 * itself like it does on regular tuples.
6163 */
6164 htup->t_ctid = *tid;
6165
6166 /* XLOG stuff */
6167 if (RelationNeedsWAL(relation))
6168 {
6169 xl_heap_confirm xlrec;
6170 XLogRecPtr recptr;
6171
6172 xlrec.offnum = ItemPointerGetOffsetNumber(tid);
6173
6174 XLogBeginInsert();
6175
6176 /* We want the same filtering on this as on a plain insert */
6177 XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
6178
6179 XLogRegisterData(&xlrec, SizeOfHeapConfirm);
6180 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6181
6182 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
6183
6184 PageSetLSN(page, recptr);
6185 }
6186
6187 END_CRIT_SECTION();
6188
6189 UnlockReleaseBuffer(buffer);
6190}
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:431
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:38
OffsetNumber offnum
Definition: heapam_xlog.h:428

References Assert(), BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
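A hedged sketch of the speculative-insertion flow this function completes, as the table AM layer drives it (compare heapam_tuple_complete_speculative()); token bookkeeping and the actual conflict check are assumed to be handled by the caller:

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
speculative_insert_sketch(Relation rel, HeapTuple tup, bool no_conflict)
{
    /* insert the tuple carrying a speculative token instead of a real ctid */
    heap_insert(rel, tup, GetCurrentCommandId(true),
                HEAP_INSERT_SPECULATIVE, NULL);

    if (no_conflict)
        heap_finish_speculative(rel, &tup->t_self);   /* make t_ctid self-pointing */
    else
        heap_abort_speculative(rel, &tup->t_self);    /* kill the inserted tuple */
}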

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7425 of file heapam.c.

7426{
7427 Page page = BufferGetPage(buffer);
7428
7429 for (int i = 0; i < ntuples; i++)
7430 {
7431 HeapTupleFreeze *frz = tuples + i;
7432 ItemId itemid = PageGetItemId(page, frz->offset);
7433 HeapTupleHeader htup;
7434
7435 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7436 heap_execute_freeze_tuple(htup, frz);
7437 }
7438}
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.h:475
int i
Definition: isn.c:77
OffsetNumber offset
Definition: heapam.h:152

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 7447 of file heapam.c.

7450{
7451 HeapTupleFreeze frz;
7452 bool do_freeze;
7453 bool totally_frozen;
7454 struct VacuumCutoffs cutoffs;
7455 HeapPageFreeze pagefrz;
7456
7457 cutoffs.relfrozenxid = relfrozenxid;
7458 cutoffs.relminmxid = relminmxid;
7459 cutoffs.OldestXmin = FreezeLimit;
7460 cutoffs.OldestMxact = MultiXactCutoff;
7461 cutoffs.FreezeLimit = FreezeLimit;
7462 cutoffs.MultiXactCutoff = MultiXactCutoff;
7463
7464 pagefrz.freeze_required = true;
7465 pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7466 pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7467 pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7468 pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7469
7470 do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7471 &pagefrz, &frz, &totally_frozen);
7472
7473 /*
7474 * Note that because this is not a WAL-logged operation, we don't need to
7475 * fill in the offset in the freeze record.
7476 */
7477
7478 if (do_freeze)
7479 heap_execute_freeze_tuple(tuple, &frz);
7480 return do_freeze;
7481}
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:7099
TransactionId FreezeLimit
Definition: vacuum.h:289
TransactionId relfrozenxid
Definition: vacuum.h:263
MultiXactId relminmxid
Definition: vacuum.h:264
MultiXactId MultiXactCutoff
Definition: vacuum.h:290

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
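A hedged usage sketch: freeze a locally copied tuple against caller-supplied cutoffs, roughly as rewrite_heap_tuple() does during CLUSTER/VACUUM FULL. No WAL is written here, so the target must be a private copy, never a tuple still sitting in a shared buffer; the cutoff values are assumptions the caller must obtain (e.g. via vacuum_get_cutoffs()).

#include "postgres.h"
#include "access/heapam.h"

static void
freeze_copied_tuple(HeapTuple newtup,
                    TransactionId relfrozenxid, TransactionId relminmxid,
                    TransactionId freeze_limit, TransactionId multi_cutoff)
{
    /* mutates the header in place; returns true if anything was changed */
    (void) heap_freeze_tuple(newtup->t_data, relfrozenxid, relminmxid,
                             freeze_limit, multi_cutoff);
}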

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1895 of file heapam.c.

1897{
1898 Relation relation = sscan->rs_rd;
1899 Snapshot snapshot = sscan->rs_snapshot;
1900 ItemPointerData ctid;
1901 TransactionId priorXmax;
1902
1903 /*
1904 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1905 * Assume that t_ctid links are valid however - there shouldn't be invalid
1906 * ones in the table.
1907 */
1908 Assert(ItemPointerIsValid(tid));
1909
1910 /*
1911 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1912 * need to examine, and *tid is the TID we will return if ctid turns out
1913 * to be bogus.
1914 *
1915 * Note that we will loop until we reach the end of the t_ctid chain.
1916 * Depending on the snapshot passed, there might be at most one visible
1917 * version of the row, but we don't try to optimize for that.
1918 */
1919 ctid = *tid;
1920 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1921 for (;;)
1922 {
1923 Buffer buffer;
1924 Page page;
1925 OffsetNumber offnum;
1926 ItemId lp;
1927 HeapTupleData tp;
1928 bool valid;
1929
1930 /*
1931 * Read, pin, and lock the page.
1932 */
1933 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1934 LockBuffer(buffer, BUFFER_LOCK_SHARE);
1935 page = BufferGetPage(buffer);
1936
1937 /*
1938 * Check for bogus item number. This is not treated as an error
1939 * condition because it can happen while following a t_ctid link. We
1940 * just assume that the prior tid is OK and return it unchanged.
1941 */
1942 offnum = ItemPointerGetOffsetNumber(&ctid);
1943 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1944 {
1945 UnlockReleaseBuffer(buffer);
1946 break;
1947 }
1948 lp = PageGetItemId(page, offnum);
1949 if (!ItemIdIsNormal(lp))
1950 {
1951 UnlockReleaseBuffer(buffer);
1952 break;
1953 }
1954
1955 /* OK to access the tuple */
1956 tp.t_self = ctid;
1957 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1958 tp.t_len = ItemIdGetLength(lp);
1959 tp.t_tableOid = RelationGetRelid(relation);
1960
1961 /*
1962 * After following a t_ctid link, we might arrive at an unrelated
1963 * tuple. Check for XMIN match.
1964 */
1965 if (TransactionIdIsValid(priorXmax) &&
1966 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1967 {
1968 UnlockReleaseBuffer(buffer);
1969 break;
1970 }
1971
1972 /*
1973 * Check tuple visibility; if visible, set it as the new result
1974 * candidate.
1975 */
1976 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1977 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1978 if (valid)
1979 *tid = ctid;
1980
1981 /*
1982 * If there's a valid t_ctid link, follow it, else we're done.
1983 */
1984 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1985 HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1986 HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1987 ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1988 {
1989 UnlockReleaseBuffer(buffer);
1990 break;
1991 }
1992
1993 ctid = tp.t_data->t_ctid;
1994 priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1995 UnlockReleaseBuffer(buffer);
1996 } /* end of loop */
1997}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)
Definition: htup_details.h:480

References Assert(), BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
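A hedged usage sketch (not from the PostgreSQL sources): resolve a possibly stale TID to the newest row version visible to the given snapshot; *tid is updated in place. The flag value mirrors what table_beginscan_tid() uses.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

static void
latest_tid_sketch(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                                        SO_TYPE_TIDSCAN);

    heap_get_latest_tid(scan, tid);
    heap_endscan(scan);
}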

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1895 of file pruneheap.c.

1896{
1897 OffsetNumber offnum,
1898 maxoff;
1899
1900 MemSet(root_offsets, InvalidOffsetNumber,
1901 MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1902
1903 maxoff = PageGetMaxOffsetNumber(page);
1904 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1905 {
1906 ItemId lp = PageGetItemId(page, offnum);
1907 HeapTupleHeader htup;
1908 OffsetNumber nextoffnum;
1909 TransactionId priorXmax;
1910
1911 /* skip unused and dead items */
1912 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1913 continue;
1914
1915 if (ItemIdIsNormal(lp))
1916 {
1917 htup = (HeapTupleHeader) PageGetItem(page, lp);
1918
1919 /*
1920 * Check if this tuple is part of a HOT-chain rooted at some other
1921 * tuple. If so, skip it for now; we'll process it when we find
1922 * its root.
1923 */
1924 if (HeapTupleHeaderIsHeapOnly(htup))
1925 continue;
1926
1927 /*
1928 * This is either a plain tuple or the root of a HOT-chain.
1929 * Remember it in the mapping.
1930 */
1931 root_offsets[offnum - 1] = offnum;
1932
1933 /* If it's not the start of a HOT-chain, we're done with it */
1934 if (!HeapTupleHeaderIsHotUpdated(htup))
1935 continue;
1936
1937 /* Set up to scan the HOT-chain */
1938 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1939 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1940 }
1941 else
1942 {
1943 /* Must be a redirect item. We do not set its root_offsets entry */
1944 Assert(ItemIdIsRedirected(lp));
1945 /* Set up to scan the HOT-chain */
1946 nextoffnum = ItemIdGetRedirect(lp);
1947 priorXmax = InvalidTransactionId;
1948 }
1949
1950 /*
1951 * Now follow the HOT-chain and collect other tuples in the chain.
1952 *
1953 * Note: Even though this is a nested loop, the complexity of the
1954 * function is O(N) because a tuple in the page should be visited not
1955 * more than twice, once in the outer loop and once in HOT-chain
1956 * chases.
1957 */
1958 for (;;)
1959 {
1960 /* Sanity check (pure paranoia) */
1961 if (offnum < FirstOffsetNumber)
1962 break;
1963
1964 /*
1965 * An offset past the end of page's line pointer array is possible
1966 * when the array was truncated
1967 */
1968 if (offnum > maxoff)
1969 break;
1970
1971 lp = PageGetItemId(page, nextoffnum);
1972
1973 /* Check for broken chains */
1974 if (!ItemIdIsNormal(lp))
1975 break;
1976
1977 htup = (HeapTupleHeader) PageGetItem(page, lp);
1978
1979 if (TransactionIdIsValid(priorXmax) &&
1980 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1981 break;
1982
1983 /* Remember the root line pointer for this item */
1984 root_offsets[nextoffnum - 1] = offnum;
1985
1986 /* Advance to next chain member, if any */
1987 if (!HeapTupleHeaderIsHotUpdated(htup))
1988 break;
1989
1990 /* HOT implies it can't have moved to different partition */
1991 Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1992
1993 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1994 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1995 }
1996 }
1997}
#define MemSet(start, val, len)
Definition: c.h:1019
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
Definition: htup_details.h:534
#define MaxHeapTuplesPerPage
Definition: htup_details.h:624
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
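A hedged usage sketch: map every offset on a heap page to its HOT-chain root, as the index build code does so that index entries for heap-only tuples can point at the chain root. The caller is assumed to already hold a pin on the buffer; a share lock is taken only while the page is inspected.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

static OffsetNumber
root_offset_of(Buffer buf, OffsetNumber offnum)
{
    OffsetNumber root_offsets[MaxHeapTuplesPerPage];

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    heap_get_root_tuples(BufferGetPage(buf), root_offsets);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    /* the array is indexed by offnum - 1; InvalidOffsetNumber means "no root" */
    return root_offsets[offnum - 1];
}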

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1364 of file heapam.c.

1365{
1366 HeapScanDesc scan = (HeapScanDesc) sscan;
1367
1368 /*
1369 * This is still widely used directly, without going through table AM, so
1370 * add a safety check. It's possible we should, at a later point,
1371 * downgrade this to an assert. The reason for checking the AM routine,
1372 * rather than the AM oid, is that this allows to write regression tests
1373 * that create another AM reusing the heap handler.
1374 */
1375 if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1376 ereport(ERROR,
1377 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1378 errmsg_internal("only heap AM is supported")));
1379
1380 /*
1381 * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1382 * for catalog or regular tables. See detailed comments in xact.c where
1383 * these variables are declared. Normally we have such a check at tableam
1384 * level API but this is called from many places so we need to ensure it
1385 * here.
1386 */
1387 if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1388 elog(ERROR, "unexpected heap_getnext call during logical decoding");
1389
1390 /* Note: no locking manipulations needed */
1391
1392 if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1393 heapgettup_pagemode(scan, direction,
1394 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1395 else
1396 heapgettup(scan, direction,
1397 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1398
1399 if (scan->rs_ctup.t_data == NULL)
1400 return NULL;
1401
1402 /*
1403 * if we get here it means we have a new current scan tuple, so point to
1404 * the proper return buffer and return the tuple.
1405 */
1406
1408
1409 return &scan->rs_ctup;
1410}
#define unlikely(x)
Definition: c.h:418
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1170
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:914
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:1024
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:695
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:62
bool bsysscan
Definition: xact.c:101
TransactionId CheckXidAlive
Definition: xact.c:100

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
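A minimal usage sketch of the scan loop most of the callers above share (not from the PostgreSQL sources; the relation OID and catalog-style snapshot handling are illustrative, and the relation is assumed to use the heap AM):

#include "postgres.h"
#include "access/heapam.h"
#include "access/table.h"
#include "access/tableam.h"

static uint64
count_visible_tuples(Oid relid)
{
    Relation    rel = table_open(relid, AccessShareLock);
    TableScanDesc scan = table_beginscan_catalog(rel, 0, NULL);
    HeapTuple   tuple;
    uint64      ntuples = 0;

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        ntuples++;           /* per-tuple work would go here */

    table_endscan(scan);
    table_close(rel, AccessShareLock);
    return ntuples;
}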

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1413 of file heapam.c.

1414{
1415 HeapScanDesc scan = (HeapScanDesc) sscan;
1416
1417 /* Note: no locking manipulations needed */
1418
1419 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1420 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1421 else
1422 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1423
1424 if (scan->rs_ctup.t_data == NULL)
1425 {
1426 ExecClearTuple(slot);
1427 return false;
1428 }
1429
1430 /*
1431 * if we get here it means we have a new current scan tuple, so point to
1432 * the proper return buffer and return the tuple.
1433 */
1434
1436
1437 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1438 scan->rs_cbuf);
1439 return true;
1440}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1581
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:457

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
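A hedged usage sketch of the slot-returning variant: a buffer-heap-tuple slot matching the relation's tuple descriptor is created once and reused for every returned row. The flag combination is an assumption; table_beginscan() would normally pick the flags.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

static void
scan_with_slot(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                                        SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
    TupleTableSlot *slot = MakeSingleTupleTableSlot(RelationGetDescr(rel),
                                                    &TTSOpsBufferHeapTuple);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* the slot holds a pin on the scan's current buffer; process it here */
    }

    ExecDropSingleTupleTableSlot(slot);
    heap_endscan(scan);
}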

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1516 of file heapam.c.

1518{
1519 HeapScanDesc scan = (HeapScanDesc) sscan;
1520 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1521 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1522
1523 /* Note: no locking manipulations needed */
1524 for (;;)
1525 {
1526 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1527 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1528 else
1529 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1530
1531 if (scan->rs_ctup.t_data == NULL)
1532 {
1533 ExecClearTuple(slot);
1534 return false;
1535 }
1536
1537 /*
1538 * heap_set_tidrange will have used heap_setscanlimits to limit the
1539 * range of pages we scan to only ones that can contain the TID range
1540 * we're scanning for. Here we must filter out any tuples from these
1541 * pages that are outside of that range.
1542 */
1543 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1544 {
1545 ExecClearTuple(slot);
1546
1547 /*
1548 * When scanning backwards, the TIDs will be in descending order.
1549 * Future tuples in this direction will be lower still, so we can
1550 * just return false to indicate there will be no more tuples.
1551 */
1552 if (ScanDirectionIsBackward(direction))
1553 return false;
1554
1555 continue;
1556 }
1557
1558 /*
1559 * Likewise for the final page, we must filter out TIDs greater than
1560 * maxtid.
1561 */
1562 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1563 {
1564 ExecClearTuple(slot);
1565
1566 /*
1567 * When scanning forward, the TIDs will be in ascending order.
1568 * Future tuples in this direction will be higher still, so we can
1569 * just return false to indicate there will be no more tuples.
1570 */
1571 if (ScanDirectionIsForward(direction))
1572 return false;
1573 continue;
1574 }
1575
1576 break;
1577 }
1578
1579 /*
1580 * if we get here it means we have a new current scan tuple, so point to
1581 * the proper return buffer and return the tuple.
1582 */
1583 pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1584
1585 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1586 return true;
1587}
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:55
union TableScanDescData::@49 st
ItemPointerData rs_maxtid
Definition: relscan.h:56
struct TableScanDescData::@49::@50 tidrange

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, TableScanDescData::st, HeapTupleData::t_data, HeapTupleData::t_self, and TableScanDescData::tidrange.
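A hedged usage sketch: return only the tuples whose TIDs lie within [*mintid, *maxtid], as the TID Range Scan executor node does through the table AM. The scan must be started with SO_TYPE_TIDRANGESCAN so that heap_set_tidrange() can limit the block range; the slot is assumed to be a buffer-heap-tuple slot supplied by the caller.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

static void
tidrange_scan_sketch(Relation rel, Snapshot snapshot,
                     ItemPointer mintid, ItemPointer maxtid,
                     TupleTableSlot *slot)
{
    TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                                        SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);

    heap_set_tidrange(scan, mintid, maxtid);

    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* only tuples with mintid <= t_self <= maxtid reach this point */
    }

    heap_endscan(scan);
}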

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool *  all_dead,
bool  first_call 
)

Definition at line 1743 of file heapam.c.

1746{
1747 Page page = BufferGetPage(buffer);
1748 TransactionId prev_xmax = InvalidTransactionId;
1749 BlockNumber blkno;
1750 OffsetNumber offnum;
1751 bool at_chain_start;
1752 bool valid;
1753 bool skip;
1754 GlobalVisState *vistest = NULL;
1755
1756 /* If this is not the first call, previous call returned a (live!) tuple */
1757 if (all_dead)
1758 *all_dead = first_call;
1759
1760 blkno = ItemPointerGetBlockNumber(tid);
1761 offnum = ItemPointerGetOffsetNumber(tid);
1762 at_chain_start = first_call;
1763 skip = !first_call;
1764
1765 /* XXX: we should assert that a snapshot is pushed or registered */
1766 Assert(TransactionIdIsValid(RecentXmin));
1767 Assert(BufferGetBlockNumber(buffer) == blkno);
1768
1769 /* Scan through possible multiple members of HOT-chain */
1770 for (;;)
1771 {
1772 ItemId lp;
1773
1774 /* check for bogus TID */
1775 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1776 break;
1777
1778 lp = PageGetItemId(page, offnum);
1779
1780 /* check for unused, dead, or redirected items */
1781 if (!ItemIdIsNormal(lp))
1782 {
1783 /* We should only see a redirect at start of chain */
1784 if (ItemIdIsRedirected(lp) && at_chain_start)
1785 {
1786 /* Follow the redirect */
1787 offnum = ItemIdGetRedirect(lp);
1788 at_chain_start = false;
1789 continue;
1790 }
1791 /* else must be end of chain */
1792 break;
1793 }
1794
1795 /*
1796 * Update heapTuple to point to the element of the HOT chain we're
1797 * currently investigating. Having t_self set correctly is important
1798 * because the SSI checks and the *Satisfies routine for historical
1799 * MVCC snapshots need the correct tid to decide about the visibility.
1800 */
1801 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1802 heapTuple->t_len = ItemIdGetLength(lp);
1803 heapTuple->t_tableOid = RelationGetRelid(relation);
1804 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1805
1806 /*
1807 * Shouldn't see a HEAP_ONLY tuple at chain start.
1808 */
1809 if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1810 break;
1811
1812 /*
1813 * The xmin should match the previous xmax value, else chain is
1814 * broken.
1815 */
1816 if (TransactionIdIsValid(prev_xmax) &&
1817 !TransactionIdEquals(prev_xmax,
1818 HeapTupleHeaderGetXmin(heapTuple->t_data)))
1819 break;
1820
1821 /*
1822 * When first_call is true (and thus, skip is initially false) we'll
1823 * return the first tuple we find. But on later passes, heapTuple
1824 * will initially be pointing to the tuple we returned last time.
1825 * Returning it again would be incorrect (and would loop forever), so
1826 * we skip it and return the next match we find.
1827 */
1828 if (!skip)
1829 {
1830 /* If it's visible per the snapshot, we must return it */
1831 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1832 HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1833 buffer, snapshot);
1834
1835 if (valid)
1836 {
1837 ItemPointerSetOffsetNumber(tid, offnum);
1838 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1839 HeapTupleHeaderGetXmin(heapTuple->t_data));
1840 if (all_dead)
1841 *all_dead = false;
1842 return true;
1843 }
1844 }
1845 skip = false;
1846
1847 /*
1848 * If we can't see it, maybe no one else can either. At caller
1849 * request, check whether all chain members are dead to all
1850 * transactions.
1851 *
1852 * Note: if you change the criterion here for what is "dead", fix the
1853 * planner's get_actual_variable_range() function to match.
1854 */
1855 if (all_dead && *all_dead)
1856 {
1857 if (!vistest)
1858 vistest = GlobalVisTestFor(relation);
1859
1860 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1861 *all_dead = false;
1862 }
1863
1864 /*
1865 * Check to see if HOT chain continues past this tuple; if so fetch
1866 * the next offnum and loop around.
1867 */
1868 if (HeapTupleIsHotUpdated(heapTuple))
1869 {
1870 Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1871 blkno);
1872 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1873 at_chain_start = false;
1874 prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1875 }
1876 else
1877 break; /* end of chain */
1878 }
1879
1880 return false;
1881}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:768
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:786
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4086
TransactionId RecentXmin
Definition: snapmgr.c:160

References Assert(), BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().
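A hedged sketch of the caller pattern in heapam_index_fetch_tuple(): pin and share-lock the page named by an index TID, then let heap_hot_search_buffer() walk the HOT chain to the visible member. On success *tid is updated to the member actually returned in *tuple.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

static bool
hot_fetch_sketch(Relation rel, Snapshot snapshot, ItemPointer tid,
                 HeapTuple tuple, bool *all_dead)
{
    Buffer      buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
    bool        found;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* first_call = true: start from the chain root named by *tid */
    found = heap_hot_search_buffer(tid, rel, buf, snapshot, tuple,
                                   all_dead, true);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);

    return found;
}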

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 8163 of file heapam.c.

8164{
8165 /* Initial assumption is that earlier pruning took care of conflict */
8166 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8167 BlockNumber blkno = InvalidBlockNumber;
8168 Buffer buf = InvalidBuffer;
8169 Page page = NULL;
8170 OffsetNumber maxoff = InvalidOffsetNumber;
8171 TransactionId priorXmax;
8172#ifdef USE_PREFETCH
8173 IndexDeletePrefetchState prefetch_state;
8174 int prefetch_distance;
8175#endif
8176 SnapshotData SnapshotNonVacuumable;
8177 int finalndeltids = 0,
8178 nblocksaccessed = 0;
8179
8180 /* State that's only used in bottom-up index deletion case */
8181 int nblocksfavorable = 0;
8182 int curtargetfreespace = delstate->bottomupfreespace,
8183 lastfreespace = 0,
8184 actualfreespace = 0;
8185 bool bottomup_final_block = false;
8186
8187 InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
8188
8189 /* Sort caller's deltids array by TID for further processing */
8190 index_delete_sort(delstate);
8191
8192 /*
8193 * Bottom-up case: resort deltids array in an order attuned to where the
8194 * greatest number of promising TIDs are to be found, and determine how
8195 * many blocks from the start of sorted array should be considered
8196 * favorable. This will also shrink the deltids array in order to
8197 * eliminate completely unfavorable blocks up front.
8198 */
8199 if (delstate->bottomup)
8200 nblocksfavorable = bottomup_sort_and_shrink(delstate);
8201
8202#ifdef USE_PREFETCH
8203 /* Initialize prefetch state. */
8204 prefetch_state.cur_hblkno = InvalidBlockNumber;
8205 prefetch_state.next_item = 0;
8206 prefetch_state.ndeltids = delstate->ndeltids;
8207 prefetch_state.deltids = delstate->deltids;
8208
8209 /*
8210 * Determine the prefetch distance that we will attempt to maintain.
8211 *
8212 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8213 * sure that isn't a catalog relation before we call code that does
8214 * syscache lookups, to avoid risk of deadlock.
8215 */
8216 if (IsCatalogRelation(rel))
8217 prefetch_distance = maintenance_io_concurrency;
8218 else
8219 prefetch_distance =
8220 get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
8221
8222 /* Cap initial prefetch distance for bottom-up deletion caller */
8223 if (delstate->bottomup)
8224 {
8225 Assert(nblocksfavorable >= 1);
8226 Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
8227 prefetch_distance = Min(prefetch_distance, nblocksfavorable);
8228 }
8229
8230 /* Start prefetching. */
8231 index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
8232#endif
8233
8234 /* Iterate over deltids, determine which to delete, check their horizon */
8235 Assert(delstate->ndeltids > 0);
8236 for (int i = 0; i < delstate->ndeltids; i++)
8237 {
8238 TM_IndexDelete *ideltid = &delstate->deltids[i];
8239 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8240 ItemPointer htid = &ideltid->tid;
8241 OffsetNumber offnum;
8242
8243 /*
8244 * Read buffer, and perform required extra steps each time a new block
8245 * is encountered. Avoid refetching if it's the same block as the one
8246 * from the last htid.
8247 */
8248 if (blkno == InvalidBlockNumber ||
8249 ItemPointerGetBlockNumber(htid) != blkno)
8250 {
8251 /*
8252 * Consider giving up early for bottom-up index deletion caller
8253 * first. (Only prefetch next-next block afterwards, when it
8254 * becomes clear that we're at least going to access the next
8255 * block in line.)
8256 *
8257 * Sometimes the first block frees so much space for bottom-up
8258 * caller that the deletion process can end without accessing any
8259 * more blocks. It is usually necessary to access 2 or 3 blocks
8260 * per bottom-up deletion operation, though.
8261 */
8262 if (delstate->bottomup)
8263 {
8264 /*
8265 * We often allow caller to delete a few additional items
8266 * whose entries we reached after the point that space target
8267 * from caller was satisfied. The cost of accessing the page
8268 * was already paid at that point, so it made sense to finish
8269 * it off. When that happened, we finalize everything here
8270 * (by finishing off the whole bottom-up deletion operation
8271 * without needlessly paying the cost of accessing any more
8272 * blocks).
8273 */
8274 if (bottomup_final_block)
8275 break;
8276
8277 /*
8278 * Give up when we didn't enable our caller to free any
8279 * additional space as a result of processing the page that we
8280 * just finished up with. This rule is the main way in which
8281 * we keep the cost of bottom-up deletion under control.
8282 */
8283 if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
8284 break;
8285 lastfreespace = actualfreespace; /* for next time */
8286
8287 /*
8288 * Deletion operation (which is bottom-up) will definitely
8289 * access the next block in line. Prepare for that now.
8290 *
8291 * Decay target free space so that we don't hang on for too
8292 * long with a marginal case. (Space target is only truly
8293 * helpful when it allows us to recognize that we don't need
8294 * to access more than 1 or 2 blocks to satisfy caller due to
8295 * agreeable workload characteristics.)
8296 *
8297 * We are a bit more patient when we encounter contiguous
8298 * blocks, though: these are treated as favorable blocks. The
8299 * decay process is only applied when the next block in line
8300 * is not a favorable/contiguous block. This is not an
8301 * exception to the general rule; we still insist on finding
8302 * at least one deletable item per block accessed. See
8303 * bottomup_nblocksfavorable() for full details of the theory
8304 * behind favorable blocks and heap block locality in general.
8305 *
8306 * Note: The first block in line is always treated as a
8307 * favorable block, so the earliest possible point that the
8308 * decay can be applied is just before we access the second
8309 * block in line. The Assert() verifies this for us.
8310 */
8311 Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
8312 if (nblocksfavorable > 0)
8313 nblocksfavorable--;
8314 else
8315 curtargetfreespace /= 2;
8316 }
8317
8318 /* release old buffer */
8319 if (BufferIsValid(buf))
8320 UnlockReleaseBuffer(buf);
8321
8322 blkno = ItemPointerGetBlockNumber(htid);
8323 buf = ReadBuffer(rel, blkno);
8324 nblocksaccessed++;
8325 Assert(!delstate->bottomup ||
8326 nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
8327
8328#ifdef USE_PREFETCH
8329
8330 /*
8331 * To maintain the prefetch distance, prefetch one more page for
8332 * each page we read.
8333 */
8334 index_delete_prefetch_buffer(rel, &prefetch_state, 1);
8335#endif
8336
8338
8339 page = BufferGetPage(buf);
8340 maxoff = PageGetMaxOffsetNumber(page);
8341 }
8342
8343 /*
8344 * In passing, detect index corruption involving an index page with a
8345 * TID that points to a location in the heap that couldn't possibly be
8346 * correct. We only do this with actual TIDs from caller's index page
8347 * (not items reached by traversing through a HOT chain).
8348 */
8349 index_delete_check_htid(delstate, page, maxoff, htid, istatus);
8350
8351 if (istatus->knowndeletable)
8352 Assert(!delstate->bottomup && !istatus->promising);
8353 else
8354 {
8355 ItemPointerData tmp = *htid;
8356 HeapTupleData heapTuple;
8357
8358 /* Are any tuples from this HOT chain non-vacuumable? */
8359 if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
8360 &heapTuple, NULL, true))
8361 continue; /* can't delete entry */
8362
8363 /* Caller will delete, since whole HOT chain is vacuumable */
8364 istatus->knowndeletable = true;
8365
8366 /* Maintain index free space info for bottom-up deletion case */
8367 if (delstate->bottomup)
8368 {
8369 Assert(istatus->freespace > 0);
8370 actualfreespace += istatus->freespace;
8371 if (actualfreespace >= curtargetfreespace)
8372 bottomup_final_block = true;
8373 }
8374 }
8375
8376 /*
8377 * Maintain snapshotConflictHorizon value for deletion operation as a
8378 * whole by advancing current value using heap tuple headers. This is
8379 * loosely based on the logic for pruning a HOT chain.
8380 */
8381 offnum = ItemPointerGetOffsetNumber(htid);
8382 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8383 for (;;)
8384 {
8385 ItemId lp;
8386 HeapTupleHeader htup;
8387
8388 /* Sanity check (pure paranoia) */
8389 if (offnum < FirstOffsetNumber)
8390 break;
8391
8392 /*
8393 * An offset past the end of page's line pointer array is possible
8394 * when the array was truncated
8395 */
8396 if (offnum > maxoff)
8397 break;
8398
8399 lp = PageGetItemId(page, offnum);
8400 if (ItemIdIsRedirected(lp))
8401 {
8402 offnum = ItemIdGetRedirect(lp);
8403 continue;
8404 }
8405
8406 /*
8407 * We'll often encounter LP_DEAD line pointers (especially with an
8408 * entry marked knowndeletable by our caller up front). No heap
8409 * tuple headers get examined for an htid that leads us to an
8410 * LP_DEAD item. This is okay because the earlier pruning
8411 * operation that made the line pointer LP_DEAD in the first place
8412 * must have considered the original tuple header as part of
8413 * generating its own snapshotConflictHorizon value.
8414 *
8415 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8416 * the same strategy that index vacuuming uses in all cases. Index
8417 * VACUUM WAL records don't even have a snapshotConflictHorizon
8418 * field of their own for this reason.
8419 */
8420 if (!ItemIdIsNormal(lp))
8421 break;
8422
8423 htup = (HeapTupleHeader) PageGetItem(page, lp);
8424
8425 /*
8426 * Check the tuple XMIN against prior XMAX, if any
8427 */
8428 if (TransactionIdIsValid(priorXmax) &&
8429 !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
8430 break;
8431
8432 HeapTupleHeaderAdvanceConflictHorizon(htup,
8433 &snapshotConflictHorizon);
8434
8435 /*
8436 * If the tuple is not HOT-updated, then we are at the end of this
8437 * HOT-chain. No need to visit later tuples from the same update
8438 * chain (they get their own index entries) -- just move on to
8439 * next htid from index AM caller.
8440 */
8441 if (!HeapTupleHeaderIsHotUpdated(htup))
8442 break;
8443
8444 /* Advance to next HOT chain member */
8445 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8446 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8447 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
8448 }
8449
8450 /* Enable further/final shrinking of deltids for caller */
8451 finalndeltids = i + 1;
8452 }
8453
8455
8456 /*
8457 * Shrink deltids array to exclude non-deletable entries at the end. This
8458 * is not just a minor optimization. Final deltids array size might be
8459 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8460 * ndeltids being zero in all cases with zero total deletable entries.
8461 */
8462 Assert(finalndeltids > 0 || delstate->bottomup);
8463 delstate->ndeltids = finalndeltids;
8464
8465 return snapshotConflictHorizon;
8466}
int maintenance_io_concurrency
Definition: bufmgr.c:184
#define Min(x, y)
Definition: c.h:1003
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8720
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:8018
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:187
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, const ItemPointerData *htid, TM_IndexStatus *istatus)
Definition: heapam.c:8103
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1743
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8508
static char buf[DEFAULT_XLOG_SEG_SIZE]
Definition: pg_test_fsync.c:71
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:254
int bottomupfreespace
Definition: tableam.h:249
TM_IndexDelete * deltids
Definition: tableam.h:253
ItemPointerData tid
Definition: tableam.h:212
bool knowndeletable
Definition: tableam.h:219
bool promising
Definition: tableam.h:222
int16 freespace
Definition: tableam.h:223

References Assert(), TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
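A hedged sketch of how an index AM reaches this function. The caller (for example nbtree's _bt_delitems_delete_check()) fills the deltids[] and status[] arrays of a TM_IndexDeleteOp and goes through table_index_delete_tuples(); on return, entries whose knowndeletable flag is set may be removed from the index, and the returned XID is the snapshot conflict horizon to put in the index deletion WAL record. The simple (non-bottom-up) setup shown here is an assumption for illustration.

#include "postgres.h"
#include "access/tableam.h"

static TransactionId
simple_index_deletion_sketch(Relation heapRel, TM_IndexDeleteOp *delstate)
{
    /* delstate->deltids[] / status[] are assumed to be filled in already */
    delstate->bottomup = false;          /* simple deletion of known-dead TIDs */
    delstate->bottomupfreespace = 0;

    return table_index_delete_tuples(heapRel, delstate);
}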

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void *  arg 
)

Definition at line 6401 of file heapam.c.

6404{
6405 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6406 TM_Result result;
6407 bool ret;
6408
6409#ifdef USE_ASSERT_CHECKING
6410 if (RelationGetRelid(relation) == RelationRelationId)
6411 check_inplace_rel_lock(oldtup_ptr);
6412#endif
6413
6414 Assert(BufferIsValid(buffer));
6415
6416 /*
6417 * Register shared cache invals if necessary. Other sessions may finish
6418 * inplace updates of this tuple between this step and LockTuple(). Since
6419 * inplace updates don't change cache keys, that's harmless.
6420 *
6421 * While it's tempting to register invals only after confirming we can
6422 * return true, the following obstacle precludes reordering steps that
6423 * way. Registering invals might reach a CatalogCacheInitializeCache()
6424 * that locks "buffer". That would hang indefinitely if running after our
6425 * own LockBuffer(). Hence, we must register invals before LockBuffer().
6426 */
6427 CacheInvalidateHeapTupleInplace(relation, oldtup_ptr);
6428
6429 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6430 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6431
6432 /*----------
6433 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6434 *
6435 * - wait unconditionally
6436 * - already locked tuple above, since inplace needs that unconditionally
6437 * - don't recheck header after wait: simpler to defer to next iteration
6438 * - don't try to continue even if the updater aborts: likewise
6439 * - no crosscheck
6440 */
6441 result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
6442 buffer);
6443
6444 if (result == TM_Invisible)
6445 {
6446 /* no known way this can happen */
6447 ereport(ERROR,
6448 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6449 errmsg_internal("attempted to overwrite invisible tuple")));
6450 }
6451 else if (result == TM_SelfModified)
6452 {
6453 /*
6454 * CREATE INDEX might reach this if an expression is silly enough to
6455 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6456 * statements might get here after a heap_update() of the same row, in
6457 * the absence of an intervening CommandCounterIncrement().
6458 */
6459 ereport(ERROR,
6460 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6461 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6462 }
6463 else if (result == TM_BeingModified)
6464 {
6465 TransactionId xwait;
6466 uint16 infomask;
6467
6468 xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
6469 infomask = oldtup.t_data->t_infomask;
6470
6471 if (infomask & HEAP_XMAX_IS_MULTI)
6472 {
6473 LockTupleMode lockmode = LockTupleNoKeyExclusive;
6474 MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
6475 int remain;
6476
6477 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
6478 lockmode, NULL))
6479 {
6481 release_callback(arg);
6482 ret = false;
6483 MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
6484 relation, &oldtup.t_self, XLTW_Update,
6485 &remain);
6486 }
6487 else
6488 ret = true;
6489 }
6490 else if (TransactionIdIsCurrentTransactionId(xwait))
6491 ret = true;
6492 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
6493 ret = true;
6494 else
6495 {
6497 release_callback(arg);
6498 ret = false;
6499 XactLockTableWait(xwait, relation, &oldtup.t_self,
6500 XLTW_Update);
6501 }
6502 }
6503 else
6504 {
6505 ret = (result == TM_Ok);
6506 if (!ret)
6507 {
6509 release_callback(arg);
6510 }
6511 }
6512
6513 /*
6514 * GetCatalogSnapshot() relies on invalidation messages to know when to
6515 * take a new snapshot. COMMIT of xwait is responsible for sending the
6516 * invalidation. We're not acquiring heavyweight locks sufficient to
6517 * block if not yet sent, so we must take a new snapshot to ensure a later
6518 * attempt has a fair chance. While we don't need this if xwait aborted,
6519 * don't bother optimizing that.
6520 */
6521 if (!ret)
6522 {
6523 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6524 ForgetInplace_Inval();
6525 InvalidateCatalogSnapshot();
6526 }
6527 return ret;
6528}
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(uint16 infomask)
Definition: htup_details.h:275
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple key_equivalent_tuple)
Definition: inval.c:1593
void ForgetInplace_Inval(void)
Definition: inval.c:1286
void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition: lmgr.c:601
void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition: lmgr.c:562
@ XLTW_Update
Definition: lmgr.h:27
#define InplaceUpdateTupleLock
Definition: lockdefs.h:48
LockTupleMode
Definition: lockoptions.h:50
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
MultiXactStatus
Definition: multixact.h:37
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:43
void * arg
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:455
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:830

References arg, Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)

Definition at line 6539 of file heapam.c.

6542{
6543 HeapTupleHeader htup = oldtup->t_data;
6544 uint32 oldlen;
6545 uint32 newlen;
6546 char *dst;
6547 char *src;
6548 int nmsgs = 0;
6549 SharedInvalidationMessage *invalMessages = NULL;
6550 bool RelcacheInitFileInval = false;
6551
6552 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6553 oldlen = oldtup->t_len - htup->t_hoff;
6554 newlen = tuple->t_len - tuple->t_data->t_hoff;
6555 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6556 elog(ERROR, "wrong tuple length");
6557
6558 dst = (char *) htup + htup->t_hoff;
6559 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6560
6561 /* Like RecordTransactionCommit(), log only if needed */
6562 if (XLogStandbyInfoActive())
6563 nmsgs = inplaceGetInvalidationMessages(&invalMessages,
6564 &RelcacheInitFileInval);
6565
6566 /*
6567 * Unlink relcache init files as needed. If unlinking, acquire
6568 * RelCacheInitLock until after associated invalidations. By doing this
6569 * in advance, if we checkpoint and then crash between inplace
6570 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6571 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6572 * neglect to PANIC on EIO.
6573 */
6574 PreInplace_Inval();
6575
6576 /*----------
6577 * NO EREPORT(ERROR) from here till changes are complete
6578 *
6579 * Our buffer lock won't stop a reader having already pinned and checked
6580 * visibility for this tuple. Hence, we write WAL first, then mutate the
6581 * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6582 * checkpoint delay makes that acceptable. With the usual order of
6583 * changes, a crash after memcpy() and before XLogInsert() could allow
6584 * datfrozenxid to overtake relfrozenxid:
6585 *
6586 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6587 * ["R" is a VACUUM tbl]
6588 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6589 * D: systable_getnext() returns pg_class tuple of tbl
6590 * R: memcpy() into pg_class tuple of tbl
6591 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6592 * [crash]
6593 * [recovery restores datfrozenxid w/o relfrozenxid]
6594 *
6595 * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint().
6596 * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack.
6597 * The stack copy facilitates a FPI of the post-mutation block before we
6598 * accept other sessions seeing it. DELAY_CHKPT_START allows us to
6599 * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint()
6600 * can operate under BUFFER_LOCK_SHARED, it can't avoid DELAY_CHKPT_START.
6601 * This function, however, likely could avoid it with the following order
6602 * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use
6603 * DELAY_CHKPT_START here, too, as a way to have fewer distinct code
6604 * patterns to analyze. Inplace update isn't so frequent that it should
6605 * pursue the small optimization of skipping DELAY_CHKPT_START.
6606 */
6610
6611 /* XLOG stuff */
6612 if (RelationNeedsWAL(relation))
6613 {
6614 xl_heap_inplace xlrec;
6615 PGAlignedBlock copied_buffer;
6616 char *origdata = (char *) BufferGetBlock(buffer);
6617 Page page = BufferGetPage(buffer);
6618 uint16 lower = ((PageHeader) page)->pd_lower;
6619 uint16 upper = ((PageHeader) page)->pd_upper;
6620 uintptr_t dst_offset_in_block;
6621 RelFileLocator rlocator;
6622 ForkNumber forkno;
6623 BlockNumber blkno;
6624 XLogRecPtr recptr;
6625
6626 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6627 xlrec.dbId = MyDatabaseId;
6628 xlrec.tsId = MyDatabaseTableSpace;
6629 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6630 xlrec.nmsgs = nmsgs;
6631
6634 if (nmsgs != 0)
6635 XLogRegisterData(invalMessages,
6636 nmsgs * sizeof(SharedInvalidationMessage));
6637
6638 /* register block matching what buffer will look like after changes */
6639 memcpy(copied_buffer.data, origdata, lower);
6640 memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6641 dst_offset_in_block = dst - origdata;
6642 memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6643 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6644 Assert(forkno == MAIN_FORKNUM);
6645 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6647 XLogRegisterBufData(0, src, newlen);
6648
6649 /* inplace updates aren't decoded atm, don't log the origin */
6650
6651 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6652
6653 PageSetLSN(page, recptr);
6654 }
6655
6656 memcpy(dst, src, newlen);
6657
6658 MarkBufferDirty(buffer);
6659
6661
6662 /*
6663 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6664 * do this before UnlockTuple().
6665 */
6666 AtInplace_Inval();
6667
6668 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
6669 END_CRIT_SECTION();
6670 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6671
6672 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6673
6674 /*
6675 * Queue a transactional inval, for logical decoding and for third-party
6676 * code that might have been relying on it since long before inplace
6677 * update adopted immediate invalidation. See README.tuplock section
6678 * "Reading inplace-updated columns" for logical decoding details.
6679 */
6681 CacheInvalidateHeapTuple(relation, tuple, NULL);
6682}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:4339
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:403
PageHeaderData * PageHeader
Definition: bufpage.h:173
uint32_t uint32
Definition: c.h:552
Oid MyDatabaseTableSpace
Definition: globals.c:96
Oid MyDatabaseId
Definition: globals.c:94
#define MinSizeOfHeapInplace
Definition: heapam_xlog.h:444
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition: inval.c:930
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition: inval.c:1088
void PreInplace_Inval(void)
Definition: inval.c:1250
void AtInplace_Inval(void)
Definition: inval.c:1263
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
#define DELAY_CHKPT_START
Definition: proc.h:135
ForkNumber
Definition: relpath.h:56
PGPROC * MyProc
Definition: proc.c:67
char data[BLCKSZ]
Definition: c.h:1116
int delayChkptFlags
Definition: proc.h:257
OffsetNumber offnum
Definition: heapam_xlog.h:436
bool relcacheInitFileInval
Definition: heapam_xlog.h:439
#define XLogStandbyInfoActive()
Definition: xlog.h:125
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition: xloginsert.c:409
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition: xloginsert.c:313

References AcceptInvalidationMessages(), Assert(), AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), PGAlignedBlock::data, xl_heap_inplace::dbId, DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, xl_heap_inplace::nmsgs, xl_heap_inplace::offnum, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, xl_heap_inplace::relcacheInitFileInval, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, xl_heap_inplace::tsId, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().
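A hedged sketch of the supported path to an in-place update: systable_inplace_update_begin()/finish() (declared in access/genam.h on git master) call heap_inplace_lock() and heap_inplace_update_and_unlock() internally. The pg_class column modified here and the lock levels are illustrative assumptions only.

#include "postgres.h"
#include "access/genam.h"
#include "access/htup_details.h"
#include "access/table.h"
#include "catalog/indexing.h"
#include "catalog/pg_class.h"
#include "utils/fmgroids.h"

static void
set_relhasindex_inplace(Oid relid, bool newvalue)
{
    Relation    pg_class = table_open(RelationRelationId, RowExclusiveLock);
    ScanKeyData key[1];
    HeapTuple   tup;
    void       *state;

    ScanKeyInit(&key[0], Anum_pg_class_oid, BTEqualStrategyNumber,
                F_OIDEQ, ObjectIdGetDatum(relid));

    /* locks the tuple and returns a copy of the current version in tup */
    systable_inplace_update_begin(pg_class, ClassOidIndexId, true,
                                  NULL, 1, key, &tup, &state);
    if (!HeapTupleIsValid(tup))
        elog(ERROR, "pg_class entry for relation %u vanished", relid);

    /* mutate the copy; the change is written back without a new version */
    ((Form_pg_class) GETSTRUCT(tup))->relhasindex = newvalue;
    systable_inplace_update_finish(state, tup);

    heap_freetuple(tup);
    table_close(pg_class, RowExclusiveLock);
}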

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2106 of file heapam.c.

2108{
2109 TransactionId xid = GetCurrentTransactionId();
2110 HeapTuple heaptup;
2111 Buffer buffer;
2112 Buffer vmbuffer = InvalidBuffer;
2113 bool all_visible_cleared = false;
2114
2115 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2116 Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
2117 RelationGetNumberOfAttributes(relation));
2118
2119 AssertHasSnapshotForToast(relation);
2120
2121 /*
2122 * Fill in tuple header fields and toast the tuple if necessary.
2123 *
2124 * Note: below this point, heaptup is the data we actually intend to store
2125 * into the relation; tup is the caller's original untoasted data.
2126 */
2127 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2128
2129 /*
2130 * Find buffer to insert this tuple into. If the page is all visible,
2131 * this will also pin the requisite visibility map page.
2132 */
2133 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2134 InvalidBuffer, options, bistate,
2135 &vmbuffer, NULL,
2136 0);
2137
2138 /*
2139 * We're about to do the actual insert -- but check for conflict first, to
2140 * avoid possibly having to roll back work we've just done.
2141 *
2142 * This is safe without a recheck as long as there is no possibility of
2143 * another process scanning the page between this check and the insert
2144 * being visible to the scan (i.e., an exclusive buffer content lock is
2145 * continuously held from this point until the tuple insert is visible).
2146 *
2147 * For a heap insert, we only need to check for table-level SSI locks. Our
2148 * new tuple can't possibly conflict with existing tuple locks, and heap
2149 * page locks are only consolidated versions of tuple locks; they do not
2150 * lock "gaps" as index page locks do. So we don't need to specify a
2151 * buffer when making the call, which makes for a faster check.
2152 */
2153 CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2154
2155 /* NO EREPORT(ERROR) from here till changes are logged */
2156 START_CRIT_SECTION();
2157
2158 RelationPutHeapTuple(relation, buffer, heaptup,
2159 (options & HEAP_INSERT_SPECULATIVE) != 0);
2160
2161 if (PageIsAllVisible(BufferGetPage(buffer)))
2162 {
2163 all_visible_cleared = true;
2164 PageClearAllVisible(BufferGetPage(buffer));
2165 visibilitymap_clear(relation,
2166 ItemPointerGetBlockNumber(&(heaptup->t_self)),
2167 vmbuffer, VISIBILITYMAP_VALID_BITS);
2168 }
2169
2170 /*
2171 * XXX Should we set PageSetPrunable on this page ?
2172 *
2173 * The inserting transaction may eventually abort thus making this tuple
2174 * DEAD and hence available for pruning. Though we don't want to optimize
2175 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2176 * aborted tuple will never be pruned until next vacuum is triggered.
2177 *
2178 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2179 */
2180
2181 MarkBufferDirty(buffer);
2182
2183 /* XLOG stuff */
2184 if (RelationNeedsWAL(relation))
2185 {
2186 xl_heap_insert xlrec;
2187 xl_heap_header xlhdr;
2188 XLogRecPtr recptr;
2189 Page page = BufferGetPage(buffer);
2190 uint8 info = XLOG_HEAP_INSERT;
2191 int bufflags = 0;
2192
2193 /*
2194 * If this is a catalog, we need to transmit combo CIDs to properly
2195 * decode, so log that as well.
2196 */
2197 if (RelationIsAccessibleInLogicalDecoding(relation))
2198 log_heap_new_cid(relation, heaptup);
2199
2200 /*
2201 * If this is the single and first tuple on page, we can reinit the
2202 * page instead of restoring the whole thing. Set flag, and hide
2203 * buffer references from XLogInsert.
2204 */
2205 if (ItemPointerGetOffsetNumber(&heaptup->t_self) == FirstOffsetNumber &&
2206 PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2207 {
2208 info |= XLOG_HEAP_INIT_PAGE;
2209 bufflags |= REGBUF_WILL_INIT;
2210 }
2211
2212 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2213 xlrec.flags = 0;
2214 if (all_visible_cleared)
2219
2220 /*
2221 * For logical decoding, we need the tuple even if we're doing a full
2222 * page write, so make sure it's included even if we take a full-page
2223 * image. (XXX We could alternatively store a pointer into the FPW).
2224 */
2225 if (RelationIsLogicallyLogged(relation) &&
2227 {
2229 bufflags |= REGBUF_KEEP_DATA;
2230
2231 if (IsToastRelation(relation))
2233 }
2234
2237
2238 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2239 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2240 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2241
2242 /*
2243 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2244 * write the whole page to the xlog, we don't need to store
2245 * xl_heap_header in the xlog.
2246 */
2247 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2249 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2251 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2252 heaptup->t_len - SizeofHeapTupleHeader);
2253
2254 /* filtering by origin on a row level is much more efficient */
2256
2257 recptr = XLogInsert(RM_HEAP_ID, info);
2258
2259 PageSetLSN(page, recptr);
2260 }
2261
2263
2264 UnlockReleaseBuffer(buffer);
2265 if (vmbuffer != InvalidBuffer)
2266 ReleaseBuffer(vmbuffer);
2267
2268 /*
2269 * If tuple is cacheable, mark it for invalidation from the caches in case
2270 * we abort. Note it is OK to do this after releasing the buffer, because
2271 * the heaptup data structure is all in local memory, not in the shared
2272 * buffer.
2273 */
2274 CacheInvalidateHeapTuple(relation, heaptup, NULL);
2275
2276 /* Note: speculative insertions are counted too, even if aborted later */
2277 pgstat_count_heap_insert(relation, 1);
2278
2279 /*
2280 * If heaptup is a private copy, release it. Don't forget to copy t_self
2281 * back to the caller's image, too.
2282 */
2283 if (heaptup != tup)
2284 {
2285 tup->t_self = heaptup->t_self;
2286 heap_freetuple(heaptup);
2287 }
2288}
uint8_t uint8
Definition: c.h:550
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2297
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:40
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:39
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:33
#define SizeOfHeapInsert
Definition: heapam_xlog.h:168
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:500
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:577
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:711
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:521
OffsetNumber offnum
Definition: heapam_xlog.h:162
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:36
#define REGBUF_WILL_INIT
Definition: xloginsert.h:34

References Assert(), AssertHasSnapshotForToast(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
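
For illustration, a minimal sketch (not from the PostgreSQL tree) of how a backend caller might insert a single tuple with heap_insert(). It assumes rel is an already-opened, suitably locked heap relation and that values/isnull match its tuple descriptor; in-core code normally goes through simple_heap_insert() or the table AM layer instead.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "utils/rel.h"

/* Hypothetical helper: insert one tuple into an open, locked relation. */
static void
insert_one_tuple(Relation rel, Datum *values, bool *isnull)
{
    HeapTuple   tup;

    /* Build a tuple matching the relation's row type. */
    tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

    /* Current command id for visibility; no options, no bulk-insert state. */
    heap_insert(rel, tup, GetCurrentCommandId(true), 0, NULL);

    /* heap_insert() set tup->t_self to the new tuple's TID; free our copy. */
    heap_freetuple(tup);
}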

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer *  buffer,
TM_FailureData *  tmfd 
)

Definition at line 4608 of file heapam.c.

4612{
4613 TM_Result result;
4614 ItemPointer tid = &(tuple->t_self);
4615 ItemId lp;
4616 Page page;
4617 Buffer vmbuffer = InvalidBuffer;
4618 BlockNumber block;
4619 TransactionId xid,
4620 xmax;
4621 uint16 old_infomask,
4622 new_infomask,
4623 new_infomask2;
4624 bool first_time = true;
4625 bool skip_tuple_lock = false;
4626 bool have_tuple_lock = false;
4627 bool cleared_all_frozen = false;
4628
4629 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4630 block = ItemPointerGetBlockNumber(tid);
4631
4632 /*
4633 * Before locking the buffer, pin the visibility map page if it appears to
4634 * be necessary. Since we haven't got the lock yet, someone else might be
4635 * in the middle of changing this, so we'll need to recheck after we have
4636 * the lock.
4637 */
4638 if (PageIsAllVisible(BufferGetPage(*buffer)))
4639 visibilitymap_pin(relation, block, &vmbuffer);
4640
4642
4643 page = BufferGetPage(*buffer);
4646
4647 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4648 tuple->t_len = ItemIdGetLength(lp);
4649 tuple->t_tableOid = RelationGetRelid(relation);
4650
4651l3:
4652 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4653
4654 if (result == TM_Invisible)
4655 {
4656 /*
4657 * This is possible, but only when locking a tuple for ON CONFLICT
4658 * UPDATE. We return this value here rather than throwing an error in
4659 * order to give that case the opportunity to throw a more specific
4660 * error.
4661 */
4662 result = TM_Invisible;
4663 goto out_locked;
4664 }
4665 else if (result == TM_BeingModified ||
4666 result == TM_Updated ||
4667 result == TM_Deleted)
4668 {
4669 TransactionId xwait;
4670 uint16 infomask;
4671 uint16 infomask2;
4672 bool require_sleep;
4673 ItemPointerData t_ctid;
4674
4675 /* must copy state data before unlocking buffer */
4676 xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4677 infomask = tuple->t_data->t_infomask;
4678 infomask2 = tuple->t_data->t_infomask2;
4679 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4680
4682
4683 /*
4684 * If any subtransaction of the current top transaction already holds
4685 * a lock as strong as or stronger than what we're requesting, we
4686 * effectively hold the desired lock already. We *must* succeed
4687 * without trying to take the tuple lock, else we will deadlock
4688 * against anyone wanting to acquire a stronger lock.
4689 *
4690 * Note we only do this the first time we loop on the HTSU result;
4691 * there is no point in testing in subsequent passes, because
4692 * evidently our own transaction cannot have acquired a new lock after
4693 * the first time we checked.
4694 */
4695 if (first_time)
4696 {
4697 first_time = false;
4698
4699 if (infomask & HEAP_XMAX_IS_MULTI)
4700 {
4701 int i;
4702 int nmembers;
4703 MultiXactMember *members;
4704
4705 /*
4706 * We don't need to allow old multixacts here; if that had
4707 * been the case, HeapTupleSatisfiesUpdate would have returned
4708 * MayBeUpdated and we wouldn't be here.
4709 */
4710 nmembers =
4711 GetMultiXactIdMembers(xwait, &members, false,
4712 HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4713
4714 for (i = 0; i < nmembers; i++)
4715 {
4716 /* only consider members of our own transaction */
4717 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4718 continue;
4719
4720 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4721 {
4722 pfree(members);
4723 result = TM_Ok;
4724 goto out_unlocked;
4725 }
4726 else
4727 {
4728 /*
4729 * Disable acquisition of the heavyweight tuple lock.
4730 * Otherwise, when promoting a weaker lock, we might
4731 * deadlock with another locker that has acquired the
4732 * heavyweight tuple lock and is waiting for our
4733 * transaction to finish.
4734 *
4735 * Note that in this case we still need to wait for
4736 * the multixact if required, to avoid acquiring
4737 * conflicting locks.
4738 */
4739 skip_tuple_lock = true;
4740 }
4741 }
4742
4743 if (members)
4744 pfree(members);
4745 }
4747 {
4748 switch (mode)
4749 {
4750 case LockTupleKeyShare:
4752 HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4753 HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4754 result = TM_Ok;
4755 goto out_unlocked;
4756 case LockTupleShare:
4757 if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4758 HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4759 {
4760 result = TM_Ok;
4761 goto out_unlocked;
4762 }
4763 break;
4765 if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4766 {
4767 result = TM_Ok;
4768 goto out_unlocked;
4769 }
4770 break;
4771 case LockTupleExclusive:
4772 if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4773 infomask2 & HEAP_KEYS_UPDATED)
4774 {
4775 result = TM_Ok;
4776 goto out_unlocked;
4777 }
4778 break;
4779 }
4780 }
4781 }
4782
4783 /*
4784 * Initially assume that we will have to wait for the locking
4785 * transaction(s) to finish. We check various cases below in which
4786 * this can be turned off.
4787 */
4788 require_sleep = true;
4789 if (mode == LockTupleKeyShare)
4790 {
4791 /*
4792 * If we're requesting KeyShare, and there's no update present, we
4793 * don't need to wait. Even if there is an update, we can still
4794 * continue if the key hasn't been modified.
4795 *
4796 * However, if there are updates, we need to walk the update chain
4797 * to mark future versions of the row as locked, too. That way,
4798 * if somebody deletes that future version, we're protected
4799 * against the key going away. This locking of future versions
4800 * could block momentarily, if a concurrent transaction is
4801 * deleting a key; or it could return a value to the effect that
4802 * the transaction deleting the key has already committed. So we
4803 * do this before re-locking the buffer; otherwise this would be
4804 * prone to deadlocks.
4805 *
4806 * Note that the TID we're locking was grabbed before we unlocked
4807 * the buffer. For it to change while we're not looking, the
4808 * other properties we're testing for below after re-locking the
4809 * buffer would also change, in which case we would restart this
4810 * loop above.
4811 */
4812 if (!(infomask2 & HEAP_KEYS_UPDATED))
4813 {
4814 bool updated;
4815
4816 updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4817
4818 /*
4819 * If there are updates, follow the update chain; bail out if
4820 * that cannot be done.
4821 */
4822 if (follow_updates && updated &&
4823 !ItemPointerEquals(&tuple->t_self, &t_ctid))
4824 {
4825 TM_Result res;
4826
4827 res = heap_lock_updated_tuple(relation,
4828 infomask, xwait, &t_ctid,
4830 mode);
4831 if (res != TM_Ok)
4832 {
4833 result = res;
4834 /* recovery code expects to have buffer lock held */
4836 goto failed;
4837 }
4838 }
4839
4841
4842 /*
4843 * Make sure it's still an appropriate lock, else start over.
4844 * Also, if it wasn't updated before we released the lock, but
4845 * is updated now, we start over too; the reason is that we
4846 * now need to follow the update chain to lock the new
4847 * versions.
4848 */
4849 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4850 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4851 !updated))
4852 goto l3;
4853
4854 /* Things look okay, so we can skip sleeping */
4855 require_sleep = false;
4856
4857 /*
4858 * Note we allow Xmax to change here; other updaters/lockers
4859 * could have modified it before we grabbed the buffer lock.
4860 * However, this is not a problem, because with the recheck we
4861 * just did we ensure that they still don't conflict with the
4862 * lock we want.
4863 */
4864 }
4865 }
4866 else if (mode == LockTupleShare)
4867 {
4868 /*
4869 * If we're requesting Share, we can similarly avoid sleeping if
4870 * there's no update and no exclusive lock present.
4871 */
4872 if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4873 !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4874 {
4876
4877 /*
4878 * Make sure it's still an appropriate lock, else start over.
4879 * See above about allowing xmax to change.
4880 */
4883 goto l3;
4884 require_sleep = false;
4885 }
4886 }
4887 else if (mode == LockTupleNoKeyExclusive)
4888 {
4889 /*
4890 * If we're requesting NoKeyExclusive, we might also be able to
4891 * avoid sleeping; just ensure that there no conflicting lock
4892 * already acquired.
4893 */
4894 if (infomask & HEAP_XMAX_IS_MULTI)
4895 {
4896 if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4897 mode, NULL))
4898 {
4899 /*
4900 * No conflict, but if the xmax changed under us in the
4901 * meantime, start over.
4902 */
4904 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4906 xwait))
4907 goto l3;
4908
4909 /* otherwise, we're good */
4910 require_sleep = false;
4911 }
4912 }
4913 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4914 {
4916
4917 /* if the xmax changed in the meantime, start over */
4918 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4920 xwait))
4921 goto l3;
4922 /* otherwise, we're good */
4923 require_sleep = false;
4924 }
4925 }
4926
4927 /*
4928 * As a check independent from those above, we can also avoid sleeping
4929 * if the current transaction is the sole locker of the tuple. Note
4930 * that the strength of the lock already held is irrelevant; this is
4931 * not about recording the lock in Xmax (which will be done regardless
4932 * of this optimization, below). Also, note that the cases where we
4933 * hold a lock stronger than we are requesting are already handled
4934 * above by not doing anything.
4935 *
4936 * Note we only deal with the non-multixact case here; MultiXactIdWait
4937 * is well equipped to deal with this situation on its own.
4938 */
4939 if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4941 {
4942 /* ... but if the xmax changed in the meantime, start over */
4944 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4946 xwait))
4947 goto l3;
4949 require_sleep = false;
4950 }
4951
4952 /*
4953 * Time to sleep on the other transaction/multixact, if necessary.
4954 *
4955 * If the other transaction is an update/delete that's already
4956 * committed, then sleeping cannot possibly do any good: if we're
4957 * required to sleep, get out to raise an error instead.
4958 *
4959 * By here, we either have already acquired the buffer exclusive lock,
4960 * or we must wait for the locking transaction or multixact; so below
4961 * we ensure that we grab buffer lock after the sleep.
4962 */
4963 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4964 {
4966 goto failed;
4967 }
4968 else if (require_sleep)
4969 {
4970 /*
4971 * Acquire tuple lock to establish our priority for the tuple, or
4972 * die trying. LockTuple will release us when we are next-in-line
4973 * for the tuple. We must do this even if we are share-locking,
4974 * but not if we already have a weaker lock on the tuple.
4975 *
4976 * If we are forced to "start over" below, we keep the tuple lock;
4977 * this arranges that we stay at the head of the line while
4978 * rechecking tuple state.
4979 */
4980 if (!skip_tuple_lock &&
4981 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4982 &have_tuple_lock))
4983 {
4984 /*
4985 * This can only happen if wait_policy is Skip and the lock
4986 * couldn't be obtained.
4987 */
4988 result = TM_WouldBlock;
4989 /* recovery code expects to have buffer lock held */
4991 goto failed;
4992 }
4993
4994 if (infomask & HEAP_XMAX_IS_MULTI)
4995 {
4997
4998 /* We only ever lock tuples, never update them */
4999 if (status >= MultiXactStatusNoKeyUpdate)
5000 elog(ERROR, "invalid lock mode in heap_lock_tuple");
5001
5002 /* wait for multixact to end, or die trying */
5003 switch (wait_policy)
5004 {
5005 case LockWaitBlock:
5006 MultiXactIdWait((MultiXactId) xwait, status, infomask,
5007 relation, &tuple->t_self, XLTW_Lock, NULL);
5008 break;
5009 case LockWaitSkip:
5011 status, infomask, relation,
5012 NULL, false))
5013 {
5014 result = TM_WouldBlock;
5015 /* recovery code expects to have buffer lock held */
5017 goto failed;
5018 }
5019 break;
5020 case LockWaitError:
5022 status, infomask, relation,
5023 NULL, log_lock_failures))
5024 ereport(ERROR,
5025 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
5026 errmsg("could not obtain lock on row in relation \"%s\"",
5027 RelationGetRelationName(relation))));
5028
5029 break;
5030 }
5031
5032 /*
5033 * Of course, the multixact might not be done here: if we're
5034 * requesting a light lock mode, other transactions with light
5035 * locks could still be alive, as well as locks owned by our
5036 * own xact or other subxacts of this backend. We need to
5037 * preserve the surviving MultiXact members. Note that it
5038 * isn't absolutely necessary in the latter case, but doing so
5039 * is simpler.
5040 */
5041 }
5042 else
5043 {
5044 /* wait for regular transaction to end, or die trying */
5045 switch (wait_policy)
5046 {
5047 case LockWaitBlock:
5048 XactLockTableWait(xwait, relation, &tuple->t_self,
5049 XLTW_Lock);
5050 break;
5051 case LockWaitSkip:
5052 if (!ConditionalXactLockTableWait(xwait, false))
5053 {
5054 result = TM_WouldBlock;
5055 /* recovery code expects to have buffer lock held */
5057 goto failed;
5058 }
5059 break;
5060 case LockWaitError:
5062 ereport(ERROR,
5063 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
5064 errmsg("could not obtain lock on row in relation \"%s\"",
5065 RelationGetRelationName(relation))));
5066 break;
5067 }
5068 }
5069
5070 /* if there are updates, follow the update chain */
5071 if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
5072 !ItemPointerEquals(&tuple->t_self, &t_ctid))
5073 {
5074 TM_Result res;
5075
5076 res = heap_lock_updated_tuple(relation,
5077 infomask, xwait, &t_ctid,
5079 mode);
5080 if (res != TM_Ok)
5081 {
5082 result = res;
5083 /* recovery code expects to have buffer lock held */
5085 goto failed;
5086 }
5087 }
5088
5090
5091 /*
5092 * xwait is done, but if xwait had just locked the tuple then some
5093 * other xact could update this tuple before we get to this point.
5094 * Check for xmax change, and start over if so.
5095 */
5096 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
5098 xwait))
5099 goto l3;
5100
5101 if (!(infomask & HEAP_XMAX_IS_MULTI))
5102 {
5103 /*
5104 * Otherwise check if it committed or aborted. Note we cannot
5105 * be here if the tuple was only locked by somebody who didn't
5106 * conflict with us; that would have been handled above. So
5107 * that transaction must necessarily be gone by now. But
5108 * don't check for this in the multixact case, because some
5109 * locker transactions might still be running.
5110 */
5111 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5112 }
5113 }
5114
5115 /* By here, we're certain that we hold buffer exclusive lock again */
5116
5117 /*
5118 * We may lock if previous xmax aborted, or if it committed but only
5119 * locked the tuple without updating it; or if we didn't have to wait
5120 * at all for whatever reason.
5121 */
5122 if (!require_sleep ||
5123 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5126 result = TM_Ok;
5127 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5128 result = TM_Updated;
5129 else
5130 result = TM_Deleted;
5131 }
5132
5133failed:
5134 if (result != TM_Ok)
5135 {
5136 Assert(result == TM_SelfModified || result == TM_Updated ||
5137 result == TM_Deleted || result == TM_WouldBlock);
5138
5139 /*
5140 * When locking a tuple under LockWaitSkip semantics and we fail with
5141 * TM_WouldBlock above, it's possible for concurrent transactions to
5142 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5143 * this assert is slightly different from the equivalent one in
5144 * heap_delete and heap_update.
5145 */
5146 Assert((result == TM_WouldBlock) ||
5147 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5148 Assert(result != TM_Updated ||
5149 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5150 tmfd->ctid = tuple->t_data->t_ctid;
5151 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5152 if (result == TM_SelfModified)
5153 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5154 else
5155 tmfd->cmax = InvalidCommandId;
5156 goto out_locked;
5157 }
5158
5159 /*
5160 * If we didn't pin the visibility map page and the page has become all
5161 * visible while we were busy locking the buffer, or during some
5162 * subsequent window during which we had it unlocked, we'll have to unlock
5163 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5164 * unfortunate, especially since we'll now have to recheck whether the
5165 * tuple has been locked or updated under us, but hopefully it won't
5166 * happen very often.
5167 */
5168 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5169 {
5171 visibilitymap_pin(relation, block, &vmbuffer);
5173 goto l3;
5174 }
5175
5176 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5177 old_infomask = tuple->t_data->t_infomask;
5178
5179 /*
5180 * If this is the first possibly-multixact-able operation in the current
5181 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5182 * certain that the transaction will never become a member of any older
5183 * MultiXactIds than that. (We have to do this even if we end up just
5184 * using our own TransactionId below, since some other backend could
5185 * incorporate our XID into a MultiXact immediately afterwards.)
5186 */
5188
5189 /*
5190 * Compute the new xmax and infomask to store into the tuple. Note we do
5191 * not modify the tuple just yet, because that would leave it in the wrong
5192 * state if multixact.c elogs.
5193 */
5194 compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
5195 GetCurrentTransactionId(), mode, false,
5196 &xid, &new_infomask, &new_infomask2);
5197
5199
5200 /*
5201 * Store transaction information of xact locking the tuple.
5202 *
5203 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5204 * possibly generating a useless combo CID. Moreover, if we're locking a
5205 * previously updated tuple, it's important to preserve the Cmax.
5206 *
5207 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5208 * we would break the HOT chain.
5209 */
5210 tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
5211 tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
5212 tuple->t_data->t_infomask |= new_infomask;
5213 tuple->t_data->t_infomask2 |= new_infomask2;
5214 if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5216 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5217
5218 /*
5219 * Make sure there is no forward chain link in t_ctid. Note that in the
5220 * cases where the tuple has been updated, we must not overwrite t_ctid,
5221 * because it was set by the updater. Moreover, if the tuple has been
5222 * updated, we need to follow the update chain to lock the new versions of
5223 * the tuple as well.
5224 */
5225 if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5226 tuple->t_data->t_ctid = *tid;
5227
5228 /* Clear only the all-frozen bit on visibility map if needed */
5229 if (PageIsAllVisible(page) &&
5230 visibilitymap_clear(relation, block, vmbuffer,
5232 cleared_all_frozen = true;
5233
5234
5235 MarkBufferDirty(*buffer);
5236
5237 /*
5238 * XLOG stuff. You might think that we don't need an XLOG record because
5239 * there is no state change worth restoring after a crash. You would be
5240 * wrong however: we have just written either a TransactionId or a
5241 * MultiXactId that may never have been seen on disk before, and we need
5242 * to make sure that there are XLOG entries covering those ID numbers.
5243 * Else the same IDs might be re-used after a crash, which would be
5244 * disastrous if this page made it to disk before the crash. Essentially
5245 * we have to enforce the WAL log-before-data rule even in this case.
5246 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5247 * entries for everything anyway.)
5248 */
5249 if (RelationNeedsWAL(relation))
5250 {
5251 xl_heap_lock xlrec;
5252 XLogRecPtr recptr;
5253
5256
5257 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5258 xlrec.xmax = xid;
5259 xlrec.infobits_set = compute_infobits(new_infomask,
5260 tuple->t_data->t_infomask2);
5261 xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
5263
5264 /* we don't decode row locks atm, so no need to log the origin */
5265
5266 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
5267
5268 PageSetLSN(page, recptr);
5269 }
5270
5272
5273 result = TM_Ok;
5274
5275out_locked:
5277
5278out_unlocked:
5279 if (BufferIsValid(vmbuffer))
5280 ReleaseBuffer(vmbuffer);
5281
5282 /*
5283 * Don't update the visibility map here. Locking a tuple doesn't change
5284 * visibility info.
5285 */
5286
5287 /*
5288 * Now that we have successfully marked the tuple as locked, we can
5289 * release the lmgr tuple lock, if we had it.
5290 */
5291 if (have_tuple_lock)
5292 UnlockTupleTuplock(relation, tid, mode);
5293
5294 return result;
5295}
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:216
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition: heapam.c:7840
static TM_Result heap_lock_updated_tuple(Relation rel, uint16 prior_infomask, TransactionId prior_raw_xmax, const ItemPointerData *prior_ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:6079
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4561
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:401
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:39
#define SizeOfHeapLock
Definition: heapam_xlog.h:412
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:289
static bool HEAP_XMAX_IS_SHR_LOCKED(uint16 infomask)
Definition: htup_details.h:263
static bool HEAP_XMAX_IS_EXCL_LOCKED(uint16 infomask)
Definition: htup_details.h:269
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition: lmgr.c:739
@ XLTW_Lock
Definition: lmgr.h:29
bool log_lock_failures
Definition: lock.c:54
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1115
static PgChecksumMode mode
Definition: pg_checksums.c:56
uint8 infobits_set
Definition: heapam_xlog.h:408
OffsetNumber offnum
Definition: heapam_xlog.h:407
TransactionId xmax
Definition: heapam_xlog.h:406
@ TM_WouldBlock
Definition: tableam.h:103
#define VISIBILITYMAP_ALL_FROZEN

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failures, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
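
For illustration, a hedged sketch (not from the tree) of the calling pattern used by heapam_tuple_lock(): the target TID goes in through tuple->t_self, and on return the buffer is pinned but no longer locked, so the caller must drop the pin.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "storage/bufmgr.h"
#include "storage/itemptr.h"

/* Hypothetical helper: exclusively lock the row version at *tid. */
static bool
lock_row_exclusively(Relation rel, ItemPointer tid, CommandId cid)
{
    HeapTupleData tuple;
    Buffer      buffer;
    TM_FailureData tmfd;
    TM_Result   result;

    /* heap_lock_tuple() locates the target tuple through tuple.t_self. */
    tuple.t_self = *tid;

    result = heap_lock_tuple(rel, &tuple, cid,
                             LockTupleExclusive, LockWaitBlock,
                             true,      /* follow_updates */
                             &buffer, &tmfd);

    /* The buffer comes back pinned but unlocked; release the pin. */
    ReleaseBuffer(buffer);

    /* A real caller must handle TM_Updated/TM_Deleted/TM_SelfModified too. */
    return result == TM_Ok;
}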

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2377 of file heapam.c.

2379{
2381 HeapTuple *heaptuples;
2382 int i;
2383 int ndone;
2384 PGAlignedBlock scratch;
2385 Page page;
2386 Buffer vmbuffer = InvalidBuffer;
2387 bool needwal;
2388 Size saveFreeSpace;
2389 bool need_tuple_data = RelationIsLogicallyLogged(relation);
2390 bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2391 bool starting_with_empty_page = false;
2392 int npages = 0;
2393 int npages_used = 0;
2394
2395 /* currently not needed (thus unsupported) for heap_multi_insert() */
2397
2398 AssertHasSnapshotForToast(relation);
2399
2400 needwal = RelationNeedsWAL(relation);
2401 saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2403
2404 /* Toast and set header data in all the slots */
2405 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2406 for (i = 0; i < ntuples; i++)
2407 {
2408 HeapTuple tuple;
2409
2410 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2411 slots[i]->tts_tableOid = RelationGetRelid(relation);
2412 tuple->t_tableOid = slots[i]->tts_tableOid;
2413 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2414 options);
2415 }
2416
2417 /*
2418 * We're about to do the actual inserts -- but check for conflict first,
2419 * to minimize the possibility of having to roll back work we've just
2420 * done.
2421 *
2422 * A check here does not definitively prevent a serialization anomaly;
2423 * that check MUST be done at least past the point of acquiring an
2424 * exclusive buffer content lock on every buffer that will be affected,
2425 * and MAY be done after all inserts are reflected in the buffers and
2426 * those locks are released; otherwise there is a race condition. Since
2427 * multiple buffers can be locked and unlocked in the loop below, and it
2428 * would not be feasible to identify and lock all of those buffers before
2429 * the loop, we must do a final check at the end.
2430 *
2431 * The check here could be omitted with no loss of correctness; it is
2432 * present strictly as an optimization.
2433 *
2434 * For heap inserts, we only need to check for table-level SSI locks. Our
2435 * new tuples can't possibly conflict with existing tuple locks, and heap
2436 * page locks are only consolidated versions of tuple locks; they do not
2437 * lock "gaps" as index page locks do. So we don't need to specify a
2438 * buffer when making the call, which makes for a faster check.
2439 */
2441
2442 ndone = 0;
2443 while (ndone < ntuples)
2444 {
2445 Buffer buffer;
2446 bool all_visible_cleared = false;
2447 bool all_frozen_set = false;
2448 int nthispage;
2449
2451
2452 /*
2453 * Compute number of pages needed to fit the to-be-inserted tuples in
2454 * the worst case. This will be used to determine how much to extend
2455 * the relation by in RelationGetBufferForTuple(), if needed. If we
2456 * filled a prior page from scratch, we can just update our last
2457 * computation, but if we started with a partially filled page,
2458 * recompute from scratch, the number of potentially required pages
2459 * can vary due to tuples needing to fit onto the page, page headers
2460 * etc.
2461 */
2462 if (ndone == 0 || !starting_with_empty_page)
2463 {
2464 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2465 saveFreeSpace);
2466 npages_used = 0;
2467 }
2468 else
2469 npages_used++;
2470
2471 /*
2472 * Find buffer where at least the next tuple will fit. If the page is
2473 * all-visible, this will also pin the requisite visibility map page.
2474 *
2475 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2476 * empty page. See all_frozen_set below.
2477 */
2478 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2479 InvalidBuffer, options, bistate,
2480 &vmbuffer, NULL,
2481 npages - npages_used);
2482 page = BufferGetPage(buffer);
2483
2484 starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2485
2486 if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2487 {
2488 all_frozen_set = true;
2489 /* Lock the vmbuffer before entering the critical section */
2491 }
2492
2493 /* NO EREPORT(ERROR) from here till changes are logged */
2495
2496 /*
2497 * RelationGetBufferForTuple has ensured that the first tuple fits.
2498 * Put that on the page, and then as many other tuples as fit.
2499 */
2500 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2501
2502 /*
2503 * For logical decoding we need combo CIDs to properly decode the
2504 * catalog.
2505 */
2506 if (needwal && need_cids)
2507 log_heap_new_cid(relation, heaptuples[ndone]);
2508
2509 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2510 {
2511 HeapTuple heaptup = heaptuples[ndone + nthispage];
2512
2513 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2514 break;
2515
2516 RelationPutHeapTuple(relation, buffer, heaptup, false);
2517
2518 /*
2519 * For logical decoding we need combo CIDs to properly decode the
2520 * catalog.
2521 */
2522 if (needwal && need_cids)
2523 log_heap_new_cid(relation, heaptup);
2524 }
2525
2526 /*
2527 * If the page is all visible, need to clear that, unless we're only
2528 * going to add further frozen rows to it.
2529 *
2530 * If we're only adding already frozen rows to a previously empty
2531 * page, mark it as all-frozen and update the visibility map. We're
2532 * already holding a pin on the vmbuffer.
2533 */
2535 {
2536 all_visible_cleared = true;
2537 PageClearAllVisible(page);
2538 visibilitymap_clear(relation,
2539 BufferGetBlockNumber(buffer),
2540 vmbuffer, VISIBILITYMAP_VALID_BITS);
2541 }
2542 else if (all_frozen_set)
2543 {
2544 PageSetAllVisible(page);
2546 vmbuffer,
2549 relation->rd_locator);
2550 }
2551
2552 /*
2553 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2554 */
2555
2556 MarkBufferDirty(buffer);
2557
2558 /* XLOG stuff */
2559 if (needwal)
2560 {
2561 XLogRecPtr recptr;
2562 xl_heap_multi_insert *xlrec;
2564 char *tupledata;
2565 int totaldatalen;
2566 char *scratchptr = scratch.data;
2567 bool init;
2568 int bufflags = 0;
2569
2570 /*
2571 * If the page was previously empty, we can reinit the page
2572 * instead of restoring the whole thing.
2573 */
2574 init = starting_with_empty_page;
2575
2576 /* allocate xl_heap_multi_insert struct from the scratch area */
2577 xlrec = (xl_heap_multi_insert *) scratchptr;
2578 scratchptr += SizeOfHeapMultiInsert;
2579
2580 /*
2581 * Allocate offsets array. Unless we're reinitializing the page,
2582 * in that case the tuples are stored in order starting at
2583 * FirstOffsetNumber and we don't need to store the offsets
2584 * explicitly.
2585 */
2586 if (!init)
2587 scratchptr += nthispage * sizeof(OffsetNumber);
2588
2589 /* the rest of the scratch space is used for tuple data */
2590 tupledata = scratchptr;
2591
2592 /* check that the mutually exclusive flags are not both set */
2593 Assert(!(all_visible_cleared && all_frozen_set));
2594
2595 xlrec->flags = 0;
2596 if (all_visible_cleared)
2598
2599 /*
2600 * We don't have to worry about including a conflict xid in the
2601 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
2602 * visibility rules.
2603 */
2604 if (all_frozen_set)
2606
2607 xlrec->ntuples = nthispage;
2608
2609 /*
2610 * Write out an xl_multi_insert_tuple and the tuple data itself
2611 * for each tuple.
2612 */
2613 for (i = 0; i < nthispage; i++)
2614 {
2615 HeapTuple heaptup = heaptuples[ndone + i];
2616 xl_multi_insert_tuple *tuphdr;
2617 int datalen;
2618
2619 if (!init)
2620 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2621 /* xl_multi_insert_tuple needs two-byte alignment. */
2622 tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2623 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2624
2625 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2626 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2627 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2628
2629 /* write bitmap [+ padding] [+ oid] + data */
2630 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2631 memcpy(scratchptr,
2632 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2633 datalen);
2634 tuphdr->datalen = datalen;
2635 scratchptr += datalen;
2636 }
2637 totaldatalen = scratchptr - tupledata;
2638 Assert((scratchptr - scratch.data) < BLCKSZ);
2639
2640 if (need_tuple_data)
2642
2643 /*
2644 * Signal that this is the last xl_heap_multi_insert record
2645 * emitted by this call to heap_multi_insert(). Needed for logical
2646 * decoding so it knows when to cleanup temporary data.
2647 */
2648 if (ndone + nthispage == ntuples)
2650
2651 if (init)
2652 {
2653 info |= XLOG_HEAP_INIT_PAGE;
2654 bufflags |= REGBUF_WILL_INIT;
2655 }
2656
2657 /*
2658 * If we're doing logical decoding, include the new tuple data
2659 * even if we take a full-page image of the page.
2660 */
2661 if (need_tuple_data)
2662 bufflags |= REGBUF_KEEP_DATA;
2663
2665 XLogRegisterData(xlrec, tupledata - scratch.data);
2666 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2667 if (all_frozen_set)
2668 XLogRegisterBuffer(1, vmbuffer, 0);
2669
2670 XLogRegisterBufData(0, tupledata, totaldatalen);
2671
2672 /* filtering by origin on a row level is much more efficient */
2674
2675 recptr = XLogInsert(RM_HEAP2_ID, info);
2676
2677 PageSetLSN(page, recptr);
2678 if (all_frozen_set)
2679 {
2680 Assert(BufferIsDirty(vmbuffer));
2681 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2682 }
2683 }
2684
2686
2687 if (all_frozen_set)
2688 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2689
2690 UnlockReleaseBuffer(buffer);
2691 ndone += nthispage;
2692
2693 /*
2694 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2695 * likely that we'll insert into subsequent heap pages that are likely
2696 * to use the same vm page.
2697 */
2698 }
2699
2700 /* We're done with inserting all tuples, so release the last vmbuffer. */
2701 if (vmbuffer != InvalidBuffer)
2702 ReleaseBuffer(vmbuffer);
2703
2704 /*
2705 * We're done with the actual inserts. Check for conflicts again, to
2706 * ensure that all rw-conflicts in to these inserts are detected. Without
2707 * this final check, a sequential scan of the heap may have locked the
2708 * table after the "before" check, missing one opportunity to detect the
2709 * conflict, and then scanned the table before the new tuples were there,
2710 * missing the other chance to detect the conflict.
2711 *
2712 * For heap inserts, we only need to check for table-level SSI locks. Our
2713 * new tuples can't possibly conflict with existing tuple locks, and heap
2714 * page locks are only consolidated versions of tuple locks; they do not
2715 * lock "gaps" as index page locks do. So we don't need to specify a
2716 * buffer when making the call.
2717 */
2719
2720 /*
2721 * If tuples are cacheable, mark them for invalidation from the caches in
2722 * case we abort. Note it is OK to do this after releasing the buffer,
2723 * because the heaptuples data structure is all in local memory, not in
2724 * the shared buffer.
2725 */
2726 if (IsCatalogRelation(relation))
2727 {
2728 for (i = 0; i < ntuples; i++)
2729 CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2730 }
2731
2732 /* copy t_self fields back to the caller's slots */
2733 for (i = 0; i < ntuples; i++)
2734 slots[i]->tts_tid = heaptuples[i]->t_self;
2735
2736 pgstat_count_heap_insert(relation, ntuples);
2737}
bool BufferIsDirty(Buffer buffer)
Definition: bufmgr.c:3005
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:990
static void PageSetAllVisible(Page page)
Definition: bufpage.h:433
#define MAXALIGN(LEN)
Definition: c.h:832
#define SHORTALIGN(LEN)
Definition: c.h:828
size_t Size
Definition: c.h:625
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1833
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2345
#define HEAP_INSERT_FROZEN
Definition: heapam.h:38
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:188
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:79
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:199
int init
Definition: isn.c:79
void * palloc(Size size)
Definition: mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:390
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:361
RelFileLocator rd_locator
Definition: rel.h:57
Oid tts_tableOid
Definition: tuptable.h:129
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:185
uint8 visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_ALL_VISIBLE

References Assert(), AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsDirty(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, IsCatalogRelation(), ItemPointerGetOffsetNumber(), LockBuffer(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), RelationData::rd_locator, REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_set_vmbits(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
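
For illustration, a hedged sketch (not from the tree) of a bulk-load style caller: already-formed heap tuples are stored in heap tuple slots, the whole batch is handed to heap_multi_insert() in one call, and a BulkInsertState is supplied so repeated batches can keep reusing the current target buffer. Catalog code reaches this path through CatalogTuplesMultiInsertWithInfo().

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

/* Hypothetical helper: insert an array of heap tuples in one call. */
static void
multi_insert_tuples(Relation rel, HeapTuple *tuples, int ntuples)
{
    TupleTableSlot **slots = palloc(ntuples * sizeof(TupleTableSlot *));
    BulkInsertState bistate = GetBulkInsertState();

    for (int i = 0; i < ntuples; i++)
    {
        slots[i] = MakeSingleTupleTableSlot(RelationGetDescr(rel),
                                            &TTSOpsHeapTuple);
        ExecStoreHeapTuple(tuples[i], slots[i], false);
    }

    heap_multi_insert(rel, slots, ntuples,
                      GetCurrentCommandId(true), 0, bistate);

    FreeBulkInsertState(bistate);
    for (int i = 0; i < ntuples; i++)
        ExecDropSingleTupleTableSlot(slots[i]);
    pfree(slots);
}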

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( PruneFreezeParams *  params,
PruneFreezeResult *  presult,
OffsetNumber *  off_loc,
TransactionId *  new_relfrozen_xid,
MultiXactId *  new_relmin_mxid 
)

Definition at line 819 of file pruneheap.c.

824{
825 Buffer buffer = params->buffer;
826 Page page = BufferGetPage(buffer);
827 PruneState prstate;
828 bool do_freeze;
829 bool do_prune;
830 bool do_hint_prune;
831 bool did_tuple_hint_fpi;
832 int64 fpi_before = pgWalUsage.wal_fpi;
833
834 /* Initialize prstate */
835 prune_freeze_setup(params,
836 new_relfrozen_xid, new_relmin_mxid,
837 presult, &prstate);
838
839 /*
840 * Examine all line pointers and tuple visibility information to determine
841 * which line pointers should change state and which tuples may be frozen.
842 * Prepare queue of state changes to later be executed in a critical
843 * section.
844 */
846 buffer, &prstate, off_loc);
847
848 /*
849 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
850 * checking tuple visibility information in prune_freeze_plan() may have
851 * caused an FPI to be emitted.
852 */
853 did_tuple_hint_fpi = fpi_before != pgWalUsage.wal_fpi;
854
855 do_prune = prstate.nredirected > 0 ||
856 prstate.ndead > 0 ||
857 prstate.nunused > 0;
858
859 /*
860 * Even if we don't prune anything, if we found a new value for the
861 * pd_prune_xid field or the page was marked full, we will update the hint
862 * bit.
863 */
864 do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
865 PageIsFull(page);
866
867 /*
868 * Decide if we want to go ahead with freezing according to the freeze
869 * plans we prepared, or not.
870 */
871 do_freeze = heap_page_will_freeze(params->relation, buffer,
872 did_tuple_hint_fpi,
873 do_prune,
874 do_hint_prune,
875 &prstate);
876
877 /*
878 * While scanning the line pointers, we did not clear
879 * all_visible/all_frozen when encountering LP_DEAD items because we
880 * wanted the decision whether or not to freeze the page to be unaffected
881 * by the short-term presence of LP_DEAD items. These LP_DEAD items are
882 * effectively assumed to be LP_UNUSED items in the making. It doesn't
883 * matter which vacuum heap pass (initial pass or final pass) ends up
884 * setting the page all-frozen, as long as the ongoing VACUUM does it.
885 *
886 * Now that we finished determining whether or not to freeze the page,
887 * update all_visible and all_frozen so that they reflect the true state
888 * of the page for setting PD_ALL_VISIBLE and VM bits.
889 */
890 if (prstate.lpdead_items > 0)
891 prstate.all_visible = prstate.all_frozen = false;
892
893 Assert(!prstate.all_frozen || prstate.all_visible);
894
895 /* Any error while applying the changes is critical */
897
898 if (do_hint_prune)
899 {
900 /*
901 * Update the page's pd_prune_xid field to either zero, or the lowest
902 * XID of any soon-prunable tuple.
903 */
904 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
905
906 /*
907 * Also clear the "page is full" flag, since there's no point in
908 * repeating the prune/defrag process until something else happens to
909 * the page.
910 */
911 PageClearFull(page);
912
913 /*
914 * If that's all we had to do to the page, this is a non-WAL-logged
915 * hint. If we are going to freeze or prune the page, we will mark
916 * the buffer dirty below.
917 */
918 if (!do_freeze && !do_prune)
919 MarkBufferDirtyHint(buffer, true);
920 }
921
922 if (do_prune || do_freeze)
923 {
924 /* Apply the planned item changes and repair page fragmentation. */
925 if (do_prune)
926 {
927 heap_page_prune_execute(buffer, false,
928 prstate.redirected, prstate.nredirected,
929 prstate.nowdead, prstate.ndead,
930 prstate.nowunused, prstate.nunused);
931 }
932
933 if (do_freeze)
934 heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
935
936 MarkBufferDirty(buffer);
937
938 /*
939 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
940 */
941 if (RelationNeedsWAL(params->relation))
942 {
943 /*
944 * The snapshotConflictHorizon for the whole record should be the
945 * most conservative of all the horizons calculated for any of the
946 * possible modifications. If this record will prune tuples, any
947 * transactions on the standby older than the youngest xmax of the
948 * most recently removed tuple this record will prune will
949 * conflict. If this record will freeze tuples, any transactions
950 * on the standby with xids older than the youngest tuple this
951 * record will freeze will conflict.
952 */
953 TransactionId conflict_xid;
954
956 prstate.latest_xid_removed))
957 conflict_xid = prstate.frz_conflict_horizon;
958 else
959 conflict_xid = prstate.latest_xid_removed;
960
961 log_heap_prune_and_freeze(params->relation, buffer,
962 InvalidBuffer, /* vmbuffer */
963 0, /* vmflags */
964 conflict_xid,
965 true, params->reason,
966 prstate.frozen, prstate.nfrozen,
967 prstate.redirected, prstate.nredirected,
968 prstate.nowdead, prstate.ndead,
969 prstate.nowunused, prstate.nunused);
970 }
971 }
972
974
975 /* Copy information back for caller */
976 presult->ndeleted = prstate.ndeleted;
977 presult->nnewlpdead = prstate.ndead;
978 presult->nfrozen = prstate.nfrozen;
979 presult->live_tuples = prstate.live_tuples;
981 presult->all_visible = prstate.all_visible;
982 presult->all_frozen = prstate.all_frozen;
983 presult->hastup = prstate.hastup;
984
985 /*
986 * For callers planning to update the visibility map, the conflict horizon
987 * for that record must be the newest xmin on the page. However, if the
988 * page is completely frozen, there can be no conflict and the
989 * vm_conflict_horizon should remain InvalidTransactionId. This includes
990 * the case that we just froze all the tuples; the prune-freeze record
991 * included the conflict XID already so the caller doesn't need it.
992 */
993 if (presult->all_frozen)
995 else
997
998 presult->lpdead_items = prstate.lpdead_items;
999 /* the presult->deadoffsets array was already filled in */
1000
1001 if (prstate.attempt_freeze)
1002 {
1003 if (presult->nfrozen > 0)
1004 {
1005 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
1006 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
1007 }
1008 else
1009 {
1010 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1011 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1012 }
1013 }
1014}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:5525
static void PageClearFull(Page page)
Definition: bufpage.h:422
static bool PageIsFull(const PageData *page)
Definition: bufpage.h:412
int64_t int64
Definition: c.h:549
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7425
WalUsage pgWalUsage
Definition: instrument.c:22
static bool heap_page_will_freeze(Relation relation, Buffer buffer, bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition: pruneheap.c:663
static void prune_freeze_plan(Oid reloid, Buffer buffer, PruneState *prstate, OffsetNumber *off_loc)
Definition: pruneheap.c:458
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2167
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, PruneFreezeResult *presult, PruneState *prstate)
Definition: pruneheap.c:330
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1671
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:220
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:208
MultiXactId FreezePageRelminMxid
Definition: heapam.h:209
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:219
PruneReason reason
Definition: heapam.h:245
Buffer buffer
Definition: heapam.h:239
Relation relation
Definition: heapam.h:238
int recently_dead_tuples
Definition: heapam.h:285
TransactionId vm_conflict_horizon
Definition: heapam.h:300
bool all_visible
Definition: heapam.h:298
HeapPageFreeze pagefrz
Definition: pruneheap.c:104
bool all_visible
Definition: pruneheap.c:154
int ndead
Definition: pruneheap.c:56
TransactionId new_prune_xid
Definition: pruneheap.c:53
bool attempt_freeze
Definition: pruneheap.c:46
bool hastup
Definition: pruneheap.c:123
int recently_dead_tuples
Definition: pruneheap.c:120
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
TransactionId frz_conflict_horizon
Definition: pruneheap.c:137
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:62
int live_tuples
Definition: pruneheap.c:119
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:156
bool all_frozen
Definition: pruneheap.c:155
HeapTupleFreeze frozen[MaxHeapTuplesPerPage]
Definition: pruneheap.c:63
int lpdead_items
Definition: pruneheap.c:129
int nfrozen
Definition: pruneheap.c:58
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:60
int ndeleted
Definition: pruneheap.c:116
int nredirected
Definition: pruneheap.c:55
TransactionId latest_xid_removed
Definition: pruneheap.c:54
int nunused
Definition: pruneheap.c:57
int64 wal_fpi
Definition: instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.h:297

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert(), PruneState::attempt_freeze, PruneFreezeParams::buffer, BufferGetPage(), END_CRIT_SECTION, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::frz_conflict_horizon, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), heap_page_will_freeze(), InvalidBuffer, InvalidTransactionId, PruneState::latest_xid_removed, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nunused, PageClearFull(), PruneState::pagefrz, PageIsFull(), pgWalUsage, prune_freeze_plan(), prune_freeze_setup(), PruneFreezeParams::reason, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, PruneFreezeParams::relation, RelationGetRelid, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdFollows(), PruneState::visibility_cutoff_xid, PruneFreezeResult::vm_conflict_horizon, and WalUsage::wal_fpi.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
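
For illustration, a heavily hedged sketch (not from the tree) of the calling pattern: the caller is assumed to have prepared a PruneFreezeParams (relation, buffer, prune reason, plus the remaining prune/freeze inputs, which are not reproduced in this listing) with the buffer suitably locked, and afterwards reads the counters reported back in PruneFreezeResult while tracking candidate relfrozenxid/relminmxid values across pages, as VACUUM does.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/off.h"

/*
 * Hypothetical helper: prune (and possibly freeze) one heap page, given a
 * fully prepared parameter struct.  new_relfrozen_xid and new_relmin_mxid
 * are running values the caller maintains across pages, as
 * lazy_scan_prune() does.
 */
static void
prune_one_page(PruneFreezeParams *params,
               TransactionId *new_relfrozen_xid,
               MultiXactId *new_relmin_mxid)
{
    PruneFreezeResult presult;
    OffsetNumber off_loc = InvalidOffsetNumber;

    heap_page_prune_and_freeze(params, &presult, &off_loc,
                               new_relfrozen_xid, new_relmin_mxid);

    /*
     * presult now reports what happened: tuples deleted, tuples frozen,
     * LP_DEAD items left for index vacuuming, and the all_visible /
     * all_frozen hints used for visibility map updates.
     */
    elog(DEBUG2, "pruned %d tuples, froze %d, %d LP_DEAD items remain",
         presult.ndeleted, presult.nfrozen, presult.lpdead_items);
}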

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber *  redirected,
int  nredirected,
OffsetNumber *  nowdead,
int  ndead,
OffsetNumber *  nowunused,
int  nunused 
)

Definition at line 1671 of file pruneheap.c.

1675{
1676 Page page = BufferGetPage(buffer);
1677 OffsetNumber *offnum;
1679
1680 /* Shouldn't be called unless there's something to do */
1681 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1682
1683 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1684 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1685
1686 /* Update all redirected line pointers */
1687 offnum = redirected;
1688 for (int i = 0; i < nredirected; i++)
1689 {
1690 OffsetNumber fromoff = *offnum++;
1691 OffsetNumber tooff = *offnum++;
1692 ItemId fromlp = PageGetItemId(page, fromoff);
1694
1695#ifdef USE_ASSERT_CHECKING
1696
1697 /*
1698 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1699 * must be the first item from a HOT chain. If the item has tuple
1700 * storage then it can't be a heap-only tuple. Otherwise we are just
1701 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1702 * has been pruned at least once before now.
1703 */
1704 if (!ItemIdIsRedirected(fromlp))
1705 {
1706 Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1707
1708 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1710 }
1711 else
1712 {
1713 /* We shouldn't need to redundantly set the redirect */
1714 Assert(ItemIdGetRedirect(fromlp) != tooff);
1715 }
1716
1717 /*
1718 * The item that we're about to set as an LP_REDIRECT (the 'from'
1719 * item) will point to an existing item (the 'to' item) that is
1720 * already a heap-only tuple. There can be at most one LP_REDIRECT
1721 * item per HOT chain.
1722 *
 1723 * We need to keep around an LP_REDIRECT item (after the original
1724 * non-heap-only root tuple gets pruned away) so that it's always
1725 * possible for VACUUM to easily figure out what TID to delete from
1726 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1727 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1728 * tuple can.
1729 *
1730 * This check may miss problems, e.g. the target of a redirect could
1731 * be marked as unused subsequently. The page_verify_redirects() check
1732 * below will catch such problems.
1733 */
1734 tolp = PageGetItemId(page, tooff);
1735 Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1736 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1738#endif
1739
1740 ItemIdSetRedirect(fromlp, tooff);
1741 }
1742
1743 /* Update all now-dead line pointers */
1744 offnum = nowdead;
1745 for (int i = 0; i < ndead; i++)
1746 {
1747 OffsetNumber off = *offnum++;
1748 ItemId lp = PageGetItemId(page, off);
1749
1750#ifdef USE_ASSERT_CHECKING
1751
1752 /*
1753 * An LP_DEAD line pointer must be left behind when the original item
1754 * (which is dead to everybody) could still be referenced by a TID in
1755 * an index. This should never be necessary with any individual
1756 * heap-only tuple item, though. (It's not clear how much of a problem
1757 * that would be, but there is no reason to allow it.)
1758 */
1759 if (ItemIdHasStorage(lp))
1760 {
1762 htup = (HeapTupleHeader) PageGetItem(page, lp);
1764 }
1765 else
1766 {
1767 /* Whole HOT chain becomes dead */
1769 }
1770#endif
1771
1772 ItemIdSetDead(lp);
1773 }
1774
1775 /* Update all now-unused line pointers */
1776 offnum = nowunused;
1777 for (int i = 0; i < nunused; i++)
1778 {
1779 OffsetNumber off = *offnum++;
1780 ItemId lp = PageGetItemId(page, off);
1781
1782#ifdef USE_ASSERT_CHECKING
1783
1784 if (lp_truncate_only)
1785 {
1786 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1788 }
1789 else
1790 {
1791 /*
1792 * When heap_page_prune_and_freeze() was called, mark_unused_now
1793 * may have been passed as true, which allows would-be LP_DEAD
1794 * items to be made LP_UNUSED instead. This is only possible if
1795 * the relation has no indexes. If there are any dead items, then
1796 * mark_unused_now was not true and every item being marked
1797 * LP_UNUSED must refer to a heap-only tuple.
1798 */
1799 if (ndead > 0)
1800 {
1802 htup = (HeapTupleHeader) PageGetItem(page, lp);
1804 }
1805 else
1806 Assert(ItemIdIsUsed(lp));
1807 }
1808
1809#endif
1810
1811 ItemIdSetUnused(lp);
1812 }
1813
1814 if (lp_truncate_only)
 1815 PageTruncateLinePointerArray(page);
 1816 else
1817 {
1818 /*
1819 * Finally, repair any fragmentation, and update the page's hint bit
1820 * about whether it has free pointers.
1821 */
 1823 PageRepairFragmentation(page);
1824 /*
1825 * Now that the page has been modified, assert that redirect items
1826 * still point to valid targets.
1827 */
 1829 page_verify_redirects(page);
1830}
void PageRepairFragmentation(Page page)
Definition: bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:229
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1847

References Assert(), BufferGetPage(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
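
A hypothetical caller fragment (the real callers are listed above) may make the contract easier to see; the offsets chosen below are invented for illustration, and in real code the WAL record for the prune is emitted inside the same critical section.

static void
prune_execute_example(Buffer buf)
{
    OffsetNumber redirected[MaxHeapTuplesPerPage * 2];  /* (from, to) pairs */
    OffsetNumber nowdead[MaxHeapTuplesPerPage];
    OffsetNumber nowunused[MaxHeapTuplesPerPage];
    int         nredirected = 0;
    int         ndead = 0;
    int         nunused = 0;

    /* Example decisions: redirect item 1 to the heap-only tuple at item 3, */
    redirected[nredirected * 2] = 1;
    redirected[nredirected * 2 + 1] = 3;
    nredirected++;
    /* leave an LP_DEAD stub at item 5, and reclaim item 7 as LP_UNUSED. */
    nowdead[ndead++] = 5;
    nowunused[nunused++] = 7;

    /* The caller must already hold a cleanup lock on 'buf'. */
    START_CRIT_SECTION();
    heap_page_prune_execute(buf, false,
                            redirected, nredirected,
                            nowdead, ndead,
                            nowunused, nunused);
    MarkBufferDirty(buf);
    /* ... XLogInsert() of the prune/freeze record would follow here ... */
    END_CRIT_SECTION();
}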

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 209 of file pruneheap.c.

210{
211 Page page = BufferGetPage(buffer);
212 TransactionId prune_xid;
213 GlobalVisState *vistest;
214 Size minfree;
215
216 /*
217 * We can't write WAL in recovery mode, so there's no point trying to
218 * clean the page. The primary will likely issue a cleaning WAL record
219 * soon anyway, so this is no particular loss.
220 */
221 if (RecoveryInProgress())
222 return;
223
224 /*
 225 * First check whether there's any chance there's something to prune;
 226 * determining the appropriate horizon is a waste if there's no prune_xid
227 * (i.e. no updates/deletes left potentially dead tuples around).
228 */
229 prune_xid = ((PageHeader) page)->pd_prune_xid;
230 if (!TransactionIdIsValid(prune_xid))
231 return;
232
233 /*
234 * Check whether prune_xid indicates that there may be dead rows that can
235 * be cleaned up.
236 */
237 vistest = GlobalVisTestFor(relation);
238
239 if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
240 return;
241
242 /*
243 * We prune when a previous UPDATE failed to find enough space on the page
244 * for a new tuple version, or when free space falls below the relation's
245 * fill-factor target (but not less than 10%).
246 *
247 * Checking free space here is questionable since we aren't holding any
248 * lock on the buffer; in the worst case we could get a bogus answer. It's
249 * unlikely to be *seriously* wrong, though, since reading either pd_lower
250 * or pd_upper is probably atomic. Avoiding taking a lock seems more
251 * important than sometimes getting a wrong answer in what is after all
252 * just a heuristic estimate.
253 */
254 minfree = RelationGetTargetPageFreeSpace(relation,
256 minfree = Max(minfree, BLCKSZ / 10);
257
258 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
259 {
260 /* OK, try to get exclusive buffer lock */
 261 if (!ConditionalLockBufferForCleanup(buffer))
 262 return;
263
264 /*
265 * Now that we have buffer lock, get accurate information about the
266 * page's free space, and recheck the heuristic about whether to
267 * prune.
268 */
269 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
270 {
271 OffsetNumber dummy_off_loc;
272 PruneFreezeResult presult;
273
274 /*
275 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
276 * regardless of whether or not the relation has indexes, since we
277 * cannot safely determine that during on-access pruning with the
278 * current implementation.
279 */
280 PruneFreezeParams params = {
281 .relation = relation,
282 .buffer = buffer,
283 .reason = PRUNE_ON_ACCESS,
284 .options = 0,
285 .vistest = vistest,
286 .cutoffs = NULL,
287 };
288
289 heap_page_prune_and_freeze(&params, &presult, &dummy_off_loc,
290 NULL, NULL);
291
292 /*
293 * Report the number of tuples reclaimed to pgstats. This is
294 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
295 *
296 * We derive the number of dead tuples like this to avoid totally
297 * forgetting about items that were set to LP_DEAD, since they
298 * still need to be cleaned up by VACUUM. We only want to count
299 * heap-only tuples that just became LP_UNUSED in our report,
 300 * which don't need any further cleanup.
301 *
302 * VACUUM doesn't have to compensate in the same way when it
303 * tracks ndeleted, since it will set the same LP_DEAD items to
304 * LP_UNUSED separately.
305 */
306 if (presult.ndeleted > presult.nnewlpdead)
 307 pgstat_update_heap_dead_tuples(relation,
 308 presult.ndeleted - presult.nnewlpdead);
309 }
310
311 /* And release buffer lock */
 312 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 313
314 /*
315 * We avoid reuse of any free space created on the page by unrelated
316 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
317 * free space should be reused by UPDATEs to *this* page.
318 */
319 }
320}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5952
#define Max(x, y)
Definition: c.h:997
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4243
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:819
bool RecoveryInProgress(void)
Definition: xlog.c:6461

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PruneFreezeResult::ndeleted, PruneFreezeResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), PruneFreezeParams::relation, RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().
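
As a minimal usage sketch (hypothetical, mirroring the callers listed above): on-access pruning needs nothing more than a pinned buffer, since the function conditionally takes the cleanup lock itself and silently does nothing when pruning is impossible or not worthwhile.

static void
prune_block_example(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);

    /* Opportunistically prune before examining tuples on the page. */
    heap_page_prune_opt(rel, buf);

    /* ... lock the buffer and read the page as usual ... */
    ReleaseBuffer(buf);
}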

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7372 of file heapam.c.

7374{
7375 Page page = BufferGetPage(buffer);
7376
7377 for (int i = 0; i < ntuples; i++)
7378 {
7379 HeapTupleFreeze *frz = tuples + i;
7380 ItemId itemid = PageGetItemId(page, frz->offset);
7381 HeapTupleHeader htup;
7382
7383 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7384
7385 /* Deliberately avoid relying on tuple hint bits here */
7387 {
7389
7391 if (unlikely(!TransactionIdDidCommit(xmin)))
7392 ereport(ERROR,
7394 errmsg_internal("uncommitted xmin %u needs to be frozen",
7395 xmin)));
7396 }
7397
7398 /*
7399 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7400 * left behind by transactions that were in progress during a crash,
7401 * so we can only check that xmax didn't commit
7402 */
7404 {
7406
7409 ereport(ERROR,
7411 errmsg_internal("cannot freeze committed xmax %u",
7412 xmax)));
7413 }
7414 }
7415}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:138
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:137
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
Definition: htup_details.h:350
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:318
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
uint8 checkflags
Definition: heapam.h:150
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert(), BufferGetPage(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_will_freeze().
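
A minimal sketch of the intended call pattern (hypothetical helper, not taken from the source): the checks run on the freeze plans built by heap_prepare_freeze_tuple(), immediately before those plans are executed, so that latent corruption raises an ERROR instead of being frozen silently.

static void
pre_freeze_checks_example(Buffer buf, HeapTupleFreeze *frozen, int nfrozen)
{
    /*
     * 'frozen' holds the freeze plans prepared for tuples on the page in
     * 'buf'.  A tuple whose xmin should have committed but did not, or whose
     * xmax should have aborted but committed, is reported as data corruption.
     */
    if (nfrozen > 0)
        heap_pre_freeze_checks(buf, frozen, nfrozen);
}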

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool *  totally_frozen 
)

Definition at line 7099 of file heapam.c.

7103{
7104 bool xmin_already_frozen = false,
7105 xmax_already_frozen = false;
7106 bool freeze_xmin = false,
7107 replace_xvac = false,
7108 replace_xmax = false,
7109 freeze_xmax = false;
7110 TransactionId xid;
7111
7112 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7113 frz->t_infomask2 = tuple->t_infomask2;
7114 frz->t_infomask = tuple->t_infomask;
7115 frz->frzflags = 0;
7116 frz->checkflags = 0;
7117
7118 /*
7119 * Process xmin, while keeping track of whether it's already frozen, or
7120 * will become frozen iff our freeze plan is executed by caller (could be
7121 * neither).
7122 */
7123 xid = HeapTupleHeaderGetXmin(tuple);
7124 if (!TransactionIdIsNormal(xid))
7125 xmin_already_frozen = true;
7126 else
7127 {
7128 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7129 ereport(ERROR,
7131 errmsg_internal("found xmin %u from before relfrozenxid %u",
7132 xid, cutoffs->relfrozenxid)));
7133
7134 /* Will set freeze_xmin flags in freeze plan below */
7135 freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
7136
7137 /* Verify that xmin committed if and when freeze plan is executed */
7138 if (freeze_xmin)
7140 }
7141
7142 /*
7143 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7144 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7145 */
7146 xid = HeapTupleHeaderGetXvac(tuple);
7147 if (TransactionIdIsNormal(xid))
7148 {
7150 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7151
7152 /*
7153 * For Xvac, we always freeze proactively. This allows totally_frozen
7154 * tracking to ignore xvac.
7155 */
7156 replace_xvac = pagefrz->freeze_required = true;
7157
7158 /* Will set replace_xvac flags in freeze plan below */
7159 }
7160
7161 /* Now process xmax */
7162 xid = frz->xmax;
7163 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7164 {
7165 /* Raw xmax is a MultiXactId */
7166 TransactionId newxmax;
7167 uint16 flags;
7168
7169 /*
7170 * We will either remove xmax completely (in the "freeze_xmax" path),
7171 * process xmax by replacing it (in the "replace_xmax" path), or
7172 * perform no-op xmax processing. The only constraint is that the
7173 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7174 */
7175 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7176 &flags, pagefrz);
7177
7178 if (flags & FRM_NOOP)
7179 {
7180 /*
7181 * xmax is a MultiXactId, and nothing about it changes for now.
7182 * This is the only case where 'freeze_required' won't have been
7183 * set for us by FreezeMultiXactId, as well as the only case where
7184 * neither freeze_xmax nor replace_xmax are set (given a multi).
7185 *
7186 * This is a no-op, but the call to FreezeMultiXactId might have
7187 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7188 * for us (the "freeze page" variants, specifically). That'll
7189 * make it safe for our caller to freeze the page later on, while
7190 * leaving this particular xmax undisturbed.
7191 *
7192 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7193 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7194 * job. A call to heap_tuple_should_freeze for this same tuple
7195 * will take place below if 'freeze_required' isn't set already.
7196 * (This repeats work from FreezeMultiXactId, but allows "no
7197 * freeze" tracker maintenance to happen in only one place.)
7198 */
7199 Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
7200 Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
7201 }
7202 else if (flags & FRM_RETURN_IS_XID)
7203 {
7204 /*
7205 * xmax will become an updater Xid (original MultiXact's updater
7206 * member Xid will be carried forward as a simple Xid in Xmax).
7207 */
7208 Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
7209
7210 /*
7211 * NB -- some of these transformations are only valid because we
7212 * know the return Xid is a tuple updater (i.e. not merely a
7213 * locker.) Also note that the only reason we don't explicitly
7214 * worry about HEAP_KEYS_UPDATED is because it lives in
7215 * t_infomask2 rather than t_infomask.
7216 */
7217 frz->t_infomask &= ~HEAP_XMAX_BITS;
7218 frz->xmax = newxmax;
7219 if (flags & FRM_MARK_COMMITTED)
7221 replace_xmax = true;
7222 }
7223 else if (flags & FRM_RETURN_IS_MULTI)
7224 {
7225 uint16 newbits;
7226 uint16 newbits2;
7227
7228 /*
7229 * xmax is an old MultiXactId that we have to replace with a new
7230 * MultiXactId, to carry forward two or more original member XIDs.
7231 */
7232 Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
7233
7234 /*
7235 * We can't use GetMultiXactIdHintBits directly on the new multi
7236 * here; that routine initializes the masks to all zeroes, which
7237 * would lose other bits we need. Doing it this way ensures all
7238 * unrelated bits remain untouched.
7239 */
7240 frz->t_infomask &= ~HEAP_XMAX_BITS;
7241 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7242 GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
7243 frz->t_infomask |= newbits;
7244 frz->t_infomask2 |= newbits2;
7245 frz->xmax = newxmax;
7246 replace_xmax = true;
7247 }
7248 else
7249 {
7250 /*
7251 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7252 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7253 */
7254 Assert(flags & FRM_INVALIDATE_XMAX);
7255 Assert(!TransactionIdIsValid(newxmax));
7256
7257 /* Will set freeze_xmax flags in freeze plan below */
7258 freeze_xmax = true;
7259 }
7260
7261 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7262 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7263 }
7264 else if (TransactionIdIsNormal(xid))
7265 {
7266 /* Raw xmax is normal XID */
7267 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7268 ereport(ERROR,
7270 errmsg_internal("found xmax %u from before relfrozenxid %u",
7271 xid, cutoffs->relfrozenxid)));
7272
7273 /* Will set freeze_xmax flags in freeze plan below */
7274 freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
7275
7276 /*
7277 * Verify that xmax aborted if and when freeze plan is executed,
7278 * provided it's from an update. (A lock-only xmax can be removed
7279 * independent of this, since the lock is released at xact end.)
7280 */
7281 if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
7283 }
7284 else if (!TransactionIdIsValid(xid))
7285 {
7286 /* Raw xmax is InvalidTransactionId XID */
7287 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7288 xmax_already_frozen = true;
7289 }
7290 else
7291 ereport(ERROR,
7293 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7294 xid, tuple->t_infomask)));
7295
7296 if (freeze_xmin)
7297 {
7298 Assert(!xmin_already_frozen);
7299
7301 }
7302 if (replace_xvac)
7303 {
7304 /*
7305 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7306 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7307 * transaction succeeded.
7308 */
7309 Assert(pagefrz->freeze_required);
7310 if (tuple->t_infomask & HEAP_MOVED_OFF)
7311 frz->frzflags |= XLH_INVALID_XVAC;
7312 else
7313 frz->frzflags |= XLH_FREEZE_XVAC;
7314 }
7315 if (replace_xmax)
7316 {
7317 Assert(!xmax_already_frozen && !freeze_xmax);
7318 Assert(pagefrz->freeze_required);
7319
7320 /* Already set replace_xmax flags in freeze plan earlier */
7321 }
7322 if (freeze_xmax)
7323 {
7324 Assert(!xmax_already_frozen && !replace_xmax);
7325
7327
7328 /*
7329 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7330 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7331 * Also get rid of the HEAP_KEYS_UPDATED bit.
7332 */
7333 frz->t_infomask &= ~HEAP_XMAX_BITS;
7335 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7336 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7337 }
7338
7339 /*
7340 * Determine if this tuple is already totally frozen, or will become
7341 * totally frozen (provided caller executes freeze plans for the page)
7342 */
7343 *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
7344 (freeze_xmax || xmax_already_frozen));
7345
7346 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7347 xmax_already_frozen))
7348 {
7349 /*
7350 * So far no previous tuple from the page made freezing mandatory.
7351 * Does this tuple force caller to freeze the entire page?
7352 */
7353 pagefrz->freeze_required =
7354 heap_tuple_should_freeze(tuple, cutoffs,
7355 &pagefrz->NoFreezePageRelfrozenXid,
7356 &pagefrz->NoFreezePageRelminMxid);
7357 }
7358
7359 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7360 return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
7361}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:7491
#define FRM_RETURN_IS_XID
Definition: heapam.c:6698
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6749
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7910
#define FRM_MARK_COMMITTED
Definition: heapam.c:6700
#define FRM_NOOP
Definition: heapam.c:6696
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:6699
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:6697
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
Definition: htup_details.h:442
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:2833
#define MultiXactIdIsValid(multi)
Definition: multixact.h:29
bool freeze_required
Definition: heapam.h:182
TransactionId OldestXmin
Definition: vacuum.h:279
MultiXactId OldestMxact
Definition: vacuum.h:280
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.h:282

References Assert(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
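
A sketch of the per-tuple planning loop, under the assumption that the caller has initialized 'pagefrz' (freeze_required cleared and the four XID/MXID trackers set) and sized 'frozen' at MaxHeapTuplesPerPage; the helper name is invented for illustration.

static int
plan_page_freezes_example(Page page, OffsetNumber maxoff,
                          const struct VacuumCutoffs *cutoffs,
                          HeapPageFreeze *pagefrz,
                          HeapTupleFreeze *frozen)
{
    int         nfrozen = 0;

    for (OffsetNumber offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleHeader htup;
        bool        totally_frozen;

        if (!ItemIdIsNormal(itemid))
            continue;           /* only normal items carry xmin/xmax */

        htup = (HeapTupleHeader) PageGetItem(page, itemid);

        if (heap_prepare_freeze_tuple(htup, cutoffs, pagefrz,
                                      &frozen[nfrozen], &totally_frozen))
        {
            /* Tuple needs work: keep the plan and note which item it is for. */
            frozen[nfrozen].offset = offnum;
            nfrozen++;
        }
        /* 'totally_frozen' would feed page-level all-frozen bookkeeping. */
    }

    return nfrozen;
}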

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)

Definition at line 570 of file heapam.c.

571{
572 HeapScanDesc scan = (HeapScanDesc) sscan;
573 Buffer buffer = scan->rs_cbuf;
574 BlockNumber block = scan->rs_cblock;
575 Snapshot snapshot;
576 Page page;
577 int lines;
578 bool all_visible;
579 bool check_serializable;
580
581 Assert(BufferGetBlockNumber(buffer) == block);
582
583 /* ensure we're not accidentally being used when not in pagemode */
585 snapshot = scan->rs_base.rs_snapshot;
586
587 /*
588 * Prune and repair fragmentation for the whole page, if possible.
589 */
590 heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
591
592 /*
593 * We must hold share lock on the buffer content while examining tuple
594 * visibility. Afterwards, however, the tuples we have found to be
595 * visible are guaranteed good as long as we hold the buffer pin.
596 */
598
599 page = BufferGetPage(buffer);
600 lines = PageGetMaxOffsetNumber(page);
601
602 /*
603 * If the all-visible flag indicates that all tuples on the page are
604 * visible to everyone, we can skip the per-tuple visibility tests.
605 *
606 * Note: In hot standby, a tuple that's already visible to all
607 * transactions on the primary might still be invisible to a read-only
608 * transaction in the standby. We partly handle this problem by tracking
609 * the minimum xmin of visible tuples as the cut-off XID while marking a
610 * page all-visible on the primary and WAL log that along with the
611 * visibility map SET operation. In hot standby, we wait for (or abort)
612 * all transactions that can potentially may not see one or more tuples on
613 * the page. That's how index-only scans work fine in hot standby. A
614 * crucial difference between index-only scans and heap scans is that the
615 * index-only scan completely relies on the visibility map where as heap
616 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
617 * the page-level flag can be trusted in the same way, because it might
618 * get propagated somehow without being explicitly WAL-logged, e.g. via a
619 * full page write. Until we can prove that beyond doubt, let's check each
620 * tuple for visibility the hard way.
621 */
622 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
623 check_serializable =
625
626 /*
627 * We call page_collect_tuples() with constant arguments, to get the
628 * compiler to constant fold the constant arguments. Separate calls with
629 * constant arguments, rather than variables, are needed on several
630 * compilers to actually perform constant folding.
631 */
632 if (likely(all_visible))
633 {
634 if (likely(!check_serializable))
635 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
636 block, lines, true, false);
637 else
638 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
639 block, lines, true, true);
640 }
641 else
642 {
643 if (likely(!check_serializable))
644 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
645 block, lines, false, false);
646 else
647 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
648 block, lines, false, true);
649 }
650
652}
#define likely(x)
Definition: c.h:417
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition: heapam.c:520
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition: predicate.c:3989
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:209
uint32 rs_ntuples
Definition: heapam.h:99
BlockNumber rs_cblock
Definition: heapam.h:69
bool takenDuringRecovery
Definition: snapshot.h:180

References Assert(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().
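
A sketch of the page-at-a-time pattern (hypothetical fragment): the scan machinery has already pinned the next block into rs_cbuf, a single call prunes the page and collects every visible tuple, and the page can then be consumed from the cached offsets while holding only the pin.

static void
pagescan_example(TableScanDesc sscan)
{
    HeapScanDesc scan = (HeapScanDesc) sscan;

    heap_prepare_pagescan(sscan);   /* prune, share-lock, collect, unlock */

    for (uint32 i = 0; i < scan->rs_ntuples; i++)
    {
        /*
         * Each of the rs_ntuples entries cached by page_collect_tuples()
         * identifies a tuple already proven visible to the snapshot.
         */
    }
}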

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1272 of file heapam.c.

1274{
1275 HeapScanDesc scan = (HeapScanDesc) sscan;
1276
1277 if (set_params)
1278 {
1279 if (allow_strat)
1281 else
1282 scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1283
1284 if (allow_sync)
1286 else
1287 scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1288
1289 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1292 else
1294 }
1295
1296 /*
1297 * unpin scan buffers
1298 */
1299 if (BufferIsValid(scan->rs_cbuf))
1300 {
1301 ReleaseBuffer(scan->rs_cbuf);
1302 scan->rs_cbuf = InvalidBuffer;
1303 }
1304
1305 /*
1306 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1307 * additional data vs a normal HeapScan
1308 */
1309
1310 /*
1311 * The read stream is reset on rescan. This must be done before
1312 * initscan(), as some state referred to by read_stream_reset() is reset
1313 * in initscan().
1314 */
1315 if (scan->rs_read_stream)
1317
1318 /*
1319 * reinitialize scan descriptor
1320 */
1321 initscan(scan, key, true);
1322}
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:1044
@ SO_ALLOW_STRAT
Definition: tableam.h:58
@ SO_ALLOW_SYNC
Definition: tableam.h:60

References BufferIsValid(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.
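
A minimal usage sketch (hypothetical): restarting an existing scan, for example when an executor node is rescanned, either keeping or overriding the flags the scan was opened with.

static void
rescan_example(TableScanDesc sscan)
{
    /* Keep whatever strategy/sync/pagemode flags the scan already has. */
    heap_rescan(sscan, NULL, false, false, false, false);

    /* Or restart while setting those parameters explicitly: */
    heap_rescan(sscan, NULL, true,
                true,       /* allow_strat: use a bulk-read buffer strategy */
                false,      /* allow_sync: no synchronized-scan start block */
                true);      /* allow_pagemode: page-at-a-time visibility */
}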

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)

Definition at line 1443 of file heapam.c.

1445{
1446 HeapScanDesc scan = (HeapScanDesc) sscan;
1447 BlockNumber startBlk;
1448 BlockNumber numBlks;
1449 ItemPointerData highestItem;
1450 ItemPointerData lowestItem;
1451
1452 /*
1453 * For relations without any pages, we can simply leave the TID range
1454 * unset. There will be no tuples to scan, therefore no tuples outside
1455 * the given TID range.
1456 */
1457 if (scan->rs_nblocks == 0)
1458 return;
1459
1460 /*
1461 * Set up some ItemPointers which point to the first and last possible
1462 * tuples in the heap.
1463 */
1464 ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
1465 ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
1466
1467 /*
1468 * If the given maximum TID is below the highest possible TID in the
1469 * relation, then restrict the range to that, otherwise we scan to the end
1470 * of the relation.
1471 */
1472 if (ItemPointerCompare(maxtid, &highestItem) < 0)
1473 ItemPointerCopy(maxtid, &highestItem);
1474
1475 /*
1476 * If the given minimum TID is above the lowest possible TID in the
1477 * relation, then restrict the range to only scan for TIDs above that.
1478 */
1479 if (ItemPointerCompare(mintid, &lowestItem) > 0)
1480 ItemPointerCopy(mintid, &lowestItem);
1481
1482 /*
 1483 * Check for an empty range and protect from would-be negative results
1484 * from the numBlks calculation below.
1485 */
1486 if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
1487 {
1488 /* Set an empty range of blocks to scan */
1489 heap_setscanlimits(sscan, 0, 0);
1490 return;
1491 }
1492
1493 /*
1494 * Calculate the first block and the number of blocks we must scan. We
1495 * could be more aggressive here and perform some more validation to try
1496 * and further narrow the scope of blocks to scan by checking if the
1497 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1498 * advance startBlk by one. Likewise, if highestItem has an offset of 0
 1499 * we could scan one fewer block. However, such an optimization does not
1500 * seem worth troubling over, currently.
1501 */
1502 startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
1503
1504 numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
1505 ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
1506
1507 /* Set the start block and number of blocks to scan */
1508 heap_setscanlimits(sscan, startBlk, numBlks);
1509
1510 /* Finally, set the TID range in sscan */
1511 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1512 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1513}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:498
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
#define MaxOffsetNumber
Definition: off.h:28
BlockNumber rs_nblocks
Definition: heapam.h:61

References FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, HeapScanDescData::rs_nblocks, TableScanDescData::st, and TableScanDescData::tidrange.
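
A minimal sketch (hypothetical; the block numbers are arbitrary example values) of constraining an already-opened TID-range scan:

static void
tidrange_example(TableScanDesc sscan)
{
    ItemPointerData mintid;
    ItemPointerData maxtid;

    /* Restrict the scan to TIDs in blocks 0 through 9, inclusive. */
    ItemPointerSet(&mintid, 0, FirstOffsetNumber);
    ItemPointerSet(&maxtid, 9, MaxOffsetNumber);

    heap_set_tidrange(sscan, &mintid, &maxtid);

    /* Subsequent heap_getnextslot_tidrange() calls stay within that range. */
}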

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)

Definition at line 498 of file heapam.c.

499{
500 HeapScanDesc scan = (HeapScanDesc) sscan;
501
502 Assert(!scan->rs_inited); /* else too late to change */
503 /* else rs_startblock is significant */
505
506 /* Check startBlk is valid (but allow case of zero blocks...) */
507 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
508
509 scan->rs_startblock = startBlk;
510 scan->rs_numblocks = numBlks;
511}
bool rs_inited
Definition: heapam.h:67
BlockNumber rs_startblock
Definition: heapam.h:62
BlockNumber rs_numblocks
Definition: heapam.h:63

References Assert(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
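
A minimal sketch (hypothetical; the block range is an arbitrary example): the limits must be installed before the first tuple is fetched and on a scan opened without synchronized scanning, which is what the Asserts in the definition above enforce.

static void
scanlimits_example(TableScanDesc sscan)
{
    HeapTuple   tuple;

    /* Scan only the 50 blocks starting at block 100. */
    heap_setscanlimits(sscan, 100, 50);

    while ((tuple = heap_getnext(sscan, ForwardScanDirection)) != NULL)
    {
        /* only tuples from the limited block range are returned */
    }
}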

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)

Definition at line 7855 of file heapam.c.

7856{
7857 TransactionId xid;
7858
7859 /*
7860 * If xmin is a normal transaction ID, this tuple is definitely not
7861 * frozen.
7862 */
7863 xid = HeapTupleHeaderGetXmin(tuple);
7864 if (TransactionIdIsNormal(xid))
7865 return true;
7866
7867 /*
7868 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7869 */
7870 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7871 {
7872 MultiXactId multi;
7873
7874 multi = HeapTupleHeaderGetRawXmax(tuple);
7875 if (MultiXactIdIsValid(multi))
7876 return true;
7877 }
7878 else
7879 {
7880 xid = HeapTupleHeaderGetRawXmax(tuple);
7881 if (TransactionIdIsNormal(xid))
7882 return true;
7883 }
7884
7885 if (tuple->t_infomask & HEAP_MOVED)
7886 {
7887 xid = HeapTupleHeaderGetXvac(tuple);
7888 if (TransactionIdIsNormal(xid))
7889 return true;
7890 }
7891
7892 return false;
7893}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_would_be_all_visible().
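
A sketch of how a checker might use this (hypothetical helper, loosely modeled on the callers listed above): a page can only be considered all-frozen if no tuple on it will ever need freezing.

static bool
page_would_be_all_frozen_example(Page page, OffsetNumber maxoff)
{
    for (OffsetNumber offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);

        if (!ItemIdIsNormal(itemid))
            continue;

        if (heap_tuple_needs_eventual_freeze((HeapTupleHeader) PageGetItem(page, itemid)))
            return false;       /* some xmin/xmax/xvac still needs freezing */
    }

    return true;
}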

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)

Definition at line 7910 of file heapam.c.

7914{
7915 TransactionId xid;
7916 MultiXactId multi;
7917 bool freeze = false;
7918
7919 /* First deal with xmin */
7920 xid = HeapTupleHeaderGetXmin(tuple);
7921 if (TransactionIdIsNormal(xid))
7922 {
7924 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7925 *NoFreezePageRelfrozenXid = xid;
7926 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7927 freeze = true;
7928 }
7929
7930 /* Now deal with xmax */
7932 multi = InvalidMultiXactId;
7933 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7934 multi = HeapTupleHeaderGetRawXmax(tuple);
7935 else
7936 xid = HeapTupleHeaderGetRawXmax(tuple);
7937
7938 if (TransactionIdIsNormal(xid))
7939 {
7941 /* xmax is a non-permanent XID */
7942 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7943 *NoFreezePageRelfrozenXid = xid;
7944 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7945 freeze = true;
7946 }
7947 else if (!MultiXactIdIsValid(multi))
7948 {
7949 /* xmax is a permanent XID or invalid MultiXactId/XID */
7950 }
7951 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7952 {
7953 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7954 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7955 *NoFreezePageRelminMxid = multi;
7956 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7957 freeze = true;
7958 }
7959 else
7960 {
7961 /* xmax is a MultiXactId that may have an updater XID */
7962 MultiXactMember *members;
7963 int nmembers;
7964
7966 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7967 *NoFreezePageRelminMxid = multi;
7968 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
7969 freeze = true;
7970
7971 /* need to check whether any member of the mxact is old */
7972 nmembers = GetMultiXactIdMembers(multi, &members, false,
7974
7975 for (int i = 0; i < nmembers; i++)
7976 {
7977 xid = members[i].xid;
7979 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7980 *NoFreezePageRelfrozenXid = xid;
7981 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7982 freeze = true;
7983 }
7984 if (nmembers > 0)
7985 pfree(members);
7986 }
7987
7988 if (tuple->t_infomask & HEAP_MOVED)
7989 {
7990 xid = HeapTupleHeaderGetXvac(tuple);
7991 if (TransactionIdIsNormal(xid))
7992 {
7994 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7995 *NoFreezePageRelfrozenXid = xid;
7996 /* heap_prepare_freeze_tuple forces xvac freezing */
7997 freeze = true;
7998 }
7999 }
8000
8001 return freeze;
8002}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
Definition: htup_details.h:251
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:2847
#define InvalidMultiXactId
Definition: multixact.h:25
TransactionId xid
Definition: multixact.h:57

References Assert(), VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
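
A sketch of the "no freeze" bookkeeping (hypothetical fragment): even when a caller declines to freeze a page, each tuple can clamp the relfrozenxid/relminmxid values the relation may be advanced to, and the return value reports whether the FreezeLimit/MultiXactCutoff line has been crossed.

static void
track_no_freeze_example(HeapTupleHeader tuple,
                        const struct VacuumCutoffs *cutoffs,
                        TransactionId *NoFreezePageRelfrozenXid,
                        MultiXactId *NoFreezePageRelminMxid,
                        bool *page_should_be_frozen)
{
    if (heap_tuple_should_freeze(tuple, cutoffs,
                                 NoFreezePageRelfrozenXid,
                                 NoFreezePageRelminMxid))
        *page_should_be_frozen = true;
}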

◆ heap_update()

TM_Result heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)

Definition at line 3276 of file heapam.c.

3280{
3281 TM_Result result;
3283 Bitmapset *hot_attrs;
3284 Bitmapset *sum_attrs;
3285 Bitmapset *key_attrs;
3286 Bitmapset *id_attrs;
3287 Bitmapset *interesting_attrs;
3288 Bitmapset *modified_attrs;
3289 ItemId lp;
3290 HeapTupleData oldtup;
3291 HeapTuple heaptup;
3292 HeapTuple old_key_tuple = NULL;
3293 bool old_key_copied = false;
3294 Page page;
3295 BlockNumber block;
3296 MultiXactStatus mxact_status;
3297 Buffer buffer,
3298 newbuf,
3299 vmbuffer = InvalidBuffer,
3300 vmbuffer_new = InvalidBuffer;
3301 bool need_toast;
3302 Size newtupsize,
3303 pagefree;
3304 bool have_tuple_lock = false;
3305 bool iscombo;
3306 bool use_hot_update = false;
3307 bool summarized_update = false;
3308 bool key_intact;
3309 bool all_visible_cleared = false;
3310 bool all_visible_cleared_new = false;
3311 bool checked_lockers;
3312 bool locker_remains;
3313 bool id_has_external = false;
3314 TransactionId xmax_new_tuple,
3315 xmax_old_tuple;
3316 uint16 infomask_old_tuple,
3317 infomask2_old_tuple,
3318 infomask_new_tuple,
3319 infomask2_new_tuple;
3320
3322
3323 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3326
3327 AssertHasSnapshotForToast(relation);
3328
3329 /*
3330 * Forbid this during a parallel operation, lest it allocate a combo CID.
3331 * Other workers might need that combo CID for visibility checks, and we
3332 * have no provision for broadcasting it to them.
3333 */
3334 if (IsInParallelMode())
3335 ereport(ERROR,
3336 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
3337 errmsg("cannot update tuples during a parallel operation")));
3338
3339#ifdef USE_ASSERT_CHECKING
3340 check_lock_if_inplace_updateable_rel(relation, otid, newtup);
3341#endif
3342
3343 /*
3344 * Fetch the list of attributes to be checked for various operations.
3345 *
3346 * For HOT considerations, this is wasted effort if we fail to update or
3347 * have to put the new tuple on a different page. But we must compute the
3348 * list before obtaining buffer lock --- in the worst case, if we are
3349 * doing an update on one of the relevant system catalogs, we could
3350 * deadlock if we try to fetch the list later. In any case, the relcache
3351 * caches the data so this is usually pretty cheap.
3352 *
3353 * We also need columns used by the replica identity and columns that are
3354 * considered the "key" of rows in the table.
3355 *
3356 * Note that we get copies of each bitmap, so we need not worry about
3357 * relcache flush happening midway through.
3358 */
3359 hot_attrs = RelationGetIndexAttrBitmap(relation,
3361 sum_attrs = RelationGetIndexAttrBitmap(relation,
3364 id_attrs = RelationGetIndexAttrBitmap(relation,
3366 interesting_attrs = NULL;
3367 interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
3368 interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
3369 interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
3370 interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
3371
3372 block = ItemPointerGetBlockNumber(otid);
3373 INJECTION_POINT("heap_update-before-pin", NULL);
3374 buffer = ReadBuffer(relation, block);
3375 page = BufferGetPage(buffer);
3376
3377 /*
3378 * Before locking the buffer, pin the visibility map page if it appears to
3379 * be necessary. Since we haven't got the lock yet, someone else might be
3380 * in the middle of changing this, so we'll need to recheck after we have
3381 * the lock.
3382 */
3383 if (PageIsAllVisible(page))
3384 visibilitymap_pin(relation, block, &vmbuffer);
3385
3387
3388 lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
3389
3390 /*
3391 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3392 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3393 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3394 * of which indicates concurrent pruning.
3395 *
3396 * Failing with TM_Updated would be most accurate. However, unlike other
3397 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3398 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3399 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3400 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3401 * TM_Updated and TM_Deleted affects only the wording of error messages.
3402 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3403 * the specification of when tmfd->ctid is valid. Second, it creates
3404 * error log evidence that we took this branch.
3405 *
3406 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3407 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3408 * unrelated row, we'll fail with "duplicate key value violates unique".
3409 * XXX if otid is the live, newer version of the newtup row, we'll discard
3410 * changes originating in versions of this catalog row after the version
3411 * the caller got from syscache. See syscache-update-pruned.spec.
3412 */
3413 if (!ItemIdIsNormal(lp))
3414 {
3416
3417 UnlockReleaseBuffer(buffer);
3418 Assert(!have_tuple_lock);
3419 if (vmbuffer != InvalidBuffer)
3420 ReleaseBuffer(vmbuffer);
3421 tmfd->ctid = *otid;
3422 tmfd->xmax = InvalidTransactionId;
3423 tmfd->cmax = InvalidCommandId;
3424 *update_indexes = TU_None;
3425
3426 bms_free(hot_attrs);
3427 bms_free(sum_attrs);
3428 bms_free(key_attrs);
3429 bms_free(id_attrs);
3430 /* modified_attrs not yet initialized */
3431 bms_free(interesting_attrs);
3432 return TM_Deleted;
3433 }
3434
3435 /*
3436 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3437 * properly.
3438 */
3439 oldtup.t_tableOid = RelationGetRelid(relation);
3440 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3441 oldtup.t_len = ItemIdGetLength(lp);
3442 oldtup.t_self = *otid;
3443
3444 /* the new tuple is ready, except for this: */
3445 newtup->t_tableOid = RelationGetRelid(relation);
3446
3447 /*
3448 * Determine columns modified by the update. Additionally, identify
3449 * whether any of the unmodified replica identity key attributes in the
3450 * old tuple is externally stored or not. This is required because for
3451 * such attributes the flattened value won't be WAL logged as part of the
3452 * new tuple so we must include it as part of the old_key_tuple. See
3453 * ExtractReplicaIdentity.
3454 */
3455 modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
3456 id_attrs, &oldtup,
3457 newtup, &id_has_external);
3458
3459 /*
3460 * If we're not updating any "key" column, we can grab a weaker lock type.
3461 * This allows for more concurrency when we are running simultaneously
3462 * with foreign key checks.
3463 *
3464 * Note that if a column gets detoasted while executing the update, but
3465 * the value ends up being the same, this test will fail and we will use
3466 * the stronger lock. This is acceptable; the important case to optimize
3467 * is updates that don't manipulate key columns, not those that
3468 * serendipitously arrive at the same key values.
3469 */
3470 if (!bms_overlap(modified_attrs, key_attrs))
3471 {
3472 *lockmode = LockTupleNoKeyExclusive;
3473 mxact_status = MultiXactStatusNoKeyUpdate;
3474 key_intact = true;
3475
3476 /*
3477 * If this is the first possibly-multixact-able operation in the
3478 * current transaction, set my per-backend OldestMemberMXactId
3479 * setting. We can be certain that the transaction will never become a
3480 * member of any older MultiXactIds than that. (We have to do this
3481 * even if we end up just using our own TransactionId below, since
3482 * some other backend could incorporate our XID into a MultiXact
3483 * immediately afterwards.)
3484 */
3486 }
3487 else
3488 {
3489 *lockmode = LockTupleExclusive;
3490 mxact_status = MultiXactStatusUpdate;
3491 key_intact = false;
3492 }
3493
3494 /*
3495 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3496 * otid may very well point at newtup->t_self, which we will overwrite
3497 * with the new tuple's location, so there's great risk of confusion if we
3498 * use otid anymore.
3499 */
3500
3501l2:
3502 checked_lockers = false;
3503 locker_remains = false;
3504 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3505
3506 /* see below about the "no wait" case */
3507 Assert(result != TM_BeingModified || wait);
3508
3509 if (result == TM_Invisible)
3510 {
3511 UnlockReleaseBuffer(buffer);
3512 ereport(ERROR,
3513 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3514 errmsg("attempted to update invisible tuple")));
3515 }
3516 else if (result == TM_BeingModified && wait)
3517 {
3518 TransactionId xwait;
3519 uint16 infomask;
3520 bool can_continue = false;
3521
3522 /*
3523 * XXX note that we don't consider the "no wait" case here. This
3524 * isn't a problem currently because no caller uses that case, but it
3525 * should be fixed if such a caller is introduced. It wasn't a
3526 * problem previously because this code would always wait, but now
3527 * that some tuple locks do not conflict with one of the lock modes we
3528 * use, it is possible that this case is interesting to handle
3529 * specially.
3530 *
3531 * This may cause failures with third-party code that calls
3532 * heap_update directly.
3533 */
3534
3535 /* must copy state data before unlocking buffer */
3536 xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3537 infomask = oldtup.t_data->t_infomask;
3538
3539 /*
3540 * Now we have to do something about the existing locker. If it's a
3541 * multi, sleep on it; we might be awakened before it is completely
3542 * gone (or even not sleep at all in some cases); we need to preserve
3543 * it as locker, unless it is gone completely.
3544 *
3545 * If it's not a multi, we need to check for sleeping conditions
3546 * before actually going to sleep. If the update doesn't conflict
3547 * with the locks, we just continue without sleeping (but making sure
3548 * it is preserved).
3549 *
3550 * Before sleeping, we need to acquire tuple lock to establish our
3551 * priority for the tuple (see heap_lock_tuple). LockTuple will
3552 * release us when we are next-in-line for the tuple. Note we must
3553 * not acquire the tuple lock until we're sure we're going to sleep;
3554 * otherwise we're open for race conditions with other transactions
3555 * holding the tuple lock which sleep on us.
3556 *
3557 * If we are forced to "start over" below, we keep the tuple lock;
3558 * this arranges that we stay at the head of the line while rechecking
3559 * tuple state.
3560 */
3561 if (infomask & HEAP_XMAX_IS_MULTI)
3562 {
3563 TransactionId update_xact;
3564 int remain;
3565 bool current_is_member = false;
3566
3567 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
3568 *lockmode, &current_is_member))
3569 {
3571
3572 /*
3573 * Acquire the lock, if necessary (but skip it when we're
3574 * requesting a lock and already have one; avoids deadlock).
3575 */
3576 if (!current_is_member)
3577 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3578 LockWaitBlock, &have_tuple_lock);
3579
3580 /* wait for multixact */
3581 MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
3582 relation, &oldtup.t_self, XLTW_Update,
3583 &remain);
3584 checked_lockers = true;
3585 locker_remains = remain != 0;
3587
3588 /*
3589 * If xwait had just locked the tuple then some other xact
3590 * could update this tuple before we get to this point. Check
3591 * for xmax change, and start over if so.
3592 */
3594 infomask) ||
3596 xwait))
3597 goto l2;
3598 }
3599
3600 /*
3601 * Note that the multixact may not be done by now. It could have
3602 * surviving members; our own xact or other subxacts of this
3603 * backend, and also any other concurrent transaction that locked
3604 * the tuple with LockTupleKeyShare if we only got
3605 * LockTupleNoKeyExclusive. If this is the case, we have to be
3606 * careful to mark the updated tuple with the surviving members in
3607 * Xmax.
3608 *
3609 * Note that there could have been another update in the
3610 * MultiXact. In that case, we need to check whether it committed
3611 * or aborted. If it aborted we are safe to update it again;
3612 * otherwise there is an update conflict, and we have to return
3613 * TableTuple{Deleted, Updated} below.
3614 *
3615 * In the LockTupleExclusive case, we still need to preserve the
3616 * surviving members: those would include the tuple locks we had
3617 * before this one, which are important to keep in case this
3618 * subxact aborts.
3619 */
3621 update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
3622 else
3623 update_xact = InvalidTransactionId;
3624
3625 /*
3626 * There was no UPDATE in the MultiXact; or it aborted. No
3627 * TransactionIdIsInProgress() call needed here, since we called
3628 * MultiXactIdWait() above.
3629 */
3630 if (!TransactionIdIsValid(update_xact) ||
3631 TransactionIdDidAbort(update_xact))
3632 can_continue = true;
3633 }
3635 {
3636 /*
3637 * The only locker is ourselves; we can avoid grabbing the tuple
3638 * lock here, but must preserve our locking information.
3639 */
3640 checked_lockers = true;
3641 locker_remains = true;
3642 can_continue = true;
3643 }
3644 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
3645 {
3646 /*
3647 * If it's just a key-share locker, and we're not changing the key
3648 * columns, we don't need to wait for it to end; but we need to
3649 * preserve it as locker.
3650 */
3651 checked_lockers = true;
3652 locker_remains = true;
3653 can_continue = true;
3654 }
3655 else
3656 {
3657 /*
3658 * Wait for regular transaction to end; but first, acquire tuple
3659 * lock.
3660 */
3662 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3663 LockWaitBlock, &have_tuple_lock);
3664 XactLockTableWait(xwait, relation, &oldtup.t_self,
3665 XLTW_Update);
3666 checked_lockers = true;
3668
3669 /*
3670 * xwait is done, but if xwait had just locked the tuple then some
3671 * other xact could update this tuple before we get to this point.
3672 * Check for xmax change, and start over if so.
3673 */
3674 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3675 !TransactionIdEquals(xwait,
3677 goto l2;
3678
3679 /* Otherwise check if it committed or aborted */
3680 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3681 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3682 can_continue = true;
3683 }
3684
3685 if (can_continue)
3686 result = TM_Ok;
3687 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3688 result = TM_Updated;
3689 else
3690 result = TM_Deleted;
3691 }
3692
3693 /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3694 if (result != TM_Ok)
3695 {
3696 Assert(result == TM_SelfModified ||
3697 result == TM_Updated ||
3698 result == TM_Deleted ||
3699 result == TM_BeingModified);
3701 Assert(result != TM_Updated ||
3702 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3703 }
3704
3705 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3706 {
3707 /* Perform additional check for transaction-snapshot mode RI updates */
3708 if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
3709 result = TM_Updated;
3710 }
3711
3712 if (result != TM_Ok)
3713 {
3714 tmfd->ctid = oldtup.t_data->t_ctid;
3715 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3716 if (result == TM_SelfModified)
3717 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3718 else
3719 tmfd->cmax = InvalidCommandId;
3720 UnlockReleaseBuffer(buffer);
3721 if (have_tuple_lock)
3722 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3723 if (vmbuffer != InvalidBuffer)
3724 ReleaseBuffer(vmbuffer);
3725 *update_indexes = TU_None;
3726
3727 bms_free(hot_attrs);
3728 bms_free(sum_attrs);
3729 bms_free(key_attrs);
3730 bms_free(id_attrs);
3731 bms_free(modified_attrs);
3732 bms_free(interesting_attrs);
3733 return result;
3734 }
3735
3736 /*
3737 * If we didn't pin the visibility map page and the page has become all
3738 * visible while we were busy locking the buffer, or during some
3739 * subsequent window during which we had it unlocked, we'll have to unlock
3740 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3741 * bit unfortunate, especially since we'll now have to recheck whether the
3742 * tuple has been locked or updated under us, but hopefully it won't
3743 * happen very often.
3744 */
3745 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3746 {
3748 visibilitymap_pin(relation, block, &vmbuffer);
3750 goto l2;
3751 }
3752
3753 /* Fill in transaction status data */
3754
3755 /*
3756 * If the tuple we're updating is locked, we need to preserve the locking
3757 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3758 */
3760 oldtup.t_data->t_infomask,
3761 oldtup.t_data->t_infomask2,
3762 xid, *lockmode, true,
3763 &xmax_old_tuple, &infomask_old_tuple,
3764 &infomask2_old_tuple);
3765
3766 /*
3767 * And also prepare an Xmax value for the new copy of the tuple. If there
3768 * was no xmax previously, or there was one but all lockers are now gone,
3769 * then use InvalidTransactionId; otherwise, get the xmax from the old
3770 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3771 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3772 */
3773 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3775 (checked_lockers && !locker_remains))
3776 xmax_new_tuple = InvalidTransactionId;
3777 else
3778 xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3779
3780 if (!TransactionIdIsValid(xmax_new_tuple))
3781 {
3782 infomask_new_tuple = HEAP_XMAX_INVALID;
3783 infomask2_new_tuple = 0;
3784 }
3785 else
3786 {
3787 /*
3788 * If we found a valid Xmax for the new tuple, then the infomask bits
3789 * to use on the new tuple depend on what was there on the old one.
3790 * Note that since we're doing an update, the only possibility is that
3791 * the lockers had FOR KEY SHARE lock.
3792 */
3793 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3794 {
3795 GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
3796 &infomask2_new_tuple);
3797 }
3798 else
3799 {
3800 infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
3801 infomask2_new_tuple = 0;
3802 }
3803 }
3804
3805 /*
3806 * Prepare the new tuple with the appropriate initial values of Xmin and
3807 * Xmax, as well as initial infomask bits as computed above.
3808 */
3809 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3810 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3811 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3812 HeapTupleHeaderSetCmin(newtup->t_data, cid);
3813 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3814 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3815 HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
3816
3817 /*
3818 * Replace cid with a combo CID if necessary. Note that we already put
3819 * the plain cid into the new tuple.
3820 */
3821 HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
3822
3823 /*
3824 * If the toaster needs to be activated, OR if the new tuple will not fit
3825 * on the same page as the old, then we need to release the content lock
3826 * (but not the pin!) on the old tuple's buffer while we are off doing
3827 * TOAST and/or table-file-extension work. We must mark the old tuple to
3828 * show that it's locked, else other processes may try to update it
3829 * themselves.
3830 *
3831 * We need to invoke the toaster if there are already any out-of-line
3832 * toasted values present, or if the new tuple is over-threshold.
3833 */
3834 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3835 relation->rd_rel->relkind != RELKIND_MATVIEW)
3836 {
3837 /* toast table entries should never be recursively toasted */
3838 Assert(!HeapTupleHasExternal(&oldtup));
3839 Assert(!HeapTupleHasExternal(newtup));
3840 need_toast = false;
3841 }
3842 else
3843 need_toast = (HeapTupleHasExternal(&oldtup) ||
3844 HeapTupleHasExternal(newtup) ||
3845 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3846
3847 pagefree = PageGetHeapFreeSpace(page);
3848
3849 newtupsize = MAXALIGN(newtup->t_len);
3850
3851 if (need_toast || newtupsize > pagefree)
3852 {
3853 TransactionId xmax_lock_old_tuple;
3854 uint16 infomask_lock_old_tuple,
3855 infomask2_lock_old_tuple;
3856 bool cleared_all_frozen = false;
3857
3858 /*
3859 * To prevent concurrent sessions from updating the tuple, we have to
3860 * temporarily mark it locked, while we release the page-level lock.
3861 *
3862 * To satisfy the rule that any xid potentially appearing in a buffer
3863 * written out to disk must first be covered by WAL, we unfortunately
3864 * have to WAL-log this temporary modification. We can reuse xl_heap_lock for this
3865 * purpose. If we crash/error before following through with the
3866 * actual update, xmax will be of an aborted transaction, allowing
3867 * other sessions to proceed.
3868 */
3869
3870 /*
3871 * Compute xmax / infomask appropriate for locking the tuple. This has
3872 * to be done separately from the combo that's going to be used for
3873 * updating, because the potentially created multixact would otherwise
3874 * be wrong.
3875 */
3877 oldtup.t_data->t_infomask,
3878 oldtup.t_data->t_infomask2,
3879 xid, *lockmode, false,
3880 &xmax_lock_old_tuple, &infomask_lock_old_tuple,
3881 &infomask2_lock_old_tuple);
3882
3883 Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
3884
3886
3887 /* Clear obsolete visibility flags ... */
3889 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3890 HeapTupleClearHotUpdated(&oldtup);
3891 /* ... and store info about transaction updating this tuple */
3892 Assert(TransactionIdIsValid(xmax_lock_old_tuple));
3893 HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
3894 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3895 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3896 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3897
3898 /* temporarily make it look not-updated, but locked */
3899 oldtup.t_data->t_ctid = oldtup.t_self;
3900
3901 /*
3902 * Clear all-frozen bit on visibility map if needed. We could
3903 * immediately reset ALL_VISIBLE, but given that the WAL logging
3904 * overhead would be unchanged, that doesn't seem necessarily
3905 * worthwhile.
3906 */
3907 if (PageIsAllVisible(page) &&
3908 visibilitymap_clear(relation, block, vmbuffer,
3910 cleared_all_frozen = true;
3911
3912 MarkBufferDirty(buffer);
3913
3914 if (RelationNeedsWAL(relation))
3915 {
3916 xl_heap_lock xlrec;
3917 XLogRecPtr recptr;
3918
3921
3922 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3923 xlrec.xmax = xmax_lock_old_tuple;
3925 oldtup.t_data->t_infomask2);
3926 xlrec.flags =
3927 cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
3929 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
3930 PageSetLSN(page, recptr);
3931 }
3932
3934
3936
3937 /*
3938 * Let the toaster do its thing, if needed.
3939 *
3940 * Note: below this point, heaptup is the data we actually intend to
3941 * store into the relation; newtup is the caller's original untoasted
3942 * data.
3943 */
3944 if (need_toast)
3945 {
3946 /* Note we always use WAL and FSM during updates */
3947 heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
3948 newtupsize = MAXALIGN(heaptup->t_len);
3949 }
3950 else
3951 heaptup = newtup;
3952
3953 /*
3954 * Now, do we need a new page for the tuple, or not? This is a bit
3955 * tricky since someone else could have added tuples to the page while
3956 * we weren't looking. We have to recheck the available space after
3957 * reacquiring the buffer lock. But don't bother to do that if the
3958 * former amount of free space is still not enough; it's unlikely
3959 * there's more free now than before.
3960 *
3961 * What's more, if we need to get a new page, we will need to acquire
3962 * buffer locks on both old and new pages. To avoid deadlock against
3963 * some other backend trying to get the same two locks in the other
3964 * order, we must be consistent about the order we get the locks in.
3965 * We use the rule "lock the lower-numbered page of the relation
3966 * first". To implement this, we must do RelationGetBufferForTuple
3967 * while not holding the lock on the old page, and we must rely on it
3968 * to get the locks on both pages in the correct order.
3969 *
3970 * Another consideration is that we need visibility map page pin(s) if
3971 * we will have to clear the all-visible flag on either page. If we
3972 * call RelationGetBufferForTuple, we rely on it to acquire any such
3973 * pins; but if we don't, we have to handle that here. Hence we need
3974 * a loop.
3975 */
3976 for (;;)
3977 {
3978 if (newtupsize > pagefree)
3979 {
3980 /* It doesn't fit, must use RelationGetBufferForTuple. */
3981 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
3982 buffer, 0, NULL,
3983 &vmbuffer_new, &vmbuffer,
3984 0);
3985 /* We're all done. */
3986 break;
3987 }
3988 /* Acquire VM page pin if needed and we don't have it. */
3989 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3990 visibilitymap_pin(relation, block, &vmbuffer);
3991 /* Re-acquire the lock on the old tuple's page. */
3993 /* Re-check using the up-to-date free space */
3994 pagefree = PageGetHeapFreeSpace(page);
3995 if (newtupsize > pagefree ||
3996 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
3997 {
3998 /*
3999 * Rats, it doesn't fit anymore, or somebody just now set the
4000 * all-visible flag. We must now unlock and loop to avoid
4001 * deadlock. Fortunately, this path should seldom be taken.
4002 */
4004 }
4005 else
4006 {
4007 /* We're all done. */
4008 newbuf = buffer;
4009 break;
4010 }
4011 }
4012 }
4013 else
4014 {
4015 /* No TOAST work needed, and it'll fit on same page */
4016 newbuf = buffer;
4017 heaptup = newtup;
4018 }
4019
4020 /*
4021 * We're about to do the actual update -- check for conflict first, to
4022 * avoid possibly having to roll back work we've just done.
4023 *
4024 * This is safe without a recheck as long as there is no possibility of
4025 * another process scanning the pages between this check and the update
4026 * being visible to the scan (i.e., exclusive buffer content lock(s) are
4027 * continuously held from this point until the tuple update is visible).
4028 *
4029 * For the new tuple the only check needed is at the relation level, but
4030 * since both tuples are in the same relation and the check for oldtup
4031 * will include checking the relation level, there is no benefit to a
4032 * separate check for the new tuple.
4033 */
4034 CheckForSerializableConflictIn(relation, &oldtup.t_self,
4035 BufferGetBlockNumber(buffer));
4036
4037 /*
4038 * At this point newbuf and buffer are both pinned and locked, and newbuf
4039 * has enough space for the new tuple. If they are the same buffer, only
4040 * one pin is held.
4041 */
4042
4043 if (newbuf == buffer)
4044 {
4045 /*
4046 * Since the new tuple is going into the same page, we might be able
4047 * to do a HOT update. Check if any of the index columns have been
4048 * changed.
4049 */
4050 if (!bms_overlap(modified_attrs, hot_attrs))
4051 {
4052 use_hot_update = true;
4053
4054 /*
4055 * If none of the columns that are used in hot-blocking indexes
4056 * were updated, we can apply HOT, but we do still need to check
4057 * if we need to update the summarizing indexes, and update those
4058 * indexes if the columns were updated, or we may fail to detect
4059 * e.g. value bound changes in BRIN minmax indexes.
4060 */
4061 if (bms_overlap(modified_attrs, sum_attrs))
4062 summarized_update = true;
4063 }
4064 }
4065 else
4066 {
4067 /* Set a hint that the old page could use prune/defrag */
4068 PageSetFull(page);
4069 }
4070
4071 /*
4072 * Compute replica identity tuple before entering the critical section so
4073 * we don't PANIC upon a memory allocation failure.
4074 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4075 * logged. Pass old key required as true only if the replica identity key
4076 * columns are modified or it has external data.
4077 */
4078 old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
4079 bms_overlap(modified_attrs, id_attrs) ||
4080 id_has_external,
4081 &old_key_copied);
4082
4083 /* NO EREPORT(ERROR) from here till changes are logged */
4085
4086 /*
4087 * If this transaction commits, the old tuple will become DEAD sooner or
4088 * later. Set flag that this page is a candidate for pruning once our xid
4089 * falls below the OldestXmin horizon. If the transaction finally aborts,
4090 * the subsequent page pruning will be a no-op and the hint will be
4091 * cleared.
4092 *
4093 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4094 * there would be a prunable tuple in the newbuf; but for now we choose
4095 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4096 * sync if this decision changes.
4097 */
4098 PageSetPrunable(page, xid);
4099
4100 if (use_hot_update)
4101 {
4102 /* Mark the old tuple as HOT-updated */
4103 HeapTupleSetHotUpdated(&oldtup);
4104 /* And mark the new tuple as heap-only */
4105 HeapTupleSetHeapOnly(heaptup);
4106 /* Mark the caller's copy too, in case different from heaptup */
4107 HeapTupleSetHeapOnly(newtup);
4108 }
4109 else
4110 {
4111 /* Make sure tuples are correctly marked as not-HOT */
4112 HeapTupleClearHotUpdated(&oldtup);
4113 HeapTupleClearHeapOnly(heaptup);
4114 HeapTupleClearHeapOnly(newtup);
4115 }
4116
4117 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4118
4119
4120 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4122 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4123 /* ... and store info about transaction updating this tuple */
4124 Assert(TransactionIdIsValid(xmax_old_tuple));
4125 HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
4126 oldtup.t_data->t_infomask |= infomask_old_tuple;
4127 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4128 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
4129
4130 /* record address of new tuple in t_ctid of old one */
4131 oldtup.t_data->t_ctid = heaptup->t_self;
4132
4133 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4134 if (PageIsAllVisible(BufferGetPage(buffer)))
4135 {
4136 all_visible_cleared = true;
4138 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4139 vmbuffer, VISIBILITYMAP_VALID_BITS);
4140 }
4141 if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
4142 {
4143 all_visible_cleared_new = true;
4145 visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
4146 vmbuffer_new, VISIBILITYMAP_VALID_BITS);
4147 }
4148
4149 if (newbuf != buffer)
4150 MarkBufferDirty(newbuf);
4151 MarkBufferDirty(buffer);
4152
4153 /* XLOG stuff */
4154 if (RelationNeedsWAL(relation))
4155 {
4156 XLogRecPtr recptr;
4157
4158 /*
4159 * For logical decoding we need combo CIDs to properly decode the
4160 * catalog.
4161 */
4163 {
4164 log_heap_new_cid(relation, &oldtup);
4165 log_heap_new_cid(relation, heaptup);
4166 }
4167
4168 recptr = log_heap_update(relation, buffer,
4169 newbuf, &oldtup, heaptup,
4170 old_key_tuple,
4171 all_visible_cleared,
4172 all_visible_cleared_new);
4173 if (newbuf != buffer)
4174 {
4175 PageSetLSN(BufferGetPage(newbuf), recptr);
4176 }
4177 PageSetLSN(BufferGetPage(buffer), recptr);
4178 }
4179
4181
4182 if (newbuf != buffer)
4185
4186 /*
4187 * Mark old tuple for invalidation from system caches at next command
4188 * boundary, and mark the new tuple for invalidation in case we abort. We
4189 * have to do this before releasing the buffer because oldtup is in the
4190 * buffer. (heaptup is all in local memory, but it's necessary to process
4191 * both tuple versions in one call to inval.c so we can avoid redundant
4192 * sinval messages.)
4193 */
4194 CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
4195
4196 /* Now we can release the buffer(s) */
4197 if (newbuf != buffer)
4198 ReleaseBuffer(newbuf);
4199 ReleaseBuffer(buffer);
4200 if (BufferIsValid(vmbuffer_new))
4201 ReleaseBuffer(vmbuffer_new);
4202 if (BufferIsValid(vmbuffer))
4203 ReleaseBuffer(vmbuffer);
4204
4205 /*
4206 * Release the lmgr tuple lock, if we had it.
4207 */
4208 if (have_tuple_lock)
4209 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4210
4211 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4212
4213 /*
4214 * If heaptup is a private copy, release it. Don't forget to copy t_self
4215 * back to the caller's image, too.
4216 */
4217 if (heaptup != newtup)
4218 {
4219 newtup->t_self = heaptup->t_self;
4220 heap_freetuple(heaptup);
4221 }
4222
4223 /*
4224 * If it is a HOT update, the update may still need to update summarized
4225 * indexes, lest we fail to update those summaries and get incorrect
4226 * results (for example, minmax bounds of the block may change with this
4227 * update).
4228 */
4229 if (use_hot_update)
4230 {
4231 if (summarized_update)
4232 *update_indexes = TU_Summarizing;
4233 else
4234 *update_indexes = TU_None;
4235 }
4236 else
4237 *update_indexes = TU_All;
4238
4239 if (old_key_tuple != NULL && old_key_copied)
4240 heap_freetuple(old_key_tuple);
4241
4242 bms_free(hot_attrs);
4243 bms_free(sum_attrs);
4244 bms_free(key_attrs);
4245 bms_free(id_attrs);
4246 bms_free(modified_attrs);
4247 bms_free(interesting_attrs);
4248
4249 return TM_Ok;
4250}
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:916
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:581
static void PageSetFull(Page page)
Definition: bufpage.h:417
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition: heapam.c:4430
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition: heapam.c:8883
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition: heapam.c:7624
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:780
#define HEAP2_XACT_MASK
Definition: htup_details.h:293
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:197
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
Definition: htup_details.h:422
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:792
#define HEAP_XACT_MASK
Definition: htup_details.h:215
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:774
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:798
#define HEAP_UPDATED
Definition: htup_details.h:210
#define HEAP_XMAX_KEYSHR_LOCK
Definition: htup_details.h:194
#define INJECTION_POINT(name, arg)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition: relcache.c:5298
@ INDEX_ATTR_BITMAP_KEY
Definition: relcache.h:69
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition: relcache.h:72
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition: relcache.h:73
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition: relcache.h:71
bool RelationSupportsSysCache(Oid relid)
Definition: syscache.c:762
@ TU_Summarizing
Definition: tableam.h:119
@ TU_All
Definition: tableam.h:116
@ TU_None
Definition: tableam.h:113
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188

References Assert(), AssertHasSnapshotForToast(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_lock::flags, GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, xl_heap_lock::infobits_set, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, xl_heap_lock::offnum, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
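
The tail of heap_update() reports its outcome through the TM_Result return value and the TU_UpdateIndexes output argument shown above. The helper below is an illustrative, hypothetical caller-side sketch (not executor or table-AM code); it only uses enum values that appear in this listing.

#include "access/heapam.h"
#include "access/tableam.h"

/*
 * Illustrative sketch only: translate the outcome reported by heap_update()
 * (its TM_Result and *update_indexes) into the follow-up work a caller
 * would have to do.  describe_update_outcome() is a hypothetical helper.
 */
static const char *
describe_update_outcome(TM_Result result, TU_UpdateIndexes update_indexes)
{
	if (result != TM_Ok)
	{
		/* tmfd->ctid / xmax / cmax identify the conflicting tuple version */
		if (result == TM_SelfModified)
			return "already updated by the current transaction";
		if (result == TM_Updated)
			return "concurrently updated; caller may follow ctid and retry";
		if (result == TM_Deleted)
			return "concurrently deleted";
		return "still being modified by another transaction";
	}

	switch (update_indexes)
	{
		case TU_None:
			return "HOT update: no index maintenance needed";
		case TU_Summarizing:
			return "HOT update: refresh summarizing (e.g. BRIN) indexes only";
		case TU_All:
		default:
			return "non-HOT update: insert new entries into every index";
	}
}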

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)

Definition at line 629 of file vacuumlazy.c.

631{
632 LVRelState *vacrel;
633 bool verbose,
634 instrument,
635 skipwithvm,
636 frozenxid_updated,
637 minmulti_updated;
638 BlockNumber orig_rel_pages,
639 new_rel_pages,
640 new_rel_allvisible,
641 new_rel_allfrozen;
642 PGRUsage ru0;
643 TimestampTz starttime = 0;
644 PgStat_Counter startreadtime = 0,
645 startwritetime = 0;
646 WalUsage startwalusage = pgWalUsage;
647 BufferUsage startbufferusage = pgBufferUsage;
648 ErrorContextCallback errcallback;
649 char **indnames = NULL;
650 Size dead_items_max_bytes = 0;
651
652 verbose = (params.options & VACOPT_VERBOSE) != 0;
653 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
654 params.log_vacuum_min_duration >= 0));
655 if (instrument)
656 {
657 pg_rusage_init(&ru0);
658 if (track_io_timing)
659 {
660 startreadtime = pgStatBlockReadTime;
661 startwritetime = pgStatBlockWriteTime;
662 }
663 }
664
665 /* Used for instrumentation and stats report */
666 starttime = GetCurrentTimestamp();
667
669 RelationGetRelid(rel));
672 params.is_wraparound
675 else
678
679 /*
680 * Set up error traceback support for ereport() first. The idea is to set
681 * up an error context callback to display additional information on any
682 * error during a vacuum. During different phases of vacuum, we update
683 * the state so that the error context callback always displays the
684 * current information.
685 *
686 * Copy the names of the heap rel into local memory for error reporting
687 * purposes, too. It isn't always safe to assume that we can get the name
688 * of each rel. It's convenient for code in lazy_scan_heap to always use
689 * these temp copies.
690 */
691 vacrel = palloc0_object(LVRelState);
695 vacrel->indname = NULL;
697 vacrel->verbose = verbose;
698 errcallback.callback = vacuum_error_callback;
699 errcallback.arg = vacrel;
700 errcallback.previous = error_context_stack;
701 error_context_stack = &errcallback;
702
703 /* Set up high level stuff about rel and its indexes */
704 vacrel->rel = rel;
706 &vacrel->indrels);
707 vacrel->bstrategy = bstrategy;
708 if (instrument && vacrel->nindexes > 0)
709 {
710 /* Copy index names used by instrumentation (not error reporting) */
711 indnames = palloc_array(char *, vacrel->nindexes);
712 for (int i = 0; i < vacrel->nindexes; i++)
713 indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
714 }
715
716 /*
717 * The index_cleanup param either disables index vacuuming and cleanup or
718 * forces it to go ahead when we would otherwise apply the index bypass
719 * optimization. The default is 'auto', which leaves the final decision
720 * up to lazy_vacuum().
721 *
722 * The truncate param allows the user to avoid attempting relation truncation,
723 * though it can't force truncation to happen.
724 */
727 params.truncate != VACOPTVALUE_AUTO);
728
729 /*
730 * While VacuumFailsafeActive is reset to false before calling this, we
731 * still need to reset it here due to recursive calls.
732 */
733 VacuumFailsafeActive = false;
734 vacrel->consider_bypass_optimization = true;
735 vacrel->do_index_vacuuming = true;
736 vacrel->do_index_cleanup = true;
737 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
739 {
740 /* Force disable index vacuuming up-front */
741 vacrel->do_index_vacuuming = false;
742 vacrel->do_index_cleanup = false;
743 }
744 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
745 {
746 /* Force index vacuuming. Note that failsafe can still bypass. */
747 vacrel->consider_bypass_optimization = false;
748 }
749 else
750 {
751 /* Default/auto, make all decisions dynamically */
753 }
754
755 /* Initialize page counters explicitly (be tidy) */
756 vacrel->scanned_pages = 0;
757 vacrel->eager_scanned_pages = 0;
758 vacrel->removed_pages = 0;
759 vacrel->new_frozen_tuple_pages = 0;
760 vacrel->lpdead_item_pages = 0;
761 vacrel->missed_dead_pages = 0;
762 vacrel->nonempty_pages = 0;
763 /* dead_items_alloc allocates vacrel->dead_items later on */
764
765 /* Allocate/initialize output statistics state */
766 vacrel->new_rel_tuples = 0;
767 vacrel->new_live_tuples = 0;
768 vacrel->indstats = (IndexBulkDeleteResult **)
769 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
770
771 /* Initialize remaining counters (be tidy) */
772 vacrel->num_index_scans = 0;
773 vacrel->num_dead_items_resets = 0;
774 vacrel->total_dead_items_bytes = 0;
775 vacrel->tuples_deleted = 0;
776 vacrel->tuples_frozen = 0;
777 vacrel->lpdead_items = 0;
778 vacrel->live_tuples = 0;
779 vacrel->recently_dead_tuples = 0;
780 vacrel->missed_dead_tuples = 0;
781
782 vacrel->vm_new_visible_pages = 0;
783 vacrel->vm_new_visible_frozen_pages = 0;
784 vacrel->vm_new_frozen_pages = 0;
785
786 /*
787 * Get cutoffs that determine which deleted tuples are considered DEAD,
788 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
789 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
790 * happen in this order to ensure that the OldestXmin cutoff field works
791 * as an upper bound on the XIDs stored in the pages we'll actually scan
792 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
793 *
794 * Next acquire vistest, a related cutoff that's used in pruning. We use
795 * vistest in combination with OldestXmin to ensure that
796 * heap_page_prune_and_freeze() always removes any deleted tuple whose
797 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
798 * whether a tuple should be frozen or removed. (In the future we might
799 * want to teach lazy_scan_prune to recompute vistest from time to time,
800 * to increase the number of dead tuples it can prune away.)
801 */
802 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
803 vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
804 vacrel->vistest = GlobalVisTestFor(rel);
805
806 /* Initialize state used to track oldest extant XID/MXID */
807 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
808 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
809
810 /*
811 * Initialize state related to tracking all-visible page skipping. This is
812 * very important to determine whether or not it is safe to advance the
813 * relfrozenxid/relminmxid.
814 */
815 vacrel->skippedallvis = false;
816 skipwithvm = true;
818 {
819 /*
820 * Force aggressive mode, and disable skipping blocks using the
821 * visibility map (even those set all-frozen)
822 */
823 vacrel->aggressive = true;
824 skipwithvm = false;
825 }
826
827 vacrel->skipwithvm = skipwithvm;
828
829 /*
830 * Set up eager scan tracking state. This must happen after determining
831 * whether or not the vacuum must be aggressive, because only normal
832 * vacuums use the eager scan algorithm.
833 */
834 heap_vacuum_eager_scan_setup(vacrel, params);
835
836 /* Report the vacuum mode: 'normal' or 'aggressive' */
838 vacrel->aggressive
841
842 if (verbose)
843 {
844 if (vacrel->aggressive)
846 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
847 vacrel->dbname, vacrel->relnamespace,
848 vacrel->relname)));
849 else
851 (errmsg("vacuuming \"%s.%s.%s\"",
852 vacrel->dbname, vacrel->relnamespace,
853 vacrel->relname)));
854 }
855
856 /*
857 * Allocate dead_items memory using dead_items_alloc. This handles
858 * parallel VACUUM initialization as part of allocating shared memory
859 * space used for dead_items. (But do a failsafe precheck first, to
860 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
861 * is already dangerously old.)
862 */
864 dead_items_alloc(vacrel, params.nworkers);
865
866 /*
867 * Call lazy_scan_heap to perform all required heap pruning, index
868 * vacuuming, and heap vacuuming (plus related processing)
869 */
870 lazy_scan_heap(vacrel);
871
872 /*
873 * Save dead items max_bytes and update the memory usage statistics before
874 * cleanup; they are freed in parallel vacuum cases during
875 * dead_items_cleanup().
876 */
877 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
879
880 /*
881 * Free resources managed by dead_items_alloc. This ends parallel mode in
882 * passing when necessary.
883 */
884 dead_items_cleanup(vacrel);
886
887 /*
888 * Update pg_class entries for each of rel's indexes where appropriate.
889 *
890 * Unlike the later update to rel's pg_class entry, this is not critical.
891 * It only maintains relpages/reltuples statistics used by the planner.
892 */
893 if (vacrel->do_index_cleanup)
895
896 /* Done with rel's indexes */
897 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
898
899 /* Optionally truncate rel */
900 if (should_attempt_truncation(vacrel))
901 lazy_truncate_heap(vacrel);
902
903 /* Pop the error context stack */
904 error_context_stack = errcallback.previous;
905
906 /* Report that we are now doing final cleanup */
909
910 /*
911 * Prepare to update rel's pg_class entry.
912 *
913 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
914 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
915 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
916 */
917 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
919 vacrel->cutoffs.relfrozenxid,
920 vacrel->NewRelfrozenXid));
921 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
923 vacrel->cutoffs.relminmxid,
924 vacrel->NewRelminMxid));
925 if (vacrel->skippedallvis)
926 {
927 /*
928 * Must keep original relfrozenxid in a non-aggressive VACUUM that
929 * chose to skip an all-visible page range. The state that tracks new
930 * values will have missed unfrozen XIDs from the pages we skipped.
931 */
932 Assert(!vacrel->aggressive);
935 }
936
937 /*
938 * For safety, clamp relallvisible to be not more than what we're setting
939 * pg_class.relpages to
940 */
941 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
942 visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
943 if (new_rel_allvisible > new_rel_pages)
944 new_rel_allvisible = new_rel_pages;
945
946 /*
947 * An all-frozen block _must_ be all-visible. As such, clamp the count of
948 * all-frozen blocks to the count of all-visible blocks. This matches the
949 * clamping of relallvisible above.
950 */
951 if (new_rel_allfrozen > new_rel_allvisible)
952 new_rel_allfrozen = new_rel_allvisible;
953
954 /*
955 * Now actually update rel's pg_class entry.
956 *
957 * In principle new_live_tuples could be -1 indicating that we (still)
958 * don't know the tuple count. In practice that can't happen, since we
959 * scan every page that isn't skipped using the visibility map.
960 */
961 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
962 new_rel_allvisible, new_rel_allfrozen,
963 vacrel->nindexes > 0,
964 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
965 &frozenxid_updated, &minmulti_updated, false);
966
967 /*
968 * Report results to the cumulative stats system, too.
969 *
970 * Deliberately avoid telling the stats system about LP_DEAD items that
971 * remain in the table due to VACUUM bypassing index and heap vacuuming.
972 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
973 * It seems like a good idea to err on the side of not vacuuming again too
974 * soon in cases where the failsafe prevented significant amounts of heap
975 * vacuuming.
976 */
978 Max(vacrel->new_live_tuples, 0),
979 vacrel->recently_dead_tuples +
980 vacrel->missed_dead_tuples,
981 starttime);
983
984 if (instrument)
985 {
987
988 if (verbose || params.log_vacuum_min_duration == 0 ||
989 TimestampDifferenceExceeds(starttime, endtime,
991 {
992 long secs_dur;
993 int usecs_dur;
994 WalUsage walusage;
995 BufferUsage bufferusage;
997 char *msgfmt;
998 int32 diff;
999 double read_rate = 0,
1000 write_rate = 0;
1001 int64 total_blks_hit;
1002 int64 total_blks_read;
1003 int64 total_blks_dirtied;
1004
1005 TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1006 memset(&walusage, 0, sizeof(WalUsage));
1007 WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1008 memset(&bufferusage, 0, sizeof(BufferUsage));
1009 BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1010
1011 total_blks_hit = bufferusage.shared_blks_hit +
1012 bufferusage.local_blks_hit;
1013 total_blks_read = bufferusage.shared_blks_read +
1014 bufferusage.local_blks_read;
1015 total_blks_dirtied = bufferusage.shared_blks_dirtied +
1016 bufferusage.local_blks_dirtied;
1017
1019 if (verbose)
1020 {
1021 /*
1022 * Aggressiveness already reported earlier, in dedicated
1023 * VACUUM VERBOSE ereport
1024 */
1025 Assert(!params.is_wraparound);
1026 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1027 }
1028 else if (params.is_wraparound)
1029 {
1030 /*
1031 * While it's possible for a VACUUM to be both is_wraparound
1032 * and !aggressive, that's just a corner-case -- is_wraparound
1033 * implies aggressive. Produce distinct output for the corner
1034 * case all the same, just in case.
1035 */
1036 if (vacrel->aggressive)
1037 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1038 else
1039 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1040 }
1041 else
1042 {
1043 if (vacrel->aggressive)
1044 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1045 else
1046 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1047 }
1048 appendStringInfo(&buf, msgfmt,
1049 vacrel->dbname,
1050 vacrel->relnamespace,
1051 vacrel->relname,
1052 vacrel->num_index_scans);
1053 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1054 vacrel->removed_pages,
1055 new_rel_pages,
1056 vacrel->scanned_pages,
1057 orig_rel_pages == 0 ? 100.0 :
1058 100.0 * vacrel->scanned_pages /
1059 orig_rel_pages,
1060 vacrel->eager_scanned_pages);
1062 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1063 vacrel->tuples_deleted,
1064 (int64) vacrel->new_rel_tuples,
1065 vacrel->recently_dead_tuples);
1066 if (vacrel->missed_dead_tuples > 0)
1068 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1069 vacrel->missed_dead_tuples,
1070 vacrel->missed_dead_pages);
1071 diff = (int32) (ReadNextTransactionId() -
1072 vacrel->cutoffs.OldestXmin);
1074 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1075 vacrel->cutoffs.OldestXmin, diff);
1076 if (frozenxid_updated)
1077 {
1078 diff = (int32) (vacrel->NewRelfrozenXid -
1079 vacrel->cutoffs.relfrozenxid);
1081 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1082 vacrel->NewRelfrozenXid, diff);
1083 }
1084 if (minmulti_updated)
1085 {
1086 diff = (int32) (vacrel->NewRelminMxid -
1087 vacrel->cutoffs.relminmxid);
1089 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1090 vacrel->NewRelminMxid, diff);
1091 }
1092 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1093 vacrel->new_frozen_tuple_pages,
1094 orig_rel_pages == 0 ? 100.0 :
1095 100.0 * vacrel->new_frozen_tuple_pages /
1096 orig_rel_pages,
1097 vacrel->tuples_frozen);
1098
1100 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1101 vacrel->vm_new_visible_pages,
1103 vacrel->vm_new_frozen_pages,
1104 vacrel->vm_new_frozen_pages);
1105 if (vacrel->do_index_vacuuming)
1106 {
1107 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1108 appendStringInfoString(&buf, _("index scan not needed: "));
1109 else
1110 appendStringInfoString(&buf, _("index scan needed: "));
1111
1112 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1113 }
1114 else
1115 {
1117 appendStringInfoString(&buf, _("index scan bypassed: "));
1118 else
1119 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1120
1121 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1122 }
1123 appendStringInfo(&buf, msgfmt,
1124 vacrel->lpdead_item_pages,
1125 orig_rel_pages == 0 ? 100.0 :
1126 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1127 vacrel->lpdead_items);
1128 for (int i = 0; i < vacrel->nindexes; i++)
1129 {
1130 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1131
1132 if (!istat)
1133 continue;
1134
1136 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1137 indnames[i],
1138 istat->num_pages,
1139 istat->pages_newly_deleted,
1140 istat->pages_deleted,
1141 istat->pages_free);
1142 }
1144 {
1145 /*
1146 * We bypass the changecount mechanism because this value is
1147 * only updated by the calling process. We also rely on the
1148 * above call to pgstat_progress_end_command() to not clear
1149 * the st_progress_param array.
1150 */
1151 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1153 }
1154 if (track_io_timing)
1155 {
1156 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1157 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1158
1159 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1160 read_ms, write_ms);
1161 }
1162 if (secs_dur > 0 || usecs_dur > 0)
1163 {
1164 read_rate = (double) BLCKSZ * total_blks_read /
1165 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1166 write_rate = (double) BLCKSZ * total_blks_dirtied /
1167 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1168 }
1169 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1170 read_rate, write_rate);
1172 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1173 total_blks_hit,
1174 total_blks_read,
1175 total_blks_dirtied);
1177 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1178 walusage.wal_records,
1179 walusage.wal_fpi,
1180 walusage.wal_bytes,
1181 walusage.wal_fpi_bytes,
1182 walusage.wal_buffers_full);
1183
1184 /*
1185 * Report the dead items memory usage.
1186 *
1187 * The num_dead_items_resets counter increases when we reset the
1188 * collected dead items, so the counter is non-zero if at least
1189 * one dead items are collected, even if index vacuuming is
1190 * disabled.
1191 */
1193 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1194 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1195 vacrel->num_dead_items_resets),
1196 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1197 vacrel->num_dead_items_resets,
1198 (double) dead_items_max_bytes / (1024 * 1024));
1199 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1200
1201 ereport(verbose ? INFO : LOG,
1202 (errmsg_internal("%s", buf.data)));
1203 pfree(buf.data);
1204 }
1205 }
1206
1207 /* Cleanup index statistics and index names */
1208 for (int i = 0; i < vacrel->nindexes; i++)
1209 {
1210 if (vacrel->indstats[i])
1211 pfree(vacrel->indstats[i]);
1212
1213 if (instrument)
1214 pfree(indnames[i]);
1215 }
1216}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1721
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition: bufmgr.c:169
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:294
#define ngettext(s, p, n)
Definition: c.h:1166
int32_t int32
Definition: c.h:548
int64 TimestampTz
Definition: timestamp.h:39
ErrorContextCallback * error_context_stack
Definition: elog.c:95
#define _(x)
Definition: elog.c:91
#define LOG
Definition: elog.h:31
#define INFO
Definition: elog.h:34
#define palloc0_object(type)
Definition: fe_memutils.h:75
int verbose
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:288
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_database_name(Oid dbid)
Definition: lsyscache.c:1242
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3516
char * pstrdup(const char *in)
Definition: mcxt.c:1781
void * palloc0(Size size)
Definition: mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:383
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:67
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:41
#define PROGRESS_VACUUM_MODE
Definition: progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition: progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition: progress.h:50
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition: progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition: progress.h:51
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition: progress.h:49
#define PROGRESS_VACUUM_STARTED_BY
Definition: progress.h:33
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition: progress.h:45
#define RelationGetNamespace(relation)
Definition: rel.h:556
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
struct ErrorContextCallback * previous
Definition: elog.h:297
void(* callback)(void *arg)
Definition: elog.h:298
BlockNumber pages_deleted
Definition: genam.h:109
BlockNumber pages_newly_deleted
Definition: genam.h:108
BlockNumber pages_free
Definition: genam.h:110
BlockNumber num_pages
Definition: genam.h:104
bool verbose
Definition: vacuumlazy.c:297
VacDeadItemsInfo * dead_items_info
Definition: vacuumlazy.c:310
BlockNumber vm_new_frozen_pages
Definition: vacuumlazy.c:336
int nindexes
Definition: vacuumlazy.c:263
TidStore * dead_items
Definition: vacuumlazy.c:309
int64 tuples_deleted
Definition: vacuumlazy.c:353
BlockNumber nonempty_pages
Definition: vacuumlazy.c:340
bool do_rel_truncate
Definition: vacuumlazy.c:279
BlockNumber scanned_pages
Definition: vacuumlazy.c:313
int num_dead_items_resets
Definition: vacuumlazy.c:350
bool aggressive
Definition: vacuumlazy.c:270
BlockNumber new_frozen_tuple_pages
Definition: vacuumlazy.c:322
GlobalVisState * vistest
Definition: vacuumlazy.c:283
BlockNumber removed_pages
Definition: vacuumlazy.c:321
int num_index_scans
Definition: vacuumlazy.c:349
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:346
double new_live_tuples
Definition: vacuumlazy.c:344
double new_rel_tuples
Definition: vacuumlazy.c:343
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:285
Relation rel
Definition: vacuumlazy.c:261
bool consider_bypass_optimization
Definition: vacuumlazy.c:274
BlockNumber rel_pages
Definition: vacuumlazy.c:312
Size total_dead_items_bytes
Definition: vacuumlazy.c:351
int64 recently_dead_tuples
Definition: vacuumlazy.c:357
int64 tuples_frozen
Definition: vacuumlazy.c:354
char * dbname
Definition: vacuumlazy.c:290
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:339
char * relnamespace
Definition: vacuumlazy.c:291
int64 live_tuples
Definition: vacuumlazy.c:356
int64 lpdead_items
Definition: vacuumlazy.c:355
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:266
bool skippedallvis
Definition: vacuumlazy.c:287
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:338
BlockNumber eager_scanned_pages
Definition: vacuumlazy.c:319
Relation * indrels
Definition: vacuumlazy.c:262
bool skipwithvm
Definition: vacuumlazy.c:272
bool do_index_cleanup
Definition: vacuumlazy.c:278
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:286
int64 missed_dead_tuples
Definition: vacuumlazy.c:358
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:282
BlockNumber vm_new_visible_pages
Definition: vacuumlazy.c:325
char * relname
Definition: vacuumlazy.c:292
VacErrPhase phase
Definition: vacuumlazy.c:296
char * indname
Definition: vacuumlazy.c:293
BlockNumber vm_new_visible_frozen_pages
Definition: vacuumlazy.c:333
bool do_index_vacuuming
Definition: vacuumlazy.c:277
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
size_t max_bytes
Definition: vacuum.h:299
int nworkers
Definition: vacuum.h:251
VacOptValue truncate
Definition: vacuum.h:236
bits32 options
Definition: vacuum.h:219
int log_vacuum_min_duration
Definition: vacuum.h:227
bool is_wraparound
Definition: vacuum.h:226
VacOptValue index_cleanup
Definition: vacuum.h:235
int64 wal_buffers_full
Definition: instrument.h:57
uint64 wal_bytes
Definition: instrument.h:55
uint64 wal_fpi_bytes
Definition: instrument.h:56
int64 wal_records
Definition: instrument.h:53
size_t TidStoreMemoryUsage(TidStore *ts)
Definition: tidstore.c:532
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:377
bool track_cost_delay_timing
Definition: vacuum.c:82
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2362
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2405
bool VacuumFailsafeActive
Definition: vacuum.c:110
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1100
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3673
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3879
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition: vacuumlazy.c:502
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3914
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:3286
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:3266
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:225
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:1255
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:3035
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:3560
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, LVRelState::aggressive, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert(), LVRelState::bstrategy, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, LVRelState::consider_bypass_optimization, LVRelState::cutoffs, LVRelState::dbname, LVRelState::dead_items, dead_items_alloc(), dead_items_cleanup(), LVRelState::dead_items_info, LVRelState::do_index_cleanup, LVRelState::do_index_vacuuming, LVRelState::do_rel_truncate, LVRelState::eager_scanned_pages, ereport, errmsg(), errmsg_internal(), error_context_stack, VacuumCutoffs::FreezeLimit, get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, LVRelState::indname, LVRelState::indrels, LVRelState::indstats, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), LVRelState::live_tuples, BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, LVRelState::lpdead_item_pages, LVRelState::lpdead_items, Max, VacDeadItemsInfo::max_bytes, LVRelState::missed_dead_pages, LVRelState::missed_dead_tuples, VacuumCutoffs::MultiXactCutoff, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, LVRelState::new_frozen_tuple_pages, LVRelState::new_live_tuples, LVRelState::new_rel_tuples, LVRelState::NewRelfrozenXid, LVRelState::NewRelminMxid, ngettext, LVRelState::nindexes, NoLock, LVRelState::nonempty_pages, LVRelState::num_dead_items_resets, LVRelState::num_index_scans, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, LVRelState::phase, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), LVRelState::recently_dead_tuples, LVRelState::rel, LVRelState::rel_pages, RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, LVRelState::relname, LVRelState::relnamespace, LVRelState::removed_pages, RowExclusiveLock, LVRelState::scanned_pages, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), LVRelState::skippedallvis, LVRelState::skipwithvm, PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), LVRelState::total_dead_items_bytes, track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, LVRelState::tuples_deleted, LVRelState::tuples_frozen, update_relstats_all_indexes(), vac_close_indexes(), 
vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, LVRelState::verbose, verbose, visibilitymap_count(), LVRelState::vistest, LVRelState::vm_new_frozen_pages, LVRelState::vm_new_visible_frozen_pages, LVRelState::vm_new_visible_pages, WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().
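
As a usage illustration only: heap_vacuum_rel() is normally reached through the table AM's relation_vacuum callback rather than called directly. The sketch below shows how the VacuumParams fields referenced above fit together; the vacuum_one_heap() name, the chosen field values, and leaving the remaining VacuumParams fields zeroed are assumptions made for illustration.

#include "postgres.h"

#include "access/heapam.h"
#include "access/table.h"
#include "commands/vacuum.h"
#include "storage/bufmgr.h"

/*
 * Hedged sketch: drive a lazy vacuum of one heap.  Real callers go through
 * vacuum() and the table AM; the values below are illustrative and the
 * remaining VacuumParams fields are simply left zeroed.
 */
static void
vacuum_one_heap(Oid relid)
{
	Relation	rel = table_open(relid, ShareUpdateExclusiveLock);
	VacuumParams params;

	memset(&params, 0, sizeof(params));
	params.options = VACOPT_VERBOSE;			/* log per-table details */
	params.index_cleanup = VACOPTVALUE_AUTO;	/* let lazy_vacuum() decide */
	params.truncate = VACOPTVALUE_AUTO;			/* allow rel truncation */
	params.nworkers = 0;						/* no parallel index vacuuming */
	params.log_vacuum_min_duration = -1;		/* not an autovacuum worker */

	heap_vacuum_rel(rel, params, GetAccessStrategy(BAS_VACUUM));

	table_close(rel, ShareUpdateExclusiveLock);
}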

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)

Definition at line 9290 of file heapam.c.

9293{
9294 TransactionId xid;
9295 HTSV_Result htsvResult;
9296
9297 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9298 return;
9299
9300 /*
9301 * Check to see whether the tuple has been written to by a concurrent
9302 * transaction, either to create it not visible to us, or to delete it
9303 * while it is visible to us. The "visible" bool indicates whether the
9304 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9305 * is going on with it.
9306 *
9307 * In the event of a concurrently inserted tuple that also happens to have
9308 * been concurrently updated (by a separate transaction), the xmin of the
9309 * tuple will be used -- not the updater's xid.
9310 */
9311 htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
9312 switch (htsvResult)
9313 {
9314 case HEAPTUPLE_LIVE:
9315 if (visible)
9316 return;
9317 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9318 break;
9321 if (visible)
9322 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9323 else
9324 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9325
9327 {
9328 /* This is like the HEAPTUPLE_DEAD case */
9329 Assert(!visible);
9330 return;
9331 }
9332 break;
9334 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9335 break;
9336 case HEAPTUPLE_DEAD:
9337 Assert(!visible);
9338 return;
9339 default:
9340
9341 /*
9342 * The only way to get to this default clause is if a new value is
9343 * added to the enum type without adding it to this switch
9344 * statement. That's a bug, so elog.
9345 */
9346 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9347
9348 /*
9349 * In spite of having all enum values covered and calling elog on
9350 * this default, some compilers think this is a code path which
9351 * allows xid to be used below without initialization. Silence
9352 * that warning.
9353 */
9355 }
9356
9359
9360 /*
9361 * Find top level xid. Bail out if xid is too early to be a conflict, or
9362 * if it's our own xid.
9363 */
9365 return;
9368 return;
9369
9370 CheckForSerializableConflictOut(relation, xid, snapshot);
9371}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition: predicate.c:4021
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:162
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.h:312
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:442

References Assert(), CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
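
A minimal sketch of the caller pattern (the helper below is hypothetical; relation, buffer, tuple and snapshot are assumed to be set up by the caller, with the buffer content lock already held in share mode). Visibility is decided first, and the tuple is then reported either way, since both an invisible-but-inserted tuple and a visible-but-deleted one can represent a rw-conflict under SERIALIZABLE.

#include "postgres.h"
#include "access/heapam.h"

/* Hypothetical helper, not part of heapam: caller holds a share lock on
 * "buffer" and "tuple" points into that buffer. */
static void
check_tuple_for_conflict_out(Relation relation, HeapTuple tuple,
                             Buffer buffer, Snapshot snapshot)
{
    bool        visible;

    /* decide visibility under our snapshot first ... */
    visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);

    /* ... then let SSI look at the tuple either way */
    HeapCheckForSerializableConflictOut(visible, relation, tuple, buffer,
                                        snapshot);
}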

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)

Definition at line 1358 of file heapam_visibility.c.

1359{
1360 TransactionId xmax;
1361
1362 /* if there's no valid Xmax, then there's obviously no update either */
1363 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1364 return true;
1365
1366 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1367 return true;
1368
1369 /* invalid xmax means no update */
1370 if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
1371 return true;
1372
1373 /*
1374 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1375 * necessarily have been updated
1376 */
1377 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1378 return false;
1379
1380 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1381 xmax = HeapTupleGetUpdateXid(tuple);
1382
1383 /* not LOCKED_ONLY, so it has to have an xmax */
1384 Assert(TransactionIdIsValid(xmax));
1385
1386 if (TransactionIdIsCurrentTransactionId(xmax))
1387 return false;
1388 if (TransactionIdIsInProgress(xmax))
1389 return false;
1390 if (TransactionIdDidCommit(xmax))
1391 return false;
1392
1393 /*
1394 * not current, not in progress, not committed -- must have aborted or
1395 * crashed
1396 */
1397 return true;
1398}
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1404

References Assert(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
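
As a hedged illustration (the helper name is invented), a caller that needs to distinguish "tuple was updated or deleted" from "tuple was merely locked" can combine the infomask check with this function before treating xmax as an update XID:

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"

/* Illustrative helper: true if xmax records a real update/delete rather
 * than just a row lock.  Caller must hold a content lock on the page. */
static bool
xmax_is_real_update(HeapTupleHeader tuple)
{
    if (tuple->t_infomask & HEAP_XMAX_INVALID)
        return false;            /* no xmax at all */
    if (HeapTupleHeaderIsOnlyLocked(tuple))
        return false;            /* lockers only, possibly a MultiXact */
    return true;                 /* xmax (or its update member) modified the tuple */
}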

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
GlobalVisState vistest 
)

Definition at line 1303 of file heapam_visibility.c.

1304{
1305 HeapTupleHeader tuple = htup->t_data;
1306
1307 Assert(ItemPointerIsValid(&htup->t_self));
1308 Assert(htup->t_tableOid != InvalidOid);
1309
1310 /*
1311 * If the inserting transaction is marked invalid, then it aborted, and
1312 * the tuple is definitely dead. If it's marked neither committed nor
1313 * invalid, then we assume it's still alive (since the presumption is that
1314 * all relevant hint bits were just set moments ago).
1315 */
1316 if (!HeapTupleHeaderXminCommitted(tuple))
1317 return HeapTupleHeaderXminInvalid(tuple);
1318
1319 /*
1320 * If the inserting transaction committed, but any deleting transaction
1321 * aborted, the tuple is still alive.
1322 */
1323 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1324 return false;
1325
1326 /*
1327 * If the XMAX is just a lock, the tuple is still alive.
1328 */
1329 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1330 return false;
1331
1332 /*
1333 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1334 * know without checking pg_multixact.
1335 */
1336 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1337 return false;
1338
1339 /* If deleter isn't known to have committed, assume it's still running. */
1340 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1341 return false;
1342
1343 /* Deleter committed, so tuple is dead if the XID is old enough. */
1344 return GlobalVisTestIsRemovableXid(vistest,
1345 HeapTupleHeaderGetRawXmax(tuple));
1346}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
Definition: htup_details.h:343
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
Definition: htup_details.h:337
#define InvalidOid
Definition: postgres_ext.h:37

References Assert(), GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().
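
A minimal sketch of the kind of check heap_hot_search_buffer() performs when maintaining its all_dead flag; the wrapper function is hypothetical and assumes the buffer content lock is held while the tuple is examined.

#include "postgres.h"
#include "access/heapam.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: can every backend treat this HOT chain member as
 * dead, so index entries pointing at the chain may be marked killed? */
static bool
hot_member_surely_dead(Relation relation, HeapTuple heapTuple)
{
    GlobalVisState *vistest = GlobalVisTestFor(relation);

    return HeapTupleIsSurelyDead(heapTuple, vistest);
}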

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)

Definition at line 433 of file heapam_visibility.c.

435{
436 HeapTupleHeader tuple = htup->t_data;
437
439 Assert(htup->t_tableOid != InvalidOid);
440
442 {
444 return TM_Invisible;
445
446 else if (!HeapTupleCleanMoved(tuple, buffer))
447 return TM_Invisible;
449 {
450 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
451 return TM_Invisible; /* inserted after scan started */
452
453 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
454 return TM_Ok;
455
457 {
458 TransactionId xmax;
459
460 xmax = HeapTupleHeaderGetRawXmax(tuple);
461
462 /*
463 * Careful here: even though this tuple was created by our own
464 * transaction, it might be locked by other transactions, if
465 * the original version was key-share locked when we updated
466 * it.
467 */
468
469 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
470 {
471 if (MultiXactIdIsRunning(xmax, true))
472 return TM_BeingModified;
473 else
474 return TM_Ok;
475 }
476
477 /*
478 * If the locker is gone, then there is nothing of interest
479 * left in this Xmax; otherwise, report the tuple as
480 * locked/updated.
481 */
482 if (!TransactionIdIsInProgress(xmax))
483 return TM_Ok;
484 return TM_BeingModified;
485 }
486
487 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
488 {
489 TransactionId xmax;
490
491 xmax = HeapTupleGetUpdateXid(tuple);
492
493 /* not LOCKED_ONLY, so it has to have an xmax */
495
496 /* deleting subtransaction must have aborted */
498 {
500 false))
501 return TM_BeingModified;
502 return TM_Ok;
503 }
504 else
505 {
506 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
507 return TM_SelfModified; /* updated after scan started */
508 else
509 return TM_Invisible; /* updated before scan started */
510 }
511 }
512
514 {
515 /* deleting subtransaction must have aborted */
516 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
518 return TM_Ok;
519 }
520
521 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
522 return TM_SelfModified; /* updated after scan started */
523 else
524 return TM_Invisible; /* updated before scan started */
525 }
527 return TM_Invisible;
529 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
531 else
532 {
533 /* it must have aborted or crashed */
534 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
536 return TM_Invisible;
537 }
538 }
539
540 /* by here, the inserting transaction has committed */
541
542 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
543 return TM_Ok;
544
545 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
546 {
548 return TM_Ok;
549 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
550 return TM_Updated; /* updated by other */
551 else
552 return TM_Deleted; /* deleted by other */
553 }
554
555 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
556 {
557 TransactionId xmax;
558
560 return TM_Ok;
561
563 {
565 return TM_BeingModified;
566
568 return TM_Ok;
569 }
570
571 xmax = HeapTupleGetUpdateXid(tuple);
572 if (!TransactionIdIsValid(xmax))
573 {
575 return TM_BeingModified;
576 }
577
578 /* not LOCKED_ONLY, so it has to have an xmax */
580
582 {
583 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
584 return TM_SelfModified; /* updated after scan started */
585 else
586 return TM_Invisible; /* updated before scan started */
587 }
588
590 return TM_BeingModified;
591
592 if (TransactionIdDidCommit(xmax))
593 {
594 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
595 return TM_Updated;
596 else
597 return TM_Deleted;
598 }
599
600 /*
601 * By here, the update in the Xmax is either aborted or crashed, but
602 * what about the other members?
603 */
604
606 {
607 /*
608 * There's no member, even just a locker, alive anymore, so we can
609 * mark the Xmax as invalid.
610 */
611 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
613 return TM_Ok;
614 }
615 else
616 {
617 /* There are lockers running */
618 return TM_BeingModified;
619 }
620 }
621
623 {
625 return TM_BeingModified;
626 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
627 return TM_SelfModified; /* updated after scan started */
628 else
629 return TM_Invisible; /* updated before scan started */
630 }
631
633 return TM_BeingModified;
634
636 {
637 /* it must have aborted or crashed */
638 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
640 return TM_Ok;
641 }
642
643 /* xmax transaction committed */
644
646 {
647 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
649 return TM_Ok;
650 }
651
652 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
654 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
655 return TM_Updated; /* updated by other */
656 else
657 return TM_Deleted; /* deleted by other */
658}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition: combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
static bool HeapTupleCleanMoved(HeapTupleHeader tuple, Buffer buffer)
#define HEAP_XMIN_COMMITTED
Definition: htup_details.h:204
#define HEAP_XMIN_INVALID
Definition: htup_details.h:205
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:465

References Assert(), HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().
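
The TM_Result is what drives heap_update()/heap_delete() control flow. A rough, simplified caller sketch follows; waiting for in-progress lockers, EvalPlanQual handling and error reporting are all omitted, and real callers keep the buffer lock while acting on the result.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"
#include "storage/bufmgr.h"

/* Simplified sketch only: classify the already-fetched tuple "tp", which
 * points into "buffer", before attempting a delete or update. */
static TM_Result
classify_target_tuple(HeapTuple tp, Buffer buffer)
{
    TM_Result   result;

    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    result = HeapTupleSatisfiesUpdate(tp, GetCurrentCommandId(true), buffer);
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

    switch (result)
    {
        case TM_Ok:             /* free to modify the tuple */
        case TM_SelfModified:   /* touched by a later command of our own xact */
        case TM_BeingModified:  /* someone still holds it locked or updated */
        case TM_Invisible:      /* caller shouldn't normally see this */
        default:                /* TM_Updated / TM_Deleted: concurrent change */
            break;
    }
    return result;
}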

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)

Definition at line 1035 of file heapam_visibility.c.

1037{
1038 TransactionId dead_after = InvalidTransactionId;
1039 HTSV_Result res;
1040
1041 res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);
1042
1043 if (res == HEAPTUPLE_RECENTLY_DEAD)
1044 {
1045 Assert(TransactionIdIsValid(dead_after));
1046
1047 if (TransactionIdPrecedes(dead_after, OldestXmin))
1048 res = HEAPTUPLE_DEAD;
1049 }
1050 else
1051 Assert(!TransactionIdIsValid(dead_after));
1052
1053 return res;
1054}
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References Assert(), HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), InvalidTransactionId, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_page_would_be_all_visible(), heapam_index_build_range_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_tuple(), HeapCheckForSerializableConflictOut(), lazy_scan_noprune(), statapprox_heap(), tuple_all_visible(), and update_most_recent_deletion_info().
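
A hedged sketch of how a VACUUM-style caller consumes the result; OldestXmin would normally come from vacuum_get_cutoffs(), and the tuple and buffer are assumed to be pinned and share-locked by the caller.

#include "postgres.h"
#include "access/heapam.h"

/* Illustrative only: count a tuple the way a heap scan for VACUUM might. */
static void
tally_tuple(HeapTuple tuple, TransactionId OldestXmin, Buffer buffer,
            int *ndead, int *nlive)
{
    switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buffer))
    {
        case HEAPTUPLE_DEAD:
            (*ndead)++;          /* removable right now */
            break;
        case HEAPTUPLE_RECENTLY_DEAD:
            (*nlive)++;          /* dead, but possibly still visible to someone */
            break;
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            (*nlive)++;          /* must be kept for now */
            break;
    }
}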

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)

Definition at line 1069 of file heapam_visibility.c.

1070{
1071 HeapTupleHeader tuple = htup->t_data;
1072
1074 Assert(htup->t_tableOid != InvalidOid);
1075 Assert(dead_after != NULL);
1076
1077 *dead_after = InvalidTransactionId;
1078
1079 /*
1080 * Has inserting transaction committed?
1081 *
1082 * If the inserting transaction aborted, then the tuple was never visible
1083 * to any other transaction, so we can delete it immediately.
1084 */
1085 if (!HeapTupleHeaderXminCommitted(tuple))
1086 {
1087 if (HeapTupleHeaderXminInvalid(tuple))
1088 return HEAPTUPLE_DEAD;
1089 else if (!HeapTupleCleanMoved(tuple, buffer))
1090 return HEAPTUPLE_DEAD;
1092 {
1093 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1095 /* only locked? run infomask-only check first, for performance */
1099 /* inserted and then deleted by same xact */
1102 /* deleting subtransaction must have aborted */
1104 }
1106 {
1107 /*
1108 * It'd be possible to discern between INSERT/DELETE in progress
1109 * here by looking at xmax - but that doesn't seem beneficial for
1110 * the majority of callers and even detrimental for some. We'd
1111 * rather have callers look at/wait for xmin than xmax. It's
1112 * always correct to return INSERT_IN_PROGRESS because that's
1113 * what's happening from the view of other backends.
1114 */
1116 }
1118 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1120 else
1121 {
1122 /*
1123 * Not in Progress, Not Committed, so either Aborted or crashed
1124 */
1125 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1127 return HEAPTUPLE_DEAD;
1128 }
1129
1130 /*
1131 * At this point the xmin is known committed, but we might not have
1132 * been able to set the hint bit yet; so we can no longer Assert that
1133 * it's set.
1134 */
1135 }
1136
1137 /*
1138 * Okay, the inserter committed, so it was good at some point. Now what
1139 * about the deleting transaction?
1140 */
1141 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1142 return HEAPTUPLE_LIVE;
1143
1145 {
1146 /*
1147 * "Deleting" xact really only locked it, so the tuple is live in any
1148 * case. However, we should make sure that either XMAX_COMMITTED or
1149 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1150 * examining the tuple for future xacts.
1151 */
1152 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1153 {
1154 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1155 {
1156 /*
1157 * If it's a pre-pg_upgrade tuple, the multixact cannot
1158 * possibly be running; otherwise have to check.
1159 */
1160 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1162 true))
1163 return HEAPTUPLE_LIVE;
1165 }
1166 else
1167 {
1169 return HEAPTUPLE_LIVE;
1170 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1172 }
1173 }
1174
1175 /*
1176 * We don't really care whether xmax did commit, abort or crash. We
1177 * know that xmax did lock the tuple, but it did not and will never
1178 * actually update it.
1179 */
1180
1181 return HEAPTUPLE_LIVE;
1182 }
1183
1184 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1185 {
1187
1188 /* already checked above */
1190
1191 /* not LOCKED_ONLY, so it has to have an xmax */
1193
1194 if (TransactionIdIsInProgress(xmax))
1196 else if (TransactionIdDidCommit(xmax))
1197 {
1198 /*
1199 * The multixact might still be running due to lockers. Need to
1200 * allow for pruning if below the xid horizon regardless --
1201 * otherwise we could end up with a tuple where the updater has to
1202 * be removed due to the horizon, but is not pruned away. It's
1203 * not a problem to prune that tuple, because any remaining
1204 * lockers will also be present in newer tuple versions.
1205 */
1206 *dead_after = xmax;
1208 }
1209 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1210 {
1211 /*
1212 * Not in Progress, Not Committed, so either Aborted or crashed.
1213 * Mark the Xmax as invalid.
1214 */
1216 }
1217
1218 return HEAPTUPLE_LIVE;
1219 }
1220
1221 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1222 {
1226 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1228 else
1229 {
1230 /*
1231 * Not in Progress, Not Committed, so either Aborted or crashed
1232 */
1233 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1235 return HEAPTUPLE_LIVE;
1236 }
1237
1238 /*
1239 * At this point the xmax is known committed, but we might not have
1240 * been able to set the hint bit yet; so we can no longer Assert that
1241 * it's set.
1242 */
1243 }
1244
1245 /*
1246 * Deleter committed, allow caller to check if it was recent enough that
1247 * some open transactions could still see the tuple.
1248 */
1249 *dead_after = HeapTupleHeaderGetRawXmax(tuple);
1251}

References Assert(), HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
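
A minimal sketch of the wrapper pattern used by pruning code (compare heap_prune_satisfies_vacuum() and HeapTupleSatisfiesVacuum() above), where the dead_after XID is tested against a GlobalVisState instead of a fixed OldestXmin.

#include "postgres.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "utils/snapmgr.h"

/* Sketch only: promote RECENTLY_DEAD to DEAD when the deleter's XID is
 * already older than every horizon tracked by "vistest". */
static HTSV_Result
satisfies_vacuum_with_vistest(HeapTuple htup, Buffer buffer,
                              GlobalVisState *vistest)
{
    TransactionId dead_after = InvalidTransactionId;
    HTSV_Result res;

    res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);

    if (res == HEAPTUPLE_RECENTLY_DEAD &&
        GlobalVisTestIsRemovableXid(vistest, dead_after))
        res = HEAPTUPLE_DEAD;

    return res;
}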

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)

Definition at line 1605 of file heapam_visibility.c.

1606{
1607 switch (snapshot->snapshot_type)
1608 {
1609 case SNAPSHOT_MVCC:
1610 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1611 case SNAPSHOT_SELF:
1612 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1613 case SNAPSHOT_ANY:
1614 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1615 case SNAPSHOT_TOAST:
1616 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1617 case SNAPSHOT_DIRTY:
1618 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1619 case SNAPSHOT_HISTORIC_MVCC:
1620 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1621 case SNAPSHOT_NON_VACUUMABLE:
1622 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1623 }
1624
1625 return false; /* keep compiler quiet */
1626}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition: snapshot.h:70
@ SNAPSHOT_SELF
Definition: snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition: snapshot.h:114
@ SNAPSHOT_MVCC
Definition: snapshot.h:46
@ SNAPSHOT_ANY
Definition: snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:105
@ SNAPSHOT_DIRTY
Definition: snapshot.h:98
SnapshotType snapshot_type
Definition: snapshot.h:140

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), page_collect_tuples(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
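
A hedged usage sketch: the caller must hold a pin and at least a share content lock on the buffer for the duration of the call, because the routine may set hint bits on the page. The snapshot decides which of the per-snapshot-type routines above actually runs.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"

/* Illustrative helper: test one already-fetched tuple against the
 * transaction's active snapshot. */
static bool
tuple_visible_to_active_snapshot(HeapTuple tuple, Buffer buffer)
{
    bool        visible;

    LockBuffer(buffer, BUFFER_LOCK_SHARE);
    visible = HeapTupleSatisfiesVisibility(tuple, GetActiveSnapshot(), buffer);
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

    return visible;
}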

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)

Definition at line 141 of file heapam_visibility.c.

143{
144 SetHintBits(tuple, buffer, infomask, xid);
145}

References SetHintBits().

Referenced by UpdateXmaxHintBits().
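
This is the exported wrapper around the static SetHintBits() used throughout heapam_visibility.c. A hedged sketch of the typical use, assuming the inserter's commit status has just been resolved by the caller:

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/transam.h"

/* Sketch only: record a just-resolved commit of xmin on the tuple so future
 * readers can skip the transaction-status lookup. */
static void
hint_xmin_committed(HeapTuple tuple, Buffer buffer, TransactionId xmin)
{
    if (TransactionIdDidCommit(xmin))
        HeapTupleSetHintBits(tuple->t_data, buffer, HEAP_XMIN_COMMITTED, xmin);
}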

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
Buffer  vmbuffer,
uint8  vmflags,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)

Definition at line 2167 of file pruneheap.c.

2176{
2177 xl_heap_prune xlrec;
2178 XLogRecPtr recptr;
2179 uint8 info;
2180 uint8 regbuf_flags_heap;
2181
2182 /* The following local variables hold data registered in the WAL record: */
2183 xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
2184 xlhp_freeze_plans freeze_plans;
2185 xlhp_prune_items redirect_items;
2186 xlhp_prune_items dead_items;
2187 xlhp_prune_items unused_items;
2188 OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
2189 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2190 bool do_set_vm = vmflags & VISIBILITYMAP_VALID_BITS;
2191
2192 Assert((vmflags & VISIBILITYMAP_VALID_BITS) == vmflags);
2193
2194 xlrec.flags = 0;
2195 regbuf_flags_heap = REGBUF_STANDARD;
2196
2197 /*
2198 * We can avoid an FPI of the heap page if the only modification we are
2199 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2200 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2201 * heap page with this record's LSN. Recovery skips records <= the stamped
2202 * LSN, so this could lead to skipping an earlier FPI needed to repair a
2203 * torn page.
2204 */
2205 if (!do_prune &&
2206 nfrozen == 0 &&
2207 (!do_set_vm || !XLogHintBitIsNeeded()))
2208 regbuf_flags_heap |= REGBUF_NO_IMAGE;
2209
2210 /*
2211 * Prepare data for the buffer. The arrays are not actually in the
2212 * buffer, but we pretend that they are. When XLogInsert stores a full
2213 * page image, the arrays can be omitted.
2214 */
2215 XLogBeginInsert();
2216 XLogRegisterBuffer(0, buffer, regbuf_flags_heap);
2217
2218 if (do_set_vm)
2219 XLogRegisterBuffer(1, vmbuffer, 0);
2220
2221 if (nfrozen > 0)
2222 {
2223 int nplans;
2224
2225 xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
2226
2227 /*
2228 * Prepare deduplicated representation for use in the WAL record. This
2229 * destructively sorts frozen tuples array in-place.
2230 */
2231 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2232
2233 freeze_plans.nplans = nplans;
2234 XLogRegisterBufData(0, &freeze_plans,
2235 offsetof(xlhp_freeze_plans, plans));
2236 XLogRegisterBufData(0, plans,
2237 sizeof(xlhp_freeze_plan) * nplans);
2238 }
2239 if (nredirected > 0)
2240 {
2241 xlrec.flags |= XLHP_HAS_REDIRECTIONS;
2242
2243 redirect_items.ntargets = nredirected;
2244 XLogRegisterBufData(0, &redirect_items,
2245 offsetof(xlhp_prune_items, data));
2246 XLogRegisterBufData(0, redirected,
2247 sizeof(OffsetNumber[2]) * nredirected);
2248 }
2249 if (ndead > 0)
2250 {
2251 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2252
2253 dead_items.ntargets = ndead;
2254 XLogRegisterBufData(0, &dead_items,
2255 offsetof(xlhp_prune_items, data));
2256 XLogRegisterBufData(0, dead,
2257 sizeof(OffsetNumber) * ndead);
2258 }
2259 if (nunused > 0)
2260 {
2261 xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
2262
2263 unused_items.ntargets = nunused;
2264 XLogRegisterBufData(0, &unused_items,
2265 offsetof(xlhp_prune_items, data));
2266 XLogRegisterBufData(0, unused,
2267 sizeof(OffsetNumber) * nunused);
2268 }
2269 if (nfrozen > 0)
2270 XLogRegisterBufData(0, frz_offsets,
2271 sizeof(OffsetNumber) * nfrozen);
2272
2273 /*
2274 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2275 * flag above.
2276 */
2277 if (vmflags & VISIBILITYMAP_ALL_VISIBLE)
2278 {
2279 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2280 if (vmflags & VISIBILITYMAP_ALL_FROZEN)
2281 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2282 }
2283 if (RelationIsAccessibleInLogicalDecoding(relation))
2284 xlrec.flags |= XLHP_IS_CATALOG_REL;
2285 if (TransactionIdIsValid(conflict_xid))
2286 xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
2287 if (cleanup_lock)
2288 xlrec.flags |= XLHP_CLEANUP_LOCK;
2289 else
2290 {
2291 Assert(nredirected == 0 && ndead == 0);
2292 /* also, any items in 'unused' must've been LP_DEAD previously */
2293 }
2294 XLogRegisterData(&xlrec, SizeOfHeapPrune);
2295 if (TransactionIdIsValid(conflict_xid))
2296 XLogRegisterData(&conflict_xid, sizeof(TransactionId));
2297
2298 switch (reason)
2299 {
2300 case PRUNE_ON_ACCESS:
2301 info = XLOG_HEAP2_PRUNE_ON_ACCESS;
2302 break;
2303 case PRUNE_VACUUM_SCAN:
2304 info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
2305 break;
2306 case PRUNE_VACUUM_CLEANUP:
2307 info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
2308 break;
2309 default:
2310 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2311 break;
2312 }
2313 recptr = XLogInsert(RM_HEAP2_ID, info);
2314
2315 if (do_set_vm)
2316 {
2317 Assert(BufferIsDirty(vmbuffer));
2318 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2319 }
2320
2321 /*
2322 * See comment at the top of the function about regbuf_flags_heap for
2323 * details on when we can advance the page LSN.
2324 */
2325 if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2326 {
2327 Assert(BufferIsDirty(buffer));
2328 PageSetLSN(BufferGetPage(buffer), recptr);
2329 }
2330}
#define XLHP_HAS_CONFLICT_HORIZON
Definition: heapam_xlog.h:316
#define XLHP_HAS_FREEZE_PLANS
Definition: heapam_xlog.h:322
#define XLHP_VM_ALL_VISIBLE
Definition: heapam_xlog.h:339
#define SizeOfHeapPrune
Definition: heapam_xlog.h:295
#define XLHP_HAS_NOW_UNUSED_ITEMS
Definition: heapam_xlog.h:331
#define XLHP_VM_ALL_FROZEN
Definition: heapam_xlog.h:340
#define XLHP_HAS_REDIRECTIONS
Definition: heapam_xlog.h:329
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition: heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition: heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
Definition: heapam_xlog.h:308
#define XLHP_HAS_DEAD_ITEMS
Definition: heapam_xlog.h:330
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition: heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
Definition: heapam_xlog.h:298
const void * data
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: pruneheap.c:2088
#define XLogHintBitIsNeeded()
Definition: xlog.h:122
#define REGBUF_NO_IMAGE
Definition: xloginsert.h:33

References Assert(), BufferGetPage(), BufferIsDirty(), data, elog, ERROR, xl_heap_prune::flags, heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_freeze_plans::nplans, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, VISIBILITYMAP_VALID_BITS, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLHP_VM_ALL_FROZEN, XLHP_VM_ALL_VISIBLE, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogHintBitIsNeeded, XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().
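
A hedged sketch of the call pattern used by lazy_vacuum_heap_page()-style callers: the page change, MarkBufferDirty(), and the WAL record are all issued inside one critical section. The nowunused array and count are assumed to have been collected by the caller; no freezing, redirects, or visibility-map change is logged in this simplified version.

#include "postgres.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Sketch only: mark previously LP_DEAD items unused and WAL-log it. */
static void
log_set_unused(Relation relation, Buffer buffer,
               OffsetNumber *nowunused, int nunused)
{
    START_CRIT_SECTION();

    /* ... set the collected item pointers LP_UNUSED on the page here ... */
    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(relation))
        log_heap_prune_and_freeze(relation, buffer,
                                  InvalidBuffer, 0,     /* no VM update */
                                  InvalidTransactionId, /* no conflict horizon */
                                  false,                /* no cleanup lock held */
                                  PRUNE_VACUUM_CLEANUP,
                                  NULL, 0,              /* no freeze plans */
                                  NULL, 0,              /* no redirects */
                                  NULL, 0,              /* no new dead items */
                                  nowunused, nunused);

    END_CRIT_SECTION();
}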

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)

Definition at line 2068 of file heapam.c.

2069{
2070 if (bistate->current_buf != InvalidBuffer)
2071 ReleaseBuffer(bistate->current_buf);
2072 bistate->current_buf = InvalidBuffer;
2073
2074 /*
2075 * Despite the name, we also reset bulk relation extension state.
2076 * Otherwise we can end up erroring out due to looking for free space in
2077 * ->next_free of one partition, even though ->next_free was set when
2078 * extending another partition. It could obviously also be bad for
2079 * efficiency to look at existing blocks at offsets from another
2080 * partition, even if we don't error out.
2081 */
2082 bistate->next_free = InvalidBlockNumber;
2083 bistate->last_free = InvalidBlockNumber;
2084}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
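
A hedged sketch of the CopyFrom()-style loop this supports: one BulkInsertState is reused for the whole load, but the pin and bulk-extension state are dropped whenever the destination partition changes. next_tuple_to_load() and route_to_partition() are hypothetical stand-ins for the data source and tuple routing.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

extern HeapTuple next_tuple_to_load(void);          /* hypothetical producer */
extern Relation route_to_partition(HeapTuple tup);  /* hypothetical router */

static void
bulk_load(void)
{
    BulkInsertState bistate = GetBulkInsertState();
    Relation    lastrel = NULL;
    HeapTuple   tup;

    while ((tup = next_tuple_to_load()) != NULL)
    {
        Relation    destrel = route_to_partition(tup);

        /* switching partitions: don't reuse state tied to the old one */
        if (lastrel != NULL && destrel != lastrel)
            ReleaseBulkInsertStatePin(bistate);
        lastrel = destrel;

        heap_insert(destrel, tup, GetCurrentCommandId(true), 0, bistate);
    }

    FreeBulkInsertState(bistate);
}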

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)

Definition at line 5557 of file reorderbuffer.c.

5561{
5562 ReorderBufferTupleCidKey key;
5563 ReorderBufferTupleCidEnt *ent;
5564 ForkNumber forkno;
5565 BlockNumber blockno;
5566 bool updated_mapping = false;
5567
5568 /*
5569 * Return unresolved if tuplecid_data is not valid. That's because when
5570 * streaming in-progress transactions we may run into tuples with the CID
5571 * before actually decoding them. Think e.g. about INSERT followed by
5572 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5573 * INSERT. So in such cases, we assume the CID is from the future
5574 * command.
5575 */
5576 if (tuplecid_data == NULL)
5577 return false;
5578
5579 /* be careful about padding */
5580 memset(&key, 0, sizeof(key));
5581
5582 Assert(!BufferIsLocal(buffer));
5583
5584 /*
5585 * get relfilelocator from the buffer, no convenient way to access it
5586 * other than that.
5587 */
5588 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5589
5590 /* tuples can only be in the main fork */
5591 Assert(forkno == MAIN_FORKNUM);
5592 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5593
5594 ItemPointerCopy(&htup->t_self,
5595 &key.tid);
5596
5597restart:
5598 ent = (ReorderBufferTupleCidEnt *)
5599 hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5600
5601 /*
5602 * failed to find a mapping, check whether the table was rewritten and
5603 * apply mapping if so, but only do that once - there can be no new
5604 * mappings while we are in here since we have to hold a lock on the
5605 * relation.
5606 */
5607 if (ent == NULL && !updated_mapping)
5608 {
5609 UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5610 /* now check but don't update for a mapping again */
5611 updated_mapping = true;
5612 goto restart;
5613 }
5614 else if (ent == NULL)
5615 return false;
5616
5617 if (cmin)
5618 *cmin = ent->cmin;
5619 if (cmax)
5620 *cmax = ent->cmax;
5621 return true;
5622}
#define BufferIsLocal(buffer)
Definition: buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:952
@ HASH_FIND
Definition: hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition: snapmgr.c:163

References Assert(), BufferGetTag(), BufferIsLocal, ReorderBufferTupleCidEnt::cmax, ReorderBufferTupleCidEnt::cmin, HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), sort-test::key, MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
const ItemPointerData tid 
)

Definition at line 3230 of file heapam.c.

3231{
3232 TM_Result result;
3233 TM_FailureData tmfd;
3234
3235 result = heap_delete(relation, tid,
3236 GetCurrentCommandId(true), InvalidSnapshot,
3237 true /* wait for commit */ ,
3238 &tmfd, false /* changingPart */ );
3239 switch (result)
3240 {
3241 case TM_SelfModified:
3242 /* Tuple was already updated in current command? */
3243 elog(ERROR, "tuple already updated by self");
3244 break;
3245
3246 case TM_Ok:
3247 /* done successfully */
3248 break;
3249
3250 case TM_Updated:
3251 elog(ERROR, "tuple concurrently updated");
3252 break;
3253
3254 case TM_Deleted:
3255 elog(ERROR, "tuple concurrently deleted");
3256 break;
3257
3258 default:
3259 elog(ERROR, "unrecognized heap_delete status: %u", result);
3260 break;
3261 }
3262}
TM_Result heap_delete(Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2807

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
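
A hedged sketch of the catalog-style use (compare CatalogTupleDelete()): the caller already holds a suitable lock on the relation and knows the TID of the tuple to remove; no index cleanup is done here because dead index entries are reclaimed later by VACUUM.

#include "postgres.h"
#include "access/heapam.h"
#include "access/table.h"
#include "utils/rel.h"

/* Sketch only: delete one tuple, identified by TID, from "catrelid". */
static void
delete_catalog_tuple(Oid catrelid, const ItemPointerData *tid)
{
    Relation    rel = table_open(catrelid, RowExclusiveLock);

    simple_heap_delete(rel, tid);

    table_close(rel, RowExclusiveLock);
}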

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)

Definition at line 2749 of file heapam.c.

2750{
2751 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2752}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:2106

References GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().
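
A hedged sketch; note that, unlike CatalogTupleInsert(), calling simple_heap_insert() by itself does not create index entries, so real callers follow it with index maintenance when the relation has indexes. The values and nulls arrays are assumed to match the relation's tuple descriptor.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "utils/rel.h"

/* Sketch only: form a tuple from prepared values/nulls and insert it. */
static void
insert_row(Relation rel, Datum *values, bool *nulls)
{
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

    simple_heap_insert(rel, tup);

    heap_freetuple(tup);
}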

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)

Definition at line 4520 of file heapam.c.

4522{
4523 TM_Result result;
4524 TM_FailureData tmfd;
4525 LockTupleMode lockmode;
4526
4527 result = heap_update(relation, otid, tup,
4528 GetCurrentCommandId(true), InvalidSnapshot,
4529 true /* wait for commit */ ,
4530 &tmfd, &lockmode, update_indexes);
4531 switch (result)
4532 {
4533 case TM_SelfModified:
4534 /* Tuple was already updated in current command? */
4535 elog(ERROR, "tuple already updated by self");
4536 break;
4537
4538 case TM_Ok:
4539 /* done successfully */
4540 break;
4541
4542 case TM_Updated:
4543 elog(ERROR, "tuple concurrently updated");
4544 break;
4545
4546 case TM_Deleted:
4547 elog(ERROR, "tuple concurrently deleted");
4548 break;
4549
4550 default:
4551 elog(ERROR, "unrecognized heap_update status: %u", result);
4552 break;
4553 }
4554}
TM_Result heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3276

References elog, ERROR, GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
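
A hedged sketch loosely modeled on CatalogTupleUpdate(), which adds the index maintenance omitted here: the new tuple version is built with heap_modify_tuple_by_cols(), the update is applied via the old TID, and update_indexes reports whether the caller still needs to insert index entries (TU_None when a HOT update made that unnecessary).

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "utils/rel.h"

/* Sketch only: replace one attribute of an existing tuple version. */
static void
update_one_attribute(Relation rel, HeapTuple oldtup, int attnum, Datum newval)
{
    TupleDesc   tupdesc = RelationGetDescr(rel);
    Datum       values[1] = {newval};
    bool        nulls[1] = {false};
    HeapTuple   newtup;
    TU_UpdateIndexes update_indexes;

    newtup = heap_modify_tuple_by_cols(oldtup, tupdesc, 1,
                                       &attnum, values, nulls);
    simple_heap_update(rel, &oldtup->t_self, newtup, &update_indexes);

    if (update_indexes != TU_None)
    {
        /* caller would insert new index entries here, as CatalogTupleUpdate does */
    }

    heap_freetuple(newtup);
}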