PostgreSQL Source Code git master
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "commands/vacuum.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
(Include dependency graph and included-by graph for heapam.h omitted.)


Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeParams
 
struct  PruneFreezeResult
 
struct  BatchMVCCState
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 

Typedefs

typedef struct BulkInsertStateData * BulkInsertState
 
typedef struct GlobalVisState GlobalVisState
 
typedef struct TupleTableSlot TupleTableSlot
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData * HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData * BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeParams PruneFreezeParams
 
typedef struct PruneFreezeResult PruneFreezeResult
 
typedef struct BatchMVCCState BatchMVCCState
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, const ItemPointerData *tid)
 
void heap_abort_speculative (Relation relation, const ItemPointerData *tid)
 
TM_Result heap_update (Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, const ItemPointerData *tid)
 
void simple_heap_update (Relation relation, const ItemPointerData *otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, GlobalVisState *vistest)
 
int HeapTupleSatisfiesMVCCBatch (Snapshot snapshot, Buffer buffer, int ntups, BatchMVCCState *batchmvcc, OffsetNumber *vistuples_dense)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 39 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 40 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 44 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 43 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 51 of file heapam.h.

Typedef Documentation

◆ BatchMVCCState

◆ BitmapHeapScanDesc

◆ BitmapHeapScanDescData

◆ BulkInsertState

◆ GlobalVisState

Definition at line 47 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

Definition at line 102 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeParams

◆ PruneFreezeResult

◆ TupleTableSlot

Definition at line 48 of file heapam.h.

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

typedef enum
{
    HEAPTUPLE_DEAD,                 /* tuple is dead and deletable */
    HEAPTUPLE_LIVE,                 /* tuple is live (committed, no deleter) */
    HEAPTUPLE_RECENTLY_DEAD,        /* tuple is dead, but not deletable yet */
    HEAPTUPLE_INSERT_IN_PROGRESS,   /* inserting xact is still in progress */
    HEAPTUPLE_DELETE_IN_PROGRESS,   /* deleting xact is still in progress */
} HTSV_Result;
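
These values are typically consumed by callers of HeapTupleSatisfiesVacuum(). The sketch below is illustrative only: the tuple_is_removable() helper is made up here, and it assumes the caller already holds at least a share lock on the tuple's buffer.

static bool
tuple_is_removable(HeapTuple tuple, TransactionId OldestXmin, Buffer buffer)
{
    switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buffer))
    {
        case HEAPTUPLE_DEAD:
            return true;        /* dead to everyone: storage may be reclaimed */
        case HEAPTUPLE_RECENTLY_DEAD:
            return false;       /* dead, but possibly visible to old snapshots */
        default:
            return false;       /* live, or insert/delete still in progress */
    }
}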

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 226 of file heapam.h.

typedef enum
{
    PRUNE_ON_ACCESS,            /* on-access pruning */
    PRUNE_VACUUM_SCAN,          /* VACUUM 1st heap pass */
    PRUNE_VACUUM_CLEANUP,       /* VACUUM 2nd heap pass */
} PruneReason;

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)
extern

Definition at line 2100 of file heapam.c.

{
    if (bistate->current_buf != InvalidBuffer)
        ReleaseBuffer(bistate->current_buf);
    FreeAccessStrategy(bistate->strategy);
    pfree(bistate);
}

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), MergePartitionsMoveRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )
extern

Definition at line 2083 of file heapam.c.

{
    BulkInsertState bistate;

    bistate = palloc_object(BulkInsertStateData);
    bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
    bistate->current_buf = InvalidBuffer;
    bistate->next_free = InvalidBlockNumber;
    bistate->last_free = InvalidBlockNumber;
    bistate->already_extended_by = 0;
    return bistate;
}

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc_object, and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), MergePartitionsMoveRows(), and transientrel_startup().
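
As a usage illustration (not taken from the PostgreSQL sources), a loader in the style of the callers above might drive the bulk-insert API as follows; bulk_load_sketch() and its rel/tuples/ntuples inputs are assumptions made up here:

static void
bulk_load_sketch(Relation rel, HeapTuple *tuples, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], GetCurrentCommandId(true),
                    HEAP_INSERT_SKIP_FSM, bistate);

    FreeBulkInsertState(bistate);
}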

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6263 of file heapam.c.

6264{
6266 ItemId lp;
6267 HeapTupleData tp;
6268 Page page;
6269 BlockNumber block;
6270 Buffer buffer;
6271
6273
6274 block = ItemPointerGetBlockNumber(tid);
6275 buffer = ReadBuffer(relation, block);
6276 page = BufferGetPage(buffer);
6277
6279
6280 /*
6281 * Page can't be all visible, we just inserted into it, and are still
6282 * running.
6283 */
6284 Assert(!PageIsAllVisible(page));
6285
6288
6289 tp.t_tableOid = RelationGetRelid(relation);
6290 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6291 tp.t_len = ItemIdGetLength(lp);
6292 tp.t_self = *tid;
6293
6294 /*
6295 * Sanity check that the tuple really is a speculatively inserted tuple,
6296 * inserted by us.
6297 */
6298 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6299 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6300 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6301 elog(ERROR, "attempted to kill a non-speculative tuple");
6303
6304 /*
6305 * No need to check for serializable conflicts here. There is never a
6306 * need for a combo CID, either. No need to extract replica identity, or
6307 * do anything special with infomask bits.
6308 */
6309
6311
6312 /*
6313 * The tuple will become DEAD immediately. Flag that this page is a
6314 * candidate for pruning by setting xmin to TransactionXmin. While not
6315 * immediately prunable, it is the oldest xid we can cheaply determine
6316 * that's safe against wraparound / being older than the table's
6317 * relfrozenxid. To defend against the unlikely case of a new relation
6318 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6319 * if so (vacuum can't subsequently move relfrozenxid to beyond
6320 * TransactionXmin, so there's no race here).
6321 */
6323 {
6324 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6326
6327 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6328 prune_xid = relfrozenxid;
6329 else
6332 }
6333
6334 /* store transaction information of xact deleting the tuple */
6337
6338 /*
6339 * Set the tuple header xmin to InvalidTransactionId. This makes the
6340 * tuple immediately invisible everyone. (In particular, to any
6341 * transactions waiting on the speculative token, woken up later.)
6342 */
6344
6345 /* Clear the speculative insertion token too */
6346 tp.t_data->t_ctid = tp.t_self;
6347
6348 MarkBufferDirty(buffer);
6349
6350 /*
6351 * XLOG stuff
6352 *
6353 * The WAL records generated here match heap_delete(). The same recovery
6354 * routines are used.
6355 */
6356 if (RelationNeedsWAL(relation))
6357 {
6360
6362 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
6363 tp.t_data->t_infomask2);
6365 xlrec.xmax = xid;
6366
6370
6371 /* No replica identity & replication origin logged */
6372
6374
6375 PageSetLSN(page, recptr);
6376 }
6377
6379
6381
6382 if (HeapTupleHasExternal(&tp))
6383 {
6384 Assert(!IsToastRelation(relation));
6385 heap_toast_delete(relation, &tp, true);
6386 }
6387
6388 /*
6389 * Never need to mark tuple for invalidation, since catalogs don't support
6390 * speculative insertion
6391 */
6392
6393 /* Now we can release the buffer */
6394 ReleaseBuffer(buffer);
6395
6396 /* count deletion, as we counted the insertion too */
6397 pgstat_count_heap_delete(relation);
6398}

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, fb(), xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().
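
The intended call sequence, sketched below, is the one used by the ON CONFLICT machinery: insert with HEAP_INSERT_SPECULATIVE, probe the indexes for a conflict (elided here and represented by the conflict_detected parameter), then either confirm or abort. This is an illustrative sketch, not the actual executor code; see heapam_tuple_complete_speculative() and nodeModifyTable.c for that.

static void
speculative_insert_sketch(Relation rel, HeapTuple tup, CommandId cid,
                          bool conflict_detected)
{
    heap_insert(rel, tup, cid, HEAP_INSERT_SPECULATIVE, NULL);

    if (conflict_detected)
        heap_abort_speculative(rel, &tup->t_self);   /* kill our own tuple */
    else
        heap_finish_speculative(rel, &tup->t_self);  /* make it a normal tuple */
}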

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)
extern

Definition at line 1162 of file heapam.c.

1166{
1167 HeapScanDesc scan;
1168
1169 /*
1170 * increment relation ref count while scanning relation
1171 *
1172 * This is just to make really sure the relcache entry won't go away while
1173 * the scan has a pointer to it. Caller should be holding the rel open
1174 * anyway, so this is redundant in all normal scenarios...
1175 */
1177
1178 /*
1179 * allocate and initialize scan descriptor
1180 */
1181 if (flags & SO_TYPE_BITMAPSCAN)
1182 {
1184
1185 /*
1186 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1187 * does not have, so no special initializations required here.
1188 */
1189 scan = (HeapScanDesc) bscan;
1190 }
1191 else
1193
1194 scan->rs_base.rs_rd = relation;
1195 scan->rs_base.rs_snapshot = snapshot;
1196 scan->rs_base.rs_nkeys = nkeys;
1197 scan->rs_base.rs_flags = flags;
1198 scan->rs_base.rs_parallel = parallel_scan;
1199 scan->rs_strategy = NULL; /* set in initscan */
1200 scan->rs_cbuf = InvalidBuffer;
1201
1202 /*
1203 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1204 */
1205 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1207
1208 /* Check that a historic snapshot is not used for non-catalog tables */
1209 if (snapshot &&
1210 IsHistoricMVCCSnapshot(snapshot) &&
1212 {
1213 ereport(ERROR,
1215 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
1216 RelationGetRelationName(relation))));
1217 }
1218
1219 /*
1220 * For seqscan and sample scans in a serializable transaction, acquire a
1221 * predicate lock on the entire relation. This is required not only to
1222 * lock all the matching tuples, but also to conflict with new insertions
1223 * into the table. In an indexscan, we take page locks on the index pages
1224 * covering the range specified in the scan qual, but in a heap scan there
1225 * is nothing more fine-grained to lock. A bitmap scan is a different
1226 * story, there we have already scanned the index and locked the index
1227 * pages covering the predicate. But in that case we still have to lock
1228 * any matching heap tuples. For sample scan we could optimize the locking
1229 * to be at least page-level granularity, but we'd need to add per-tuple
1230 * locking for that.
1231 */
1233 {
1234 /*
1235 * Ensure a missing snapshot is noticed reliably, even if the
1236 * isolation mode means predicate locking isn't performed (and
1237 * therefore the snapshot isn't used here).
1238 */
1239 Assert(snapshot);
1240 PredicateLockRelation(relation, snapshot);
1241 }
1242
1243 /* we only need to set this up once */
1244 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1245
1246 /*
1247 * Allocate memory to keep track of page allocation for parallel workers
1248 * when doing a parallel scan.
1249 */
1250 if (parallel_scan != NULL)
1252 else
1254
1255 /*
1256 * we do this here instead of in initscan() because heap_rescan also calls
1257 * initscan() and we don't want to allocate memory again
1258 */
1259 if (nkeys > 0)
1260 scan->rs_base.rs_key = palloc_array(ScanKeyData, nkeys);
1261 else
1262 scan->rs_base.rs_key = NULL;
1263
1264 initscan(scan, key, false);
1265
1266 scan->rs_read_stream = NULL;
1267
1268 /*
1269 * Set up a read stream for sequential scans and TID range scans. This
1270 * should be done after initscan() because initscan() allocates the
1271 * BufferAccessStrategy object passed to the read stream API.
1272 */
1273 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1275 {
1277
1278 if (scan->rs_base.rs_parallel)
1280 else
1282
1283 /* ---
1284 * It is safe to use batchmode as the only locks taken by `cb`
1285 * are never taken while waiting for IO:
1286 * - SyncScanLock is used in the non-parallel case
1287 * - in the parallel case, only spinlocks and atomics are used
1288 * ---
1289 */
1292 scan->rs_strategy,
1293 scan->rs_base.rs_rd,
1295 cb,
1296 scan,
1297 0);
1298 }
1299 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1300 {
1303 scan->rs_strategy,
1304 scan->rs_base.rs_rd,
1307 scan,
1308 sizeof(TBMIterateResult));
1309 }
1310
1311
1312 return (TableScanDesc) scan;
1313}

References Assert, bitmapheap_stream_read_next(), ereport, errcode(), errmsg(), ERROR, fb(), heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsHistoricMVCCSnapshot, IsMVCCSnapshot, MAIN_FORKNUM, palloc_array, palloc_object, PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelationName, RelationGetRelid, RelationIncrementReferenceCount(), RelationIsAccessibleInLogicalDecoding, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
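
Most callers reach this function through the table AM wrappers in tableam.h, but a direct heap-level sequential scan can be sketched as follows (illustrative only; the scan flags chosen here mirror what table_beginscan() passes):

static void
seqscan_sketch(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan;
    HeapTuple   tuple;
    uint32      flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                        SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL, flags);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* ... examine tuple ... */
    }

    heap_endscan(scan);
}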

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
const ItemPointerData tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
bool  changingPart 
)
extern

Definition at line 2851 of file heapam.c.

2854{
2855 TM_Result result;
2857 ItemId lp;
2858 HeapTupleData tp;
2859 Page page;
2860 BlockNumber block;
2861 Buffer buffer;
2862 Buffer vmbuffer = InvalidBuffer;
2863 TransactionId new_xmax;
2866 bool have_tuple_lock = false;
2867 bool iscombo;
2868 bool all_visible_cleared = false;
2869 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2870 bool old_key_copied = false;
2871
2873
2874 AssertHasSnapshotForToast(relation);
2875
2876 /*
2877 * Forbid this during a parallel operation, lest it allocate a combo CID.
2878 * Other workers might need that combo CID for visibility checks, and we
2879 * have no provision for broadcasting it to them.
2880 */
2881 if (IsInParallelMode())
2882 ereport(ERROR,
2884 errmsg("cannot delete tuples during a parallel operation")));
2885
2886 block = ItemPointerGetBlockNumber(tid);
2887 buffer = ReadBuffer(relation, block);
2888 page = BufferGetPage(buffer);
2889
2890 /*
2891 * Before locking the buffer, pin the visibility map page if it appears to
2892 * be necessary. Since we haven't got the lock yet, someone else might be
2893 * in the middle of changing this, so we'll need to recheck after we have
2894 * the lock.
2895 */
2896 if (PageIsAllVisible(page))
2897 visibilitymap_pin(relation, block, &vmbuffer);
2898
2900
2903
2904 tp.t_tableOid = RelationGetRelid(relation);
2905 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2906 tp.t_len = ItemIdGetLength(lp);
2907 tp.t_self = *tid;
2908
2909l1:
2910
2911 /*
2912 * If we didn't pin the visibility map page and the page has become all
2913 * visible while we were busy locking the buffer, we'll have to unlock and
2914 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2915 * unfortunate, but hopefully shouldn't happen often.
2916 */
2917 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2918 {
2920 visibilitymap_pin(relation, block, &vmbuffer);
2922 }
2923
2924 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2925
2926 if (result == TM_Invisible)
2927 {
2928 UnlockReleaseBuffer(buffer);
2929 ereport(ERROR,
2931 errmsg("attempted to delete invisible tuple")));
2932 }
2933 else if (result == TM_BeingModified && wait)
2934 {
2937
2938 /* must copy state data before unlocking buffer */
2941
2942 /*
2943 * Sleep until concurrent transaction ends -- except when there's a
2944 * single locker and it's our own transaction. Note we don't care
2945 * which lock mode the locker has, because we need the strongest one.
2946 *
2947 * Before sleeping, we need to acquire tuple lock to establish our
2948 * priority for the tuple (see heap_lock_tuple). LockTuple will
2949 * release us when we are next-in-line for the tuple.
2950 *
2951 * If we are forced to "start over" below, we keep the tuple lock;
2952 * this arranges that we stay at the head of the line while rechecking
2953 * tuple state.
2954 */
2956 {
2957 bool current_is_member = false;
2958
2961 {
2963
2964 /*
2965 * Acquire the lock, if necessary (but skip it when we're
2966 * requesting a lock and already have one; avoids deadlock).
2967 */
2968 if (!current_is_member)
2971
2972 /* wait for multixact */
2974 relation, &(tp.t_self), XLTW_Delete,
2975 NULL);
2977
2978 /*
2979 * If xwait had just locked the tuple then some other xact
2980 * could update this tuple before we get to this point. Check
2981 * for xmax change, and start over if so.
2982 *
2983 * We also must start over if we didn't pin the VM page, and
2984 * the page has become all visible.
2985 */
2986 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2989 xwait))
2990 goto l1;
2991 }
2992
2993 /*
2994 * You might think the multixact is necessarily done here, but not
2995 * so: it could have surviving members, namely our own xact or
2996 * other subxacts of this backend. It is legal for us to delete
2997 * the tuple in either case, however (the latter case is
2998 * essentially a situation of upgrading our former shared lock to
2999 * exclusive). We don't bother changing the on-disk hint bits
3000 * since we are about to overwrite the xmax altogether.
3001 */
3002 }
3004 {
3005 /*
3006 * Wait for regular transaction to end; but first, acquire tuple
3007 * lock.
3008 */
3012 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
3014
3015 /*
3016 * xwait is done, but if xwait had just locked the tuple then some
3017 * other xact could update this tuple before we get to this point.
3018 * Check for xmax change, and start over if so.
3019 *
3020 * We also must start over if we didn't pin the VM page, and the
3021 * page has become all visible.
3022 */
3023 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
3026 xwait))
3027 goto l1;
3028
3029 /* Otherwise check if it committed or aborted */
3030 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
3031 }
3032
3033 /*
3034 * We may overwrite if previous xmax aborted, or if it committed but
3035 * only locked the tuple without updating it.
3036 */
3037 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3040 result = TM_Ok;
3041 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
3042 result = TM_Updated;
3043 else
3044 result = TM_Deleted;
3045 }
3046
3047 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3048 if (result != TM_Ok)
3049 {
3050 Assert(result == TM_SelfModified ||
3051 result == TM_Updated ||
3052 result == TM_Deleted ||
3053 result == TM_BeingModified);
3055 Assert(result != TM_Updated ||
3057 }
3058
3059 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3060 {
3061 /* Perform additional check for transaction-snapshot mode RI updates */
3062 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
3063 result = TM_Updated;
3064 }
3065
3066 if (result != TM_Ok)
3067 {
3068 tmfd->ctid = tp.t_data->t_ctid;
3070 if (result == TM_SelfModified)
3072 else
3073 tmfd->cmax = InvalidCommandId;
3074 UnlockReleaseBuffer(buffer);
3075 if (have_tuple_lock)
3077 if (vmbuffer != InvalidBuffer)
3078 ReleaseBuffer(vmbuffer);
3079 return result;
3080 }
3081
3082 /*
3083 * We're about to do the actual delete -- check for conflict first, to
3084 * avoid possibly having to roll back work we've just done.
3085 *
3086 * This is safe without a recheck as long as there is no possibility of
3087 * another process scanning the page between this check and the delete
3088 * being visible to the scan (i.e., an exclusive buffer content lock is
3089 * continuously held from this point until the tuple delete is visible).
3090 */
3092
3093 /* replace cid with a combo CID if necessary */
3095
3096 /*
3097 * Compute replica identity tuple before entering the critical section so
3098 * we don't PANIC upon a memory allocation failure.
3099 */
3100 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
3101
3102 /*
3103 * If this is the first possibly-multixact-able operation in the current
3104 * transaction, set my per-backend OldestMemberMXactId setting. We can be
3105 * certain that the transaction will never become a member of any older
3106 * MultiXactIds than that. (We have to do this even if we end up just
3107 * using our own TransactionId below, since some other backend could
3108 * incorporate our XID into a MultiXact immediately afterwards.)
3109 */
3111
3114 xid, LockTupleExclusive, true,
3115 &new_xmax, &new_infomask, &new_infomask2);
3116
3118
3119 /*
3120 * If this transaction commits, the tuple will become DEAD sooner or
3121 * later. Set flag that this page is a candidate for pruning once our xid
3122 * falls below the OldestXmin horizon. If the transaction finally aborts,
3123 * the subsequent page pruning will be a no-op and the hint will be
3124 * cleared.
3125 */
3126 PageSetPrunable(page, xid);
3127
3128 if (PageIsAllVisible(page))
3129 {
3130 all_visible_cleared = true;
3131 PageClearAllVisible(page);
3132 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3133 vmbuffer, VISIBILITYMAP_VALID_BITS);
3134 }
3135
3136 /* store transaction information of xact deleting the tuple */
3142 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3144 /* Make sure there is no forward chain link in t_ctid */
3145 tp.t_data->t_ctid = tp.t_self;
3146
3147 /* Signal that this is actually a move into another partition */
3148 if (changingPart)
3150
3151 MarkBufferDirty(buffer);
3152
3153 /*
3154 * XLOG stuff
3155 *
3156 * NB: heap_abort_speculative() uses the same xlog record and replay
3157 * routines.
3158 */
3159 if (RelationNeedsWAL(relation))
3160 {
3164
3165 /*
3166 * For logical decode we need combo CIDs to properly decode the
3167 * catalog
3168 */
3170 log_heap_new_cid(relation, &tp);
3171
3172 xlrec.flags = 0;
3175 if (changingPart)
3177 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
3178 tp.t_data->t_infomask2);
3180 xlrec.xmax = new_xmax;
3181
3182 if (old_key_tuple != NULL)
3183 {
3184 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3186 else
3188 }
3189
3192
3194
3195 /*
3196 * Log replica identity of the deleted tuple if there is one
3197 */
3198 if (old_key_tuple != NULL)
3199 {
3200 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3201 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3202 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3203
3205 XLogRegisterData((char *) old_key_tuple->t_data
3207 old_key_tuple->t_len
3209 }
3210
3211 /* filtering by origin on a row level is much more efficient */
3213
3215
3216 PageSetLSN(page, recptr);
3217 }
3218
3220
3222
3223 if (vmbuffer != InvalidBuffer)
3224 ReleaseBuffer(vmbuffer);
3225
3226 /*
3227 * If the tuple has toasted out-of-line attributes, we need to delete
3228 * those items too. We have to do this before releasing the buffer
3229 * because we need to look at the contents of the tuple, but it's OK to
3230 * release the content lock on the buffer first.
3231 */
3232 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3233 relation->rd_rel->relkind != RELKIND_MATVIEW)
3234 {
3235 /* toast table entries should never be recursively toasted */
3237 }
3238 else if (HeapTupleHasExternal(&tp))
3239 heap_toast_delete(relation, &tp, false);
3240
3241 /*
3242 * Mark tuple for invalidation from system caches at next command
3243 * boundary. We have to do this before releasing the buffer because we
3244 * need to look at the contents of the tuple.
3245 */
3246 CacheInvalidateHeapTuple(relation, &tp, NULL);
3247
3248 /* Now we can release the buffer */
3249 ReleaseBuffer(buffer);
3250
3251 /*
3252 * Release the lmgr tuple lock, if we had it.
3253 */
3254 if (have_tuple_lock)
3256
3257 pgstat_count_heap_delete(relation);
3258
3261
3262 return TM_Ok;
3263}

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
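
In the spirit of simple_heap_delete(), a caller that does not expect concurrent updates can be sketched as follows (illustrative only; real callers must be prepared to handle the other TM_Result values rather than erroring out):

static void
delete_tuple_sketch(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid, GetCurrentCommandId(true),
                         InvalidSnapshot,
                         true,      /* wait */
                         &tmfd,
                         false);    /* changingPart */
    if (result != TM_Ok)
        elog(ERROR, "tuple concurrently updated or deleted");
}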

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)
extern

Definition at line 1369 of file heapam.c.

{
    HeapScanDesc scan = (HeapScanDesc) sscan;

    /* Note: no locking manipulations needed */

    /*
     * unpin scan buffers
     */
    if (BufferIsValid(scan->rs_cbuf))
        ReleaseBuffer(scan->rs_cbuf);

    /*
     * Must free the read stream before freeing the BufferAccessStrategy.
     */
    if (scan->rs_read_stream)
        read_stream_end(scan->rs_read_stream);

    /*
     * decrement relation reference count and free scan descriptor storage
     */
    RelationDecrementReferenceCount(scan->rs_base.rs_rd);

    if (scan->rs_base.rs_key)
        pfree(scan->rs_base.rs_key);

    if (scan->rs_strategy != NULL)
        FreeAccessStrategy(scan->rs_strategy);

    if (scan->rs_parallelworkerdata != NULL)
        pfree(scan->rs_parallelworkerdata);

    if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
        UnregisterSnapshot(scan->rs_base.rs_snapshot);

    pfree(scan);
}

References BufferIsValid(), fb(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze frz 
)
inlinestatic

Definition at line 492 of file heapam.h.

{
    HeapTupleHeaderSetXmax(tuple, frz->xmax);

    if (frz->frzflags & XLH_FREEZE_XVAC)
        HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);

    if (frz->frzflags & XLH_INVALID_XVAC)
        HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);

    tuple->t_infomask = frz->t_infomask;
    tuple->t_infomask2 = frz->t_infomask2;
}

References fb(), FrozenTransactionId, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)
extern

Definition at line 1667 of file heapam.c.

1672{
1673 ItemPointer tid = &(tuple->t_self);
1674 ItemId lp;
1675 Buffer buffer;
1676 Page page;
1677 OffsetNumber offnum;
1678 bool valid;
1679
1680 /*
1681 * Fetch and pin the appropriate page of the relation.
1682 */
1683 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1684
1685 /*
1686 * Need share lock on buffer to examine tuple commit status.
1687 */
1689 page = BufferGetPage(buffer);
1690
1691 /*
1692 * We'd better check for out-of-range offnum in case of VACUUM since the
1693 * TID was obtained.
1694 */
1695 offnum = ItemPointerGetOffsetNumber(tid);
1697 {
1699 ReleaseBuffer(buffer);
1701 tuple->t_data = NULL;
1702 return false;
1703 }
1704
1705 /*
1706 * get the item line pointer corresponding to the requested tid
1707 */
1708 lp = PageGetItemId(page, offnum);
1709
1710 /*
1711 * Must check for deleted tuple.
1712 */
1713 if (!ItemIdIsNormal(lp))
1714 {
1716 ReleaseBuffer(buffer);
1718 tuple->t_data = NULL;
1719 return false;
1720 }
1721
1722 /*
1723 * fill in *tuple fields
1724 */
1725 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1726 tuple->t_len = ItemIdGetLength(lp);
1727 tuple->t_tableOid = RelationGetRelid(relation);
1728
1729 /*
1730 * check tuple visibility, then release lock
1731 */
1732 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1733
1734 if (valid)
1735 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1737
1738 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1739
1741
1742 if (valid)
1743 {
1744 /*
1745 * All checks passed, so return the tuple as valid. Caller is now
1746 * responsible for releasing the buffer.
1747 */
1748 *userbuf = buffer;
1749
1750 return true;
1751 }
1752
1753 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1754 if (keep_buf)
1755 *userbuf = buffer;
1756 else
1757 {
1758 ReleaseBuffer(buffer);
1760 tuple->t_data = NULL;
1761 }
1762
1763 return false;
1764}

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), fb(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
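
A minimal sketch of a caller, modeled loosely on heapam_fetch_row_version(): the tuple's t_self must be set to the TID to fetch, and on success the caller owns a pin on *userbuf until it calls ReleaseBuffer(). The helper name is made up here.

static bool
fetch_visible_tuple_sketch(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buf;

    tuple.t_self = *tid;
    if (heap_fetch(rel, snapshot, &tuple, &buf, false))
    {
        /* tuple.t_data points into the pinned buffer; use it, then unpin */
        ReleaseBuffer(buf);
        return true;
    }
    return false;               /* no such line pointer, or not visible */
}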

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6176 of file heapam.c.

6177{
6178 Buffer buffer;
6179 Page page;
6180 OffsetNumber offnum;
6181 ItemId lp;
6182 HeapTupleHeader htup;
6183
6184 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6186 page = BufferGetPage(buffer);
6187
6188 offnum = ItemPointerGetOffsetNumber(tid);
6190 elog(ERROR, "offnum out of range");
6191 lp = PageGetItemId(page, offnum);
6192 if (!ItemIdIsNormal(lp))
6193 elog(ERROR, "invalid lp");
6194
6195 htup = (HeapTupleHeader) PageGetItem(page, lp);
6196
6197 /* NO EREPORT(ERROR) from here till changes are logged */
6199
6201
6202 MarkBufferDirty(buffer);
6203
6204 /*
6205 * Replace the speculative insertion token with a real t_ctid, pointing to
6206 * itself like it does on regular tuples.
6207 */
6208 htup->t_ctid = *tid;
6209
6210 /* XLOG stuff */
6211 if (RelationNeedsWAL(relation))
6212 {
6215
6217
6219
6220 /* We want the same filtering on this as on a plain insert */
6222
6225
6227
6228 PageSetLSN(page, recptr);
6229 }
6230
6232
6233 UnlockReleaseBuffer(buffer);
6234}

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, fb(), HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7469 of file heapam.c.

{
    Page        page = BufferGetPage(buffer);

    for (int i = 0; i < ntuples; i++)
    {
        HeapTupleFreeze *frz = tuples + i;
        ItemId      itemid = PageGetItemId(page, frz->offset);
        HeapTupleHeader htup;

        htup = (HeapTupleHeader) PageGetItem(page, itemid);
        heap_execute_freeze_tuple(htup, frz);
    }
}

References BufferGetPage(), fb(), heap_execute_freeze_tuple(), i, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)
extern

Definition at line 7491 of file heapam.c.

{
    HeapTupleFreeze frz;
    bool        do_freeze;
    bool        totally_frozen;
    struct VacuumCutoffs cutoffs;
    HeapPageFreeze pagefrz;

    cutoffs.relfrozenxid = relfrozenxid;
    cutoffs.relminmxid = relminmxid;
    cutoffs.OldestXmin = FreezeLimit;
    cutoffs.OldestMxact = MultiXactCutoff;
    cutoffs.FreezeLimit = FreezeLimit;
    cutoffs.MultiXactCutoff = MultiXactCutoff;

    pagefrz.freeze_required = true;
    pagefrz.FreezePageRelfrozenXid = FreezeLimit;
    pagefrz.FreezePageRelminMxid = MultiXactCutoff;
    pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
    pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;

    do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
                                          &pagefrz, &frz, &totally_frozen);

    /*
     * Note that because this is not a WAL-logged operation, we don't need to
     * fill in the offset in the freeze record.
     */

    if (do_freeze)
        heap_execute_freeze_tuple(tuple, &frz);
    return do_freeze;
}

References fb(), VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
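
A minimal sketch of a standalone call, assuming the caller already holds an exclusive lock on the containing buffer and has computed the cutoffs itself (roughly how rewrite_heap_tuple() uses it); "rel", "tuple", "freeze_xid" and "cutoff_multi" are illustrative names, not from the source:

    bool    frozen;

    frozen = heap_freeze_tuple(tuple->t_data,
                               rel->rd_rel->relfrozenxid,
                               rel->rd_rel->relminmxid,
                               freeze_xid,      /* FreezeLimit chosen by the caller */
                               cutoff_multi);   /* MultiXactCutoff chosen by the caller */

    /* "frozen" reports whether the header was changed; dirtying or logging the
     * containing page remains the caller's responsibility. */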

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)
extern

Definition at line 1939 of file heapam.c.

1941{
1942 Relation relation = sscan->rs_rd;
1943 Snapshot snapshot = sscan->rs_snapshot;
1944 ItemPointerData ctid;
1946
1947 /*
1948 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1949 * Assume that t_ctid links are valid however - there shouldn't be invalid
1950 * ones in the table.
1951 */
1953
1954 /*
1955 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1956 * need to examine, and *tid is the TID we will return if ctid turns out
1957 * to be bogus.
1958 *
1959 * Note that we will loop until we reach the end of the t_ctid chain.
1960 * Depending on the snapshot passed, there might be at most one visible
1961 * version of the row, but we don't try to optimize for that.
1962 */
1963 ctid = *tid;
1964 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1965 for (;;)
1966 {
1967 Buffer buffer;
1968 Page page;
1969 OffsetNumber offnum;
1970 ItemId lp;
1971 HeapTupleData tp;
1972 bool valid;
1973
1974 /*
1975 * Read, pin, and lock the page.
1976 */
1977 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1979 page = BufferGetPage(buffer);
1980
1981 /*
1982 * Check for bogus item number. This is not treated as an error
1983 * condition because it can happen while following a t_ctid link. We
1984 * just assume that the prior tid is OK and return it unchanged.
1985 */
1986 offnum = ItemPointerGetOffsetNumber(&ctid);
1988 {
1989 UnlockReleaseBuffer(buffer);
1990 break;
1991 }
1992 lp = PageGetItemId(page, offnum);
1993 if (!ItemIdIsNormal(lp))
1994 {
1995 UnlockReleaseBuffer(buffer);
1996 break;
1997 }
1998
1999 /* OK to access the tuple */
2000 tp.t_self = ctid;
2001 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2002 tp.t_len = ItemIdGetLength(lp);
2003 tp.t_tableOid = RelationGetRelid(relation);
2004
2005 /*
2006 * After following a t_ctid link, we might arrive at an unrelated
2007 * tuple. Check for XMIN match.
2008 */
2011 {
2012 UnlockReleaseBuffer(buffer);
2013 break;
2014 }
2015
2016 /*
2017 * Check tuple visibility; if visible, set it as the new result
2018 * candidate.
2019 */
2020 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
2021 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
2022 if (valid)
2023 *tid = ctid;
2024
2025 /*
2026 * If there's a valid t_ctid link, follow it, else we're done.
2027 */
2028 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2032 {
2033 UnlockReleaseBuffer(buffer);
2034 break;
2035 }
2036
2037 ctid = tp.t_data->t_ctid;
2039 UnlockReleaseBuffer(buffer);
2040 } /* end of loop */
2041}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), fb(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
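
Callers normally reach this function through the table AM wrapper rather than invoking it directly. A hedged sketch of that path, with "rel", "snapshot", "blkno" and "offnum" assumed to be supplied by the caller:

    ItemPointerData tid;
    TableScanDesc   scan;

    ItemPointerSet(&tid, blkno, offnum);        /* starting TID, assumed valid */
    scan = table_beginscan_tid(rel, snapshot);
    table_tuple_get_latest_tid(scan, &tid);     /* follows t_ctid links; tid now
                                                 * names the latest visible version */
    table_endscan(scan);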

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)
extern

Definition at line 1895 of file pruneheap.c.

1896{
1897 OffsetNumber offnum,
1898 maxoff;
1899
1902
1903 maxoff = PageGetMaxOffsetNumber(page);
1904 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1905 {
1906 ItemId lp = PageGetItemId(page, offnum);
1907 HeapTupleHeader htup;
1910
1911 /* skip unused and dead items */
1912 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1913 continue;
1914
1915 if (ItemIdIsNormal(lp))
1916 {
1917 htup = (HeapTupleHeader) PageGetItem(page, lp);
1918
1919 /*
1920 * Check if this tuple is part of a HOT-chain rooted at some other
1921 * tuple. If so, skip it for now; we'll process it when we find
1922 * its root.
1923 */
1924 if (HeapTupleHeaderIsHeapOnly(htup))
1925 continue;
1926
1927 /*
1928 * This is either a plain tuple or the root of a HOT-chain.
1929 * Remember it in the mapping.
1930 */
1931 root_offsets[offnum - 1] = offnum;
1932
1933 /* If it's not the start of a HOT-chain, we're done with it */
1934 if (!HeapTupleHeaderIsHotUpdated(htup))
1935 continue;
1936
1937 /* Set up to scan the HOT-chain */
1940 }
1941 else
1942 {
1943 /* Must be a redirect item. We do not set its root_offsets entry */
1945 /* Set up to scan the HOT-chain */
1948 }
1949
1950 /*
1951 * Now follow the HOT-chain and collect other tuples in the chain.
1952 *
1953 * Note: Even though this is a nested loop, the complexity of the
1954 * function is O(N) because a tuple in the page should be visited not
1955 * more than twice, once in the outer loop and once in HOT-chain
1956 * chases.
1957 */
1958 for (;;)
1959 {
1960 /* Sanity check (pure paranoia) */
1961 if (offnum < FirstOffsetNumber)
1962 break;
1963
1964 /*
1965 * An offset past the end of page's line pointer array is possible
1966 * when the array was truncated
1967 */
1968 if (offnum > maxoff)
1969 break;
1970
1971 lp = PageGetItemId(page, nextoffnum);
1972
1973 /* Check for broken chains */
1974 if (!ItemIdIsNormal(lp))
1975 break;
1976
1977 htup = (HeapTupleHeader) PageGetItem(page, lp);
1978
1981 break;
1982
1983 /* Remember the root line pointer for this item */
1984 root_offsets[nextoffnum - 1] = offnum;
1985
1986 /* Advance to next chain member, if any */
1987 if (!HeapTupleHeaderIsHotUpdated(htup))
1988 break;
1989
1990 /* HOT implies it can't have moved to different partition */
1992
1995 }
1996 }
1997}
#define MemSet(start, val, len)
Definition c.h:1013
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
#define ItemIdGetRedirect(itemId)
Definition itemid.h:78
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
#define FirstOffsetNumber
Definition off.h:27

References Assert, fb(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
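
A short sketch of how the index-build callers use it, assuming the caller already holds a pin and at least a share lock on the buffer containing "page":

    OffsetNumber root_offsets[MaxHeapTuplesPerPage];

    heap_get_root_tuples(page, root_offsets);

    /* For a heap-only tuple at offset "off", root_offsets[off - 1] is the offset
     * of the root of its HOT chain (the line pointer an index entry points at);
     * entries for unused or dead items remain InvalidOffsetNumber. */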

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)
extern

Definition at line 1408 of file heapam.c.

1409{
1410 HeapScanDesc scan = (HeapScanDesc) sscan;
1411
1412 /*
1413 * This is still widely used directly, without going through table AM, so
1414 * add a safety check. It's possible we should, at a later point,
1415 * downgrade this to an assert. The reason for checking the AM routine,
1416 * rather than the AM oid, is that this allows to write regression tests
1417 * that create another AM reusing the heap handler.
1418 */
1419 if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1420 ereport(ERROR,
1422 errmsg_internal("only heap AM is supported")));
1423
1424 /*
1425 * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1426 * for catalog or regular tables. See detailed comments in xact.c where
1427 * these variables are declared. Normally we have such a check at tableam
1428 * level API but this is called from many places so we need to ensure it
1429 * here.
1430 */
1432 elog(ERROR, "unexpected heap_getnext call during logical decoding");
1433
1434 /* Note: no locking manipulations needed */
1435
1436 if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1437 heapgettup_pagemode(scan, direction,
1438 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1439 else
1440 heapgettup(scan, direction,
1441 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1442
1443 if (scan->rs_ctup.t_data == NULL)
1444 return NULL;
1445
1446 /*
1447 * if we get here it means we have a new current scan tuple, so point to
1448 * the proper return buffer and return the tuple.
1449 */
1450
1451 pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1452
1453 return &scan->rs_ctup;
1454}
#define unlikely(x)
Definition c.h:412
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:958
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:1068
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition pgstat.h:695
@ SO_ALLOW_PAGEMODE
Definition tableam.h:62
bool bsysscan
Definition xact.c:101
TransactionId CheckXidAlive
Definition xact.c:100

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, fb(), GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
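
Most of the callers above follow the same pattern: open a catalog, run a forward scan with heap_getnext(), and close everything afterwards. A hedged sketch modeled on get_database_list(); the specific catalog and column are illustrative:

    Relation      rel = table_open(DatabaseRelationId, AccessShareLock);
    TableScanDesc scan = table_beginscan_catalog(rel, 0, NULL);
    HeapTuple     tup;

    while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        Form_pg_database pgdatabase = (Form_pg_database) GETSTRUCT(tup);

        /* ... inspect pgdatabase->datname and friends ... */
    }

    table_endscan(scan);
    table_close(rel, AccessShareLock);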

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1457 of file heapam.c.

1458{
1459 HeapScanDesc scan = (HeapScanDesc) sscan;
1460
1461 /* Note: no locking manipulations needed */
1462
1463 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1464 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1465 else
1466 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1467
1468 if (scan->rs_ctup.t_data == NULL)
1469 {
1470 ExecClearTuple(slot);
1471 return false;
1472 }
1473
1474 /*
1475 * if we get here it means we have a new current scan tuple, so point to
1476 * the proper return buffer and return the tuple.
1477 */
1478
1479 pgstat_count_heap_getnext(sscan->rs_rd);
1480
1481 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1482 scan->rs_cbuf);
1483 return true;
1484}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:457

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
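
A hedged sketch of the slot-returning loop. Table AM callers normally go through table_scan_getnextslot(), but for a heap relation the descriptor returned by table_beginscan() can be handed to heap_getnextslot() directly; "rel" and "snapshot" are assumed to come from the caller:

    TableScanDesc   scan = table_beginscan(rel, snapshot, 0, NULL);
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        bool    isnull;
        Datum   val = slot_getattr(slot, 1, &isnull);   /* first column, as an example */

        /* ... use val / isnull ... */
    }

    ExecDropSingleTupleTableSlot(slot);
    table_endscan(scan);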

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1560 of file heapam.c.

1562{
1563 HeapScanDesc scan = (HeapScanDesc) sscan;
1564 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1565 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1566
1567 /* Note: no locking manipulations needed */
1568 for (;;)
1569 {
1570 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1571 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1572 else
1573 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1574
1575 if (scan->rs_ctup.t_data == NULL)
1576 {
1577 ExecClearTuple(slot);
1578 return false;
1579 }
1580
1581 /*
1582 * heap_set_tidrange will have used heap_setscanlimits to limit the
1583 * range of pages we scan to only ones that can contain the TID range
1584 * we're scanning for. Here we must filter out any tuples from these
1585 * pages that are outside of that range.
1586 */
1587 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1588 {
1589 ExecClearTuple(slot);
1590
1591 /*
1592 * When scanning backwards, the TIDs will be in descending order.
1593 * Future tuples in this direction will be lower still, so we can
1594 * just return false to indicate there will be no more tuples.
1595 */
1596 if (ScanDirectionIsBackward(direction))
1597 return false;
1598
1599 continue;
1600 }
1601
1602 /*
1603 * Likewise for the final page, we must filter out TIDs greater than
1604 * maxtid.
1605 */
1606 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1607 {
1608 ExecClearTuple(slot);
1609
1610 /*
1611 * When scanning forward, the TIDs will be in ascending order.
1612 * Future tuples in this direction will be higher still, so we can
1613 * just return false to indicate there will be no more tuples.
1614 */
1615 if (ScanDirectionIsForward(direction))
1616 return false;
1617 continue;
1618 }
1619
1620 break;
1621 }
1622
1623 /*
1624 * if we get here it means we have a new current scan tuple, so point to
1625 * the proper return buffer and return the tuple.
1626 */
1627 pgstat_count_heap_getnext(sscan->rs_rd);
1628
1629 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1630 return true;
1631}
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition sdir.h:50

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
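
This is the workhorse of TID Range Scans. A hedged sketch of the wrapper-level usage, with "rel", "snapshot", "slot", "mintid" and "maxtid" assumed to be set up by the caller (the executor's TID Range Scan node reaches it this way through the table AM):

    TableScanDesc scan = table_beginscan_tidrange(rel, snapshot, &mintid, &maxtid);

    while (table_scan_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* only tuples whose ctid lies within [mintid, maxtid] are returned */
    }

    table_endscan(scan);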

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)
extern

Definition at line 1787 of file heapam.c.

1790{
1791 Page page = BufferGetPage(buffer);
1793 BlockNumber blkno;
1794 OffsetNumber offnum;
1795 bool at_chain_start;
1796 bool valid;
1797 bool skip;
1798 GlobalVisState *vistest = NULL;
1799
1800 /* If this is not the first call, previous call returned a (live!) tuple */
1801 if (all_dead)
1803
1804 blkno = ItemPointerGetBlockNumber(tid);
1805 offnum = ItemPointerGetOffsetNumber(tid);
1807 skip = !first_call;
1808
1809 /* XXX: we should assert that a snapshot is pushed or registered */
1811 Assert(BufferGetBlockNumber(buffer) == blkno);
1812
1813 /* Scan through possible multiple members of HOT-chain */
1814 for (;;)
1815 {
1816 ItemId lp;
1817
1818 /* check for bogus TID */
1820 break;
1821
1822 lp = PageGetItemId(page, offnum);
1823
1824 /* check for unused, dead, or redirected items */
1825 if (!ItemIdIsNormal(lp))
1826 {
1827 /* We should only see a redirect at start of chain */
1829 {
1830 /* Follow the redirect */
1831 offnum = ItemIdGetRedirect(lp);
1832 at_chain_start = false;
1833 continue;
1834 }
1835 /* else must be end of chain */
1836 break;
1837 }
1838
1839 /*
1840 * Update heapTuple to point to the element of the HOT chain we're
1841 * currently investigating. Having t_self set correctly is important
1842 * because the SSI checks and the *Satisfies routine for historical
1843 * MVCC snapshots need the correct tid to decide about the visibility.
1844 */
1845 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1846 heapTuple->t_len = ItemIdGetLength(lp);
1847 heapTuple->t_tableOid = RelationGetRelid(relation);
1848 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1849
1850 /*
1851 * Shouldn't see a HEAP_ONLY tuple at chain start.
1852 */
1854 break;
1855
1856 /*
1857 * The xmin should match the previous xmax value, else chain is
1858 * broken.
1859 */
1863 break;
1864
1865 /*
1866 * When first_call is true (and thus, skip is initially false) we'll
1867 * return the first tuple we find. But on later passes, heapTuple
1868 * will initially be pointing to the tuple we returned last time.
1869 * Returning it again would be incorrect (and would loop forever), so
1870 * we skip it and return the next match we find.
1871 */
1872 if (!skip)
1873 {
1874 /* If it's visible per the snapshot, we must return it */
1875 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1877 buffer, snapshot);
1878
1879 if (valid)
1880 {
1881 ItemPointerSetOffsetNumber(tid, offnum);
1882 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1884 if (all_dead)
1885 *all_dead = false;
1886 return true;
1887 }
1888 }
1889 skip = false;
1890
1891 /*
1892 * If we can't see it, maybe no one else can either. At caller
1893 * request, check whether all chain members are dead to all
1894 * transactions.
1895 *
1896 * Note: if you change the criterion here for what is "dead", fix the
1897 * planner's get_actual_variable_range() function to match.
1898 */
1899 if (all_dead && *all_dead)
1900 {
1901 if (!vistest)
1902 vistest = GlobalVisTestFor(relation);
1903
1904 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1905 *all_dead = false;
1906 }
1907
1908 /*
1909 * Check to see if HOT chain continues past this tuple; if so fetch
1910 * the next offnum and loop around.
1911 */
1913 {
1914 Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1915 blkno);
1916 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1917 at_chain_start = false;
1919 }
1920 else
1921 break; /* end of chain */
1922 }
1923
1924 return false;
1925}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition itemptr.h:158
static const struct exclude_list_item skip[]
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4086
TransactionId RecentXmin
Definition snapmgr.c:160

References Assert, BufferGetBlockNumber(), BufferGetPage(), fb(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().
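
A hedged sketch of the typical index-fetch usage (compare heapam_index_fetch_tuple()): the caller pins and share-locks the buffer named by the index TID, then lets this function walk the HOT chain; "rel", "snapshot" and "tid" are assumed to be provided by the caller:

    HeapTupleData heapTuple;
    bool          all_dead = false;
    bool          found;
    Buffer        buf = ReadBuffer(rel, ItemPointerGetBlockNumber(&tid));

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    found = heap_hot_search_buffer(&tid, rel, buf, snapshot,
                                   &heapTuple, &all_dead, true);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);

    /* On success, "tid" now points at the visible chain member and heapTuple
     * describes it; "all_dead" can be reported back to the index AM so it can
     * mark the entry killable when the whole chain is dead. */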

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)
extern

Definition at line 8207 of file heapam.c.

8208{
8209 /* Initial assumption is that earlier pruning took care of conflict */
8210 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8213 Page page = NULL;
8216#ifdef USE_PREFETCH
8219#endif
8221 int finalndeltids = 0,
8222 nblocksaccessed = 0;
8223
8224 /* State that's only used in bottom-up index deletion case */
8225 int nblocksfavorable = 0;
8226 int curtargetfreespace = delstate->bottomupfreespace,
8227 lastfreespace = 0,
8228 actualfreespace = 0;
8229 bool bottomup_final_block = false;
8230
8232
8233 /* Sort caller's deltids array by TID for further processing */
8235
8236 /*
8237 * Bottom-up case: resort deltids array in an order attuned to where the
8238 * greatest number of promising TIDs are to be found, and determine how
8239 * many blocks from the start of sorted array should be considered
8240 * favorable. This will also shrink the deltids array in order to
8241 * eliminate completely unfavorable blocks up front.
8242 */
8243 if (delstate->bottomup)
8245
8246#ifdef USE_PREFETCH
8247 /* Initialize prefetch state. */
8249 prefetch_state.next_item = 0;
8250 prefetch_state.ndeltids = delstate->ndeltids;
8251 prefetch_state.deltids = delstate->deltids;
8252
8253 /*
8254 * Determine the prefetch distance that we will attempt to maintain.
8255 *
8256 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8257 * sure that isn't a catalog relation before we call code that does
8258 * syscache lookups, to avoid risk of deadlock.
8259 */
8260 if (IsCatalogRelation(rel))
8262 else
8265
8266 /* Cap initial prefetch distance for bottom-up deletion caller */
8267 if (delstate->bottomup)
8268 {
8272 }
8273
8274 /* Start prefetching. */
8276#endif
8277
8278 /* Iterate over deltids, determine which to delete, check their horizon */
8279 Assert(delstate->ndeltids > 0);
8280 for (int i = 0; i < delstate->ndeltids; i++)
8281 {
8282 TM_IndexDelete *ideltid = &delstate->deltids[i];
8283 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8284 ItemPointer htid = &ideltid->tid;
8285 OffsetNumber offnum;
8286
8287 /*
8288 * Read buffer, and perform required extra steps each time a new block
8289 * is encountered. Avoid refetching if it's the same block as the one
8290 * from the last htid.
8291 */
8292 if (blkno == InvalidBlockNumber ||
8294 {
8295 /*
8296 * Consider giving up early for bottom-up index deletion caller
8297 * first. (Only prefetch next-next block afterwards, when it
8298 * becomes clear that we're at least going to access the next
8299 * block in line.)
8300 *
8301 * Sometimes the first block frees so much space for bottom-up
8302 * caller that the deletion process can end without accessing any
8303 * more blocks. It is usually necessary to access 2 or 3 blocks
8304 * per bottom-up deletion operation, though.
8305 */
8306 if (delstate->bottomup)
8307 {
8308 /*
8309 * We often allow caller to delete a few additional items
8310 * whose entries we reached after the point that space target
8311 * from caller was satisfied. The cost of accessing the page
8312 * was already paid at that point, so it made sense to finish
8313 * it off. When that happened, we finalize everything here
8314 * (by finishing off the whole bottom-up deletion operation
8315 * without needlessly paying the cost of accessing any more
8316 * blocks).
8317 */
8319 break;
8320
8321 /*
8322 * Give up when we didn't enable our caller to free any
8323 * additional space as a result of processing the page that we
8324 * just finished up with. This rule is the main way in which
8325 * we keep the cost of bottom-up deletion under control.
8326 */
8328 break;
8329 lastfreespace = actualfreespace; /* for next time */
8330
8331 /*
8332 * Deletion operation (which is bottom-up) will definitely
8333 * access the next block in line. Prepare for that now.
8334 *
8335 * Decay target free space so that we don't hang on for too
8336 * long with a marginal case. (Space target is only truly
8337 * helpful when it allows us to recognize that we don't need
8338 * to access more than 1 or 2 blocks to satisfy caller due to
8339 * agreeable workload characteristics.)
8340 *
8341 * We are a bit more patient when we encounter contiguous
8342 * blocks, though: these are treated as favorable blocks. The
8343 * decay process is only applied when the next block in line
8344 * is not a favorable/contiguous block. This is not an
8345 * exception to the general rule; we still insist on finding
8346 * at least one deletable item per block accessed. See
8347 * bottomup_nblocksfavorable() for full details of the theory
8348 * behind favorable blocks and heap block locality in general.
8349 *
8350 * Note: The first block in line is always treated as a
8351 * favorable block, so the earliest possible point that the
8352 * decay can be applied is just before we access the second
8353 * block in line. The Assert() verifies this for us.
8354 */
8356 if (nblocksfavorable > 0)
8358 else
8359 curtargetfreespace /= 2;
8360 }
8361
8362 /* release old buffer */
8363 if (BufferIsValid(buf))
8365
8367 buf = ReadBuffer(rel, blkno);
8369 Assert(!delstate->bottomup ||
8371
8372#ifdef USE_PREFETCH
8373
8374 /*
8375 * To maintain the prefetch distance, prefetch one more page for
8376 * each page we read.
8377 */
8379#endif
8380
8382
8383 page = BufferGetPage(buf);
8384 maxoff = PageGetMaxOffsetNumber(page);
8385 }
8386
8387 /*
8388 * In passing, detect index corruption involving an index page with a
8389 * TID that points to a location in the heap that couldn't possibly be
8390 * correct. We only do this with actual TIDs from caller's index page
8391 * (not items reached by traversing through a HOT chain).
8392 */
8394
8395 if (istatus->knowndeletable)
8396 Assert(!delstate->bottomup && !istatus->promising);
8397 else
8398 {
8399 ItemPointerData tmp = *htid;
8401
8402 /* Are any tuples from this HOT chain non-vacuumable? */
8404 &heapTuple, NULL, true))
8405 continue; /* can't delete entry */
8406
8407 /* Caller will delete, since whole HOT chain is vacuumable */
8408 istatus->knowndeletable = true;
8409
8410 /* Maintain index free space info for bottom-up deletion case */
8411 if (delstate->bottomup)
8412 {
8413 Assert(istatus->freespace > 0);
8414 actualfreespace += istatus->freespace;
8416 bottomup_final_block = true;
8417 }
8418 }
8419
8420 /*
8421 * Maintain snapshotConflictHorizon value for deletion operation as a
8422 * whole by advancing current value using heap tuple headers. This is
8423 * loosely based on the logic for pruning a HOT chain.
8424 */
8426 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8427 for (;;)
8428 {
8429 ItemId lp;
8430 HeapTupleHeader htup;
8431
8432 /* Sanity check (pure paranoia) */
8433 if (offnum < FirstOffsetNumber)
8434 break;
8435
8436 /*
8437 * An offset past the end of page's line pointer array is possible
8438 * when the array was truncated
8439 */
8440 if (offnum > maxoff)
8441 break;
8442
8443 lp = PageGetItemId(page, offnum);
8445 {
8446 offnum = ItemIdGetRedirect(lp);
8447 continue;
8448 }
8449
8450 /*
8451 * We'll often encounter LP_DEAD line pointers (especially with an
8452 * entry marked knowndeletable by our caller up front). No heap
8453 * tuple headers get examined for an htid that leads us to an
8454 * LP_DEAD item. This is okay because the earlier pruning
8455 * operation that made the line pointer LP_DEAD in the first place
8456 * must have considered the original tuple header as part of
8457 * generating its own snapshotConflictHorizon value.
8458 *
8459 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8460 * the same strategy that index vacuuming uses in all cases. Index
8461 * VACUUM WAL records don't even have a snapshotConflictHorizon
8462 * field of their own for this reason.
8463 */
8464 if (!ItemIdIsNormal(lp))
8465 break;
8466
8467 htup = (HeapTupleHeader) PageGetItem(page, lp);
8468
8469 /*
8470 * Check the tuple XMIN against prior XMAX, if any
8471 */
8474 break;
8475
8477 &snapshotConflictHorizon);
8478
8479 /*
8480 * If the tuple is not HOT-updated, then we are at the end of this
8481 * HOT-chain. No need to visit later tuples from the same update
8482 * chain (they get their own index entries) -- just move on to
8483 * next htid from index AM caller.
8484 */
8485 if (!HeapTupleHeaderIsHotUpdated(htup))
8486 break;
8487
8488 /* Advance to next HOT chain member */
8489 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8490 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8492 }
8493
8494 /* Enable further/final shrinking of deltids for caller */
8495 finalndeltids = i + 1;
8496 }
8497
8499
8500 /*
8501 * Shrink deltids array to exclude non-deletable entries at the end. This
8502 * is not just a minor optimization. Final deltids array size might be
8503 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8504 * ndeltids being zero in all cases with zero total deletable entries.
8505 */
8506 Assert(finalndeltids > 0 || delstate->bottomup);
8507 delstate->ndeltids = finalndeltids;
8508
8509 return snapshotConflictHorizon;
8510}
int maintenance_io_concurrency
Definition bufmgr.c:191
#define Min(x, y)
Definition c.h:997
bool IsCatalogRelation(Relation relation)
Definition catalog.c:104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition heapam.c:8764
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition heapam.c:8062
#define BOTTOMUP_MAX_NBLOCKS
Definition heapam.c:187
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, const ItemPointerData *htid, TM_IndexStatus *istatus)
Definition heapam.c:8147
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition heapam.c:1787
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition heapam.c:8552
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition spccache.c:229

References Assert, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), fb(), FirstOffsetNumber, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), maintenance_io_concurrency, Min, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationData::rd_rel, ReadBuffer(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void arg 
)
extern

Definition at line 6445 of file heapam.c.

6448{
6449 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6450 TM_Result result;
6451 bool ret;
6452
6453#ifdef USE_ASSERT_CHECKING
6454 if (RelationGetRelid(relation) == RelationRelationId)
6456#endif
6457
6458 Assert(BufferIsValid(buffer));
6459
6460 /*
6461 * Register shared cache invals if necessary. Other sessions may finish
6462 * inplace updates of this tuple between this step and LockTuple(). Since
6463 * inplace updates don't change cache keys, that's harmless.
6464 *
6465 * While it's tempting to register invals only after confirming we can
6466 * return true, the following obstacle precludes reordering steps that
6467 * way. Registering invals might reach a CatalogCacheInitializeCache()
6468 * that locks "buffer". That would hang indefinitely if running after our
6469 * own LockBuffer(). Hence, we must register invals before LockBuffer().
6470 */
6472
6473 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6475
6476 /*----------
6477 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6478 *
6479 * - wait unconditionally
6480 * - already locked tuple above, since inplace needs that unconditionally
6481 * - don't recheck header after wait: simpler to defer to next iteration
6482 * - don't try to continue even if the updater aborts: likewise
6483 * - no crosscheck
6484 */
6486 buffer);
6487
6488 if (result == TM_Invisible)
6489 {
6490 /* no known way this can happen */
6491 ereport(ERROR,
6493 errmsg_internal("attempted to overwrite invisible tuple")));
6494 }
6495 else if (result == TM_SelfModified)
6496 {
6497 /*
6498 * CREATE INDEX might reach this if an expression is silly enough to
6499 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6500 * statements might get here after a heap_update() of the same row, in
6501 * the absence of an intervening CommandCounterIncrement().
6502 */
6503 ereport(ERROR,
6505 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6506 }
6507 else if (result == TM_BeingModified)
6508 {
6511
6513 infomask = oldtup.t_data->t_infomask;
6514
6516 {
6519 int remain;
6520
6522 lockmode, NULL))
6523 {
6526 ret = false;
6528 relation, &oldtup.t_self, XLTW_Update,
6529 &remain);
6530 }
6531 else
6532 ret = true;
6533 }
6535 ret = true;
6537 ret = true;
6538 else
6539 {
6542 ret = false;
6543 XactLockTableWait(xwait, relation, &oldtup.t_self,
6544 XLTW_Update);
6545 }
6546 }
6547 else
6548 {
6549 ret = (result == TM_Ok);
6550 if (!ret)
6551 {
6554 }
6555 }
6556
6557 /*
6558 * GetCatalogSnapshot() relies on invalidation messages to know when to
6559 * take a new snapshot. COMMIT of xwait is responsible for sending the
6560 * invalidation. We're not acquiring heavyweight locks sufficient to
6561 * block if not yet sent, so we must take a new snapshot to ensure a later
6562 * attempt has a fair chance. While we don't need this if xwait aborted,
6563 * don't bother optimizing that.
6564 */
6565 if (!ret)
6566 {
6567 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6570 }
6571 return ret;
6572}
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(uint16 infomask)
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple key_equivalent_tuple)
Definition inval.c:1593
void ForgetInplace_Inval(void)
Definition inval.c:1286
void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:601
void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:562
@ XLTW_Update
Definition lmgr.h:27
#define InplaceUpdateTupleLock
Definition lockdefs.h:48
LockTupleMode
Definition lockoptions.h:50
@ LockTupleNoKeyExclusive
Definition lockoptions.h:56
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
void * arg
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
CommandId GetCurrentCommandId(bool used)
Definition xact.c:830

References arg, Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, fb(), ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)
extern

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)
extern

Definition at line 6583 of file heapam.c.

6586{
6587 HeapTupleHeader htup = oldtup->t_data;
6588 uint32 oldlen;
6589 uint32 newlen;
6590 char *dst;
6591 char *src;
6592 int nmsgs = 0;
6594 bool RelcacheInitFileInval = false;
6595
6596 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6597 oldlen = oldtup->t_len - htup->t_hoff;
6598 newlen = tuple->t_len - tuple->t_data->t_hoff;
6599 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6600 elog(ERROR, "wrong tuple length");
6601
6602 dst = (char *) htup + htup->t_hoff;
6603 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6604
6605 /* Like RecordTransactionCommit(), log only if needed */
6608 &RelcacheInitFileInval);
6609
6610 /*
6611 * Unlink relcache init files as needed. If unlinking, acquire
6612 * RelCacheInitLock until after associated invalidations. By doing this
6613 * in advance, if we checkpoint and then crash between inplace
6614 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6615 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6616 * neglect to PANIC on EIO.
6617 */
6619
6620 /*----------
6621 * NO EREPORT(ERROR) from here till changes are complete
6622 *
6623 * Our buffer lock won't stop a reader having already pinned and checked
6624 * visibility for this tuple. Hence, we write WAL first, then mutate the
6625 * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6626 * checkpoint delay makes that acceptable. With the usual order of
6627 * changes, a crash after memcpy() and before XLogInsert() could allow
6628 * datfrozenxid to overtake relfrozenxid:
6629 *
6630 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6631 * ["R" is a VACUUM tbl]
6632 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6633 * D: systable_getnext() returns pg_class tuple of tbl
6634 * R: memcpy() into pg_class tuple of tbl
6635 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6636 * [crash]
6637 * [recovery restores datfrozenxid w/o relfrozenxid]
6638 *
6639 * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint().
6640 * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack.
6641 * The stack copy facilitates a FPI of the post-mutation block before we
6642 * accept other sessions seeing it. DELAY_CHKPT_START allows us to
6643 * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint()
6644 * can operate under BUFFER_LOCK_SHARED, it can't avoid DELAY_CHKPT_START.
6645 * This function, however, likely could avoid it with the following order
6646 * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use
6647 * DELAY_CHKPT_START here, too, as a way to have fewer distinct code
6648 * patterns to analyze. Inplace update isn't so frequent that it should
6649 * pursue the small optimization of skipping DELAY_CHKPT_START.
6650 */
6654
6655 /* XLOG stuff */
6656 if (RelationNeedsWAL(relation))
6657 {
6660 char *origdata = (char *) BufferGetBlock(buffer);
6661 Page page = BufferGetPage(buffer);
6662 uint16 lower = ((PageHeader) page)->pd_lower;
6663 uint16 upper = ((PageHeader) page)->pd_upper;
6665 RelFileLocator rlocator;
6666 ForkNumber forkno;
6667 BlockNumber blkno;
6669
6670 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6671 xlrec.dbId = MyDatabaseId;
6673 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6674 xlrec.nmsgs = nmsgs;
6675
6678 if (nmsgs != 0)
6680 nmsgs * sizeof(SharedInvalidationMessage));
6681
6682 /* register block matching what buffer will look like after changes */
6687 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6688 Assert(forkno == MAIN_FORKNUM);
6689 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6691 XLogRegisterBufData(0, src, newlen);
6692
6693 /* inplace updates aren't decoded atm, don't log the origin */
6694
6696
6697 PageSetLSN(page, recptr);
6698 }
6699
6700 memcpy(dst, src, newlen);
6701
6702 MarkBufferDirty(buffer);
6703
6705
6706 /*
6707 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6708 * do this before UnlockTuple().
6709 */
6711
6714 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6715
6716 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6717
6718 /*
6719 * Queue a transactional inval, for logical decoding and for third-party
6720 * code that might have been relying on it since long before inplace
6721 * update adopted immediate invalidation. See README.tuplock section
6722 * "Reading inplace-updated columns" for logical decoding details.
6723 */
6725 CacheInvalidateHeapTuple(relation, tuple, NULL);
6726}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4377
static Block BufferGetBlock(Buffer buffer)
Definition bufmgr.h:433
PageHeaderData * PageHeader
Definition bufpage.h:173
uint32_t uint32
Definition c.h:546
Oid MyDatabaseTableSpace
Definition globals.c:96
Oid MyDatabaseId
Definition globals.c:94
#define MinSizeOfHeapInplace
#define XLOG_HEAP_INPLACE
Definition heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition inval.c:930
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition inval.c:1088
void PreInplace_Inval(void)
Definition inval.c:1250
void AtInplace_Inval(void)
Definition inval.c:1263
#define IsBootstrapProcessingMode()
Definition miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define DELAY_CHKPT_START
Definition proc.h:135
ForkNumber
Definition relpath.h:56
PGPROC * MyProc
Definition proc.c:67
int delayChkptFlags
Definition proc.h:263
#define XLogStandbyInfoActive()
Definition xlog.h:125
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:409
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition xloginsert.c:313

References AcceptInvalidationMessages(), Assert, AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, fb(), inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().
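
heap_inplace_lock(), heap_inplace_unlock() and heap_inplace_update_and_unlock() form one protocol, normally driven by systable_inplace_update_begin()/_finish(). A rough sketch of the intended call sequence, not verbatim source; "oldtup", "buffer", "release_callback" and "arg" are assumed to come from the caller's catalog fetch:

    if (heap_inplace_lock(relation, &oldtup, buffer, release_callback, arg))
    {
        HeapTuple   newtup = heap_copytuple(&oldtup);

        /* ... modify newtup's user data; its length must stay exactly the same ... */

        heap_inplace_update_and_unlock(relation, &oldtup, newtup, buffer);
        /* (a caller that decides not to update would call heap_inplace_unlock()) */
    }
    else
    {
        /* lock not obtained: the row was concurrently updated; refetch and retry */
    }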

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2150 of file heapam.c.

2152{
2155 Buffer buffer;
2156 Buffer vmbuffer = InvalidBuffer;
2157 bool all_visible_cleared = false;
2158
2159 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2162
2163 AssertHasSnapshotForToast(relation);
2164
2165 /*
2166 * Fill in tuple header fields and toast the tuple if necessary.
2167 *
2168 * Note: below this point, heaptup is the data we actually intend to store
2169 * into the relation; tup is the caller's original untoasted data.
2170 */
2171 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2172
2173 /*
2174 * Find buffer to insert this tuple into. If the page is all visible,
2175 * this will also pin the requisite visibility map page.
2176 */
2177 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2178 InvalidBuffer, options, bistate,
2179 &vmbuffer, NULL,
2180 0);
2181
2182 /*
2183 * We're about to do the actual insert -- but check for conflict first, to
2184 * avoid possibly having to roll back work we've just done.
2185 *
2186 * This is safe without a recheck as long as there is no possibility of
2187 * another process scanning the page between this check and the insert
2188 * being visible to the scan (i.e., an exclusive buffer content lock is
2189 * continuously held from this point until the tuple insert is visible).
2190 *
2191 * For a heap insert, we only need to check for table-level SSI locks. Our
2192 * new tuple can't possibly conflict with existing tuple locks, and heap
2193 * page locks are only consolidated versions of tuple locks; they do not
2194 * lock "gaps" as index page locks do. So we don't need to specify a
2195 * buffer when making the call, which makes for a faster check.
2196 */
2198
2199 /* NO EREPORT(ERROR) from here till changes are logged */
2201
2202 RelationPutHeapTuple(relation, buffer, heaptup,
2204
2205 if (PageIsAllVisible(BufferGetPage(buffer)))
2206 {
2207 all_visible_cleared = true;
2209 visibilitymap_clear(relation,
2211 vmbuffer, VISIBILITYMAP_VALID_BITS);
2212 }
2213
2214 /*
2215 * XXX Should we set PageSetPrunable on this page ?
2216 *
2217 * The inserting transaction may eventually abort thus making this tuple
2218 * DEAD and hence available for pruning. Though we don't want to optimize
2219 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2220 * aborted tuple will never be pruned until next vacuum is triggered.
2221 *
2222 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2223 */
2224
2225 MarkBufferDirty(buffer);
2226
2227 /* XLOG stuff */
2228 if (RelationNeedsWAL(relation))
2229 {
2233 Page page = BufferGetPage(buffer);
2234 uint8 info = XLOG_HEAP_INSERT;
2235 int bufflags = 0;
2236
2237 /*
2238 * If this is a catalog, we need to transmit combo CIDs to properly
2239 * decode, so log that as well.
2240 */
2242 log_heap_new_cid(relation, heaptup);
2243
2244 /*
2245 * If this is the single and first tuple on page, we can reinit the
2246 * page instead of restoring the whole thing. Set flag, and hide
2247 * buffer references from XLogInsert.
2248 */
2251 {
2252 info |= XLOG_HEAP_INIT_PAGE;
2254 }
2255
2256 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2257 xlrec.flags = 0;
2263
2264 /*
2265 * For logical decoding, we need the tuple even if we're doing a full
2266 * page write, so make sure it's included even if we take a full-page
2267 * image. (XXX We could alternatively store a pointer into the FPW).
2268 */
2269 if (RelationIsLogicallyLogged(relation) &&
2271 {
2274
2275 if (IsToastRelation(relation))
2277 }
2278
2281
2282 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2283 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2284 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2285
2286 /*
2287 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2288 * write the whole page to the xlog, we don't need to store
2289 * xl_heap_header in the xlog.
2290 */
2293 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2295 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2297
2298 /* filtering by origin on a row level is much more efficient */
2300
2301 recptr = XLogInsert(RM_HEAP_ID, info);
2302
2303 PageSetLSN(page, recptr);
2304 }
2305
2307
2308 UnlockReleaseBuffer(buffer);
2309 if (vmbuffer != InvalidBuffer)
2310 ReleaseBuffer(vmbuffer);
2311
2312 /*
2313 * If tuple is cacheable, mark it for invalidation from the caches in case
2314 * we abort. Note it is OK to do this after releasing the buffer, because
2315 * the heaptup data structure is all in local memory, not in the shared
2316 * buffer.
2317 */
2319
2320 /* Note: speculative insertions are counted too, even if aborted later */
2321 pgstat_count_heap_insert(relation, 1);
2322
2323 /*
2324 * If heaptup is a private copy, release it. Don't forget to copy t_self
2325 * back to the caller's image, too.
2326 */
2327 if (heaptup != tup)
2328 {
2329 tup->t_self = heaptup->t_self;
2331 }
2332}
uint8_t uint8
Definition c.h:544
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition heapam.c:2341
#define HEAP_INSERT_SPECULATIVE
Definition heapam.h:40
#define HEAP_INSERT_NO_LOGICAL
Definition heapam.h:39
#define XLH_INSERT_ON_TOAST_RELATION
Definition heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition heapam_xlog.h:33
#define SizeOfHeapInsert
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition hio.c:500
#define HeapTupleHeaderGetNatts(tup)
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetNumberOfAttributes(relation)
Definition rel.h:520
#define REGBUF_KEEP_DATA
Definition xloginsert.h:36
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

References Assert, AssertHasSnapshotForToast(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, fb(), FirstOffsetNumber, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
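
A hedged sketch of a direct insertion, roughly what simple_heap_insert()-style callers do when they need neither speculative nor bulk-insert behaviour; "rel", "tupdesc", "values" and "isnull" are assumed to be prepared by the caller:

    HeapTuple   tup = heap_form_tuple(tupdesc, values, isnull);

    heap_insert(rel, tup,
                GetCurrentCommandId(true),  /* cid */
                0,                          /* options: no HEAP_INSERT_* flags */
                NULL);                      /* bistate: not a bulk insert */

    /* tup->t_self now carries the TID the new tuple was placed at */
    heap_freetuple(tup);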

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
TM_FailureData tmfd 
)
extern

Definition at line 4652 of file heapam.c.

4656{
4657 TM_Result result;
4658 ItemPointer tid = &(tuple->t_self);
4659 ItemId lp;
4660 Page page;
4661 Buffer vmbuffer = InvalidBuffer;
4662 BlockNumber block;
4663 TransactionId xid,
4664 xmax;
4668 bool first_time = true;
4669 bool skip_tuple_lock = false;
4670 bool have_tuple_lock = false;
4671 bool cleared_all_frozen = false;
4672
4673 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4674 block = ItemPointerGetBlockNumber(tid);
4675
4676 /*
4677 * Before locking the buffer, pin the visibility map page if it appears to
4678 * be necessary. Since we haven't got the lock yet, someone else might be
4679 * in the middle of changing this, so we'll need to recheck after we have
4680 * the lock.
4681 */
4682 if (PageIsAllVisible(BufferGetPage(*buffer)))
4683 visibilitymap_pin(relation, block, &vmbuffer);
4684
4686
4687 page = BufferGetPage(*buffer);
4690
4691 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4692 tuple->t_len = ItemIdGetLength(lp);
4693 tuple->t_tableOid = RelationGetRelid(relation);
4694
4695l3:
4696 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4697
4698 if (result == TM_Invisible)
4699 {
4700 /*
4701 * This is possible, but only when locking a tuple for ON CONFLICT
4702 * UPDATE. We return this value here rather than throwing an error in
4703 * order to give that case the opportunity to throw a more specific
4704 * error.
4705 */
4706 result = TM_Invisible;
4707 goto out_locked;
4708 }
4709 else if (result == TM_BeingModified ||
4710 result == TM_Updated ||
4711 result == TM_Deleted)
4712 {
4716 bool require_sleep;
4717 ItemPointerData t_ctid;
4718
4719 /* must copy state data before unlocking buffer */
4721 infomask = tuple->t_data->t_infomask;
4722 infomask2 = tuple->t_data->t_infomask2;
4723 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4724
4726
4727 /*
4728 * If any subtransaction of the current top transaction already holds
4729 * a lock as strong as or stronger than what we're requesting, we
4730 * effectively hold the desired lock already. We *must* succeed
4731 * without trying to take the tuple lock, else we will deadlock
4732 * against anyone wanting to acquire a stronger lock.
4733 *
4734 * Note we only do this the first time we loop on the HTSU result;
4735 * there is no point in testing in subsequent passes, because
4736 * evidently our own transaction cannot have acquired a new lock after
4737 * the first time we checked.
4738 */
4739 if (first_time)
4740 {
4741 first_time = false;
4742
4744 {
4745 int i;
4746 int nmembers;
4747 MultiXactMember *members;
4748
4749 /*
4750 * We don't need to allow old multixacts here; if that had
4751 * been the case, HeapTupleSatisfiesUpdate would have returned
4752 * MayBeUpdated and we wouldn't be here.
4753 */
4754 nmembers =
4755 GetMultiXactIdMembers(xwait, &members, false,
4757
4758 for (i = 0; i < nmembers; i++)
4759 {
4760 /* only consider members of our own transaction */
4761 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4762 continue;
4763
4764 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4765 {
4766 pfree(members);
4767 result = TM_Ok;
4768 goto out_unlocked;
4769 }
4770 else
4771 {
4772 /*
4773 * Disable acquisition of the heavyweight tuple lock.
4774 * Otherwise, when promoting a weaker lock, we might
4775 * deadlock with another locker that has acquired the
4776 * heavyweight tuple lock and is waiting for our
4777 * transaction to finish.
4778 *
4779 * Note that in this case we still need to wait for
4780 * the multixact if required, to avoid acquiring
4781 * conflicting locks.
4782 */
4783 skip_tuple_lock = true;
4784 }
4785 }
4786
4787 if (members)
4788 pfree(members);
4789 }
4791 {
4792 switch (mode)
4793 {
4794 case LockTupleKeyShare:
4798 result = TM_Ok;
4799 goto out_unlocked;
4800 case LockTupleShare:
4803 {
4804 result = TM_Ok;
4805 goto out_unlocked;
4806 }
4807 break;
4810 {
4811 result = TM_Ok;
4812 goto out_unlocked;
4813 }
4814 break;
4815 case LockTupleExclusive:
4818 {
4819 result = TM_Ok;
4820 goto out_unlocked;
4821 }
4822 break;
4823 }
4824 }
4825 }
4826
4827 /*
4828 * Initially assume that we will have to wait for the locking
4829 * transaction(s) to finish. We check various cases below in which
4830 * this can be turned off.
4831 */
4832 require_sleep = true;
4833 if (mode == LockTupleKeyShare)
4834 {
4835 /*
4836 * If we're requesting KeyShare, and there's no update present, we
4837 * don't need to wait. Even if there is an update, we can still
4838 * continue if the key hasn't been modified.
4839 *
4840 * However, if there are updates, we need to walk the update chain
4841 * to mark future versions of the row as locked, too. That way,
4842 * if somebody deletes that future version, we're protected
4843 * against the key going away. This locking of future versions
4844 * could block momentarily, if a concurrent transaction is
4845 * deleting a key; or it could return a value to the effect that
4846 * the transaction deleting the key has already committed. So we
4847 * do this before re-locking the buffer; otherwise this would be
4848 * prone to deadlocks.
4849 *
4850 * Note that the TID we're locking was grabbed before we unlocked
4851 * the buffer. For it to change while we're not looking, the
4852 * other properties we're testing for below after re-locking the
4853 * buffer would also change, in which case we would restart this
4854 * loop above.
4855 */
4857 {
4858 bool updated;
4859
4861
4862 /*
4863 * If there are updates, follow the update chain; bail out if
4864 * that cannot be done.
4865 */
4866 if (follow_updates && updated &&
4867 !ItemPointerEquals(&tuple->t_self, &t_ctid))
4868 {
4869 TM_Result res;
4870
4871 res = heap_lock_updated_tuple(relation,
4872 infomask, xwait, &t_ctid,
4874 mode);
4875 if (res != TM_Ok)
4876 {
4877 result = res;
4878 /* recovery code expects to have buffer lock held */
4880 goto failed;
4881 }
4882 }
4883
4885
4886 /*
4887 * Make sure it's still an appropriate lock, else start over.
4888 * Also, if it wasn't updated before we released the lock, but
4889 * is updated now, we start over too; the reason is that we
4890 * now need to follow the update chain to lock the new
4891 * versions.
4892 */
4893 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4894 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4895 !updated))
4896 goto l3;
4897
4898 /* Things look okay, so we can skip sleeping */
4899 require_sleep = false;
4900
4901 /*
4902 * Note we allow Xmax to change here; other updaters/lockers
4903 * could have modified it before we grabbed the buffer lock.
4904 * However, this is not a problem, because with the recheck we
4905 * just did we ensure that they still don't conflict with the
4906 * lock we want.
4907 */
4908 }
4909 }
4910 else if (mode == LockTupleShare)
4911 {
4912 /*
4913 * If we're requesting Share, we can similarly avoid sleeping if
4914 * there's no update and no exclusive lock present.
4915 */
4918 {
4920
4921 /*
4922 * Make sure it's still an appropriate lock, else start over.
4923 * See above about allowing xmax to change.
4924 */
4927 goto l3;
4928 require_sleep = false;
4929 }
4930 }
4931 else if (mode == LockTupleNoKeyExclusive)
4932 {
4933 /*
4934 * If we're requesting NoKeyExclusive, we might also be able to
4935 * avoid sleeping; just ensure that there is no conflicting lock
4936 * already acquired.
4937 */
4939 {
4941 mode, NULL))
4942 {
4943 /*
4944 * No conflict, but if the xmax changed under us in the
4945 * meantime, start over.
4946 */
4950 xwait))
4951 goto l3;
4952
4953 /* otherwise, we're good */
4954 require_sleep = false;
4955 }
4956 }
4958 {
4960
4961 /* if the xmax changed in the meantime, start over */
4964 xwait))
4965 goto l3;
4966 /* otherwise, we're good */
4967 require_sleep = false;
4968 }
4969 }
4970
4971 /*
4972 * As a check independent from those above, we can also avoid sleeping
4973 * if the current transaction is the sole locker of the tuple. Note
4974 * that the strength of the lock already held is irrelevant; this is
4975 * not about recording the lock in Xmax (which will be done regardless
4976 * of this optimization, below). Also, note that the cases where we
4977 * hold a lock stronger than we are requesting are already handled
4978 * above by not doing anything.
4979 *
4980 * Note we only deal with the non-multixact case here; MultiXactIdWait
4981 * is well equipped to deal with this situation on its own.
4982 */
4985 {
4986 /* ... but if the xmax changed in the meantime, start over */
4990 xwait))
4991 goto l3;
4993 require_sleep = false;
4994 }
4995
4996 /*
4997 * Time to sleep on the other transaction/multixact, if necessary.
4998 *
4999 * If the other transaction is an update/delete that's already
5000 * committed, then sleeping cannot possibly do any good: if we're
5001 * required to sleep, get out to raise an error instead.
5002 *
5003 * By here, we either have already acquired the buffer exclusive lock,
5004 * or we must wait for the locking transaction or multixact; so below
5005 * we ensure that we grab buffer lock after the sleep.
5006 */
5007 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
5008 {
5010 goto failed;
5011 }
5012 else if (require_sleep)
5013 {
5014 /*
5015 * Acquire tuple lock to establish our priority for the tuple, or
5016 * die trying. LockTuple will release us when we are next-in-line
5017 * for the tuple. We must do this even if we are share-locking,
5018 * but not if we already have a weaker lock on the tuple.
5019 *
5020 * If we are forced to "start over" below, we keep the tuple lock;
5021 * this arranges that we stay at the head of the line while
5022 * rechecking tuple state.
5023 */
5024 if (!skip_tuple_lock &&
5025 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
5027 {
5028 /*
5029 * This can only happen if wait_policy is Skip and the lock
5030 * couldn't be obtained.
5031 */
5032 result = TM_WouldBlock;
5033 /* recovery code expects to have buffer lock held */
5035 goto failed;
5036 }
5037
5039 {
5041
5042 /* We only ever lock tuples, never update them */
5043 if (status >= MultiXactStatusNoKeyUpdate)
5044 elog(ERROR, "invalid lock mode in heap_lock_tuple");
5045
5046 /* wait for multixact to end, or die trying */
5047 switch (wait_policy)
5048 {
5049 case LockWaitBlock:
5051 relation, &tuple->t_self, XLTW_Lock, NULL);
5052 break;
5053 case LockWaitSkip:
5055 status, infomask, relation,
5056 NULL, false))
5057 {
5058 result = TM_WouldBlock;
5059 /* recovery code expects to have buffer lock held */
5061 goto failed;
5062 }
5063 break;
5064 case LockWaitError:
5066 status, infomask, relation,
5068 ereport(ERROR,
5070 errmsg("could not obtain lock on row in relation \"%s\"",
5071 RelationGetRelationName(relation))));
5072
5073 break;
5074 }
5075
5076 /*
5077 * Of course, the multixact might not be done here: if we're
5078 * requesting a light lock mode, other transactions with light
5079 * locks could still be alive, as well as locks owned by our
5080 * own xact or other subxacts of this backend. We need to
5081 * preserve the surviving MultiXact members. Note that it
5082 * isn't absolutely necessary in the latter case, but doing so
5083 * is simpler.
5084 */
5085 }
5086 else
5087 {
5088 /* wait for regular transaction to end, or die trying */
5089 switch (wait_policy)
5090 {
5091 case LockWaitBlock:
5092 XactLockTableWait(xwait, relation, &tuple->t_self,
5093 XLTW_Lock);
5094 break;
5095 case LockWaitSkip:
5097 {
5098 result = TM_WouldBlock;
5099 /* recovery code expects to have buffer lock held */
5101 goto failed;
5102 }
5103 break;
5104 case LockWaitError:
5106 ereport(ERROR,
5108 errmsg("could not obtain lock on row in relation \"%s\"",
5109 RelationGetRelationName(relation))));
5110 break;
5111 }
5112 }
5113
5114 /* if there are updates, follow the update chain */
5116 !ItemPointerEquals(&tuple->t_self, &t_ctid))
5117 {
5118 TM_Result res;
5119
5120 res = heap_lock_updated_tuple(relation,
5121 infomask, xwait, &t_ctid,
5123 mode);
5124 if (res != TM_Ok)
5125 {
5126 result = res;
5127 /* recovery code expects to have buffer lock held */
5129 goto failed;
5130 }
5131 }
5132
5134
5135 /*
5136 * xwait is done, but if xwait had just locked the tuple then some
5137 * other xact could update this tuple before we get to this point.
5138 * Check for xmax change, and start over if so.
5139 */
5142 xwait))
5143 goto l3;
5144
5146 {
5147 /*
5148 * Otherwise check if it committed or aborted. Note we cannot
5149 * be here if the tuple was only locked by somebody who didn't
5150 * conflict with us; that would have been handled above. So
5151 * that transaction must necessarily be gone by now. But
5152 * don't check for this in the multixact case, because some
5153 * locker transactions might still be running.
5154 */
5155 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5156 }
5157 }
5158
5159 /* By here, we're certain that we hold buffer exclusive lock again */
5160
5161 /*
5162 * We may lock if previous xmax aborted, or if it committed but only
5163 * locked the tuple without updating it; or if we didn't have to wait
5164 * at all for whatever reason.
5165 */
5166 if (!require_sleep ||
5167 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5170 result = TM_Ok;
5171 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5172 result = TM_Updated;
5173 else
5174 result = TM_Deleted;
5175 }
5176
5177failed:
5178 if (result != TM_Ok)
5179 {
5180 Assert(result == TM_SelfModified || result == TM_Updated ||
5181 result == TM_Deleted || result == TM_WouldBlock);
5182
5183 /*
5184 * When locking a tuple under LockWaitSkip semantics and we fail with
5185 * TM_WouldBlock above, it's possible for concurrent transactions to
5186 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5187 * this assert is slightly different from the equivalent one in
5188 * heap_delete and heap_update.
5189 */
5190 Assert((result == TM_WouldBlock) ||
5191 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5192 Assert(result != TM_Updated ||
5193 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5194 tmfd->ctid = tuple->t_data->t_ctid;
5195 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5196 if (result == TM_SelfModified)
5197 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5198 else
5199 tmfd->cmax = InvalidCommandId;
5200 goto out_locked;
5201 }
5202
5203 /*
5204 * If we didn't pin the visibility map page and the page has become all
5205 * visible while we were busy locking the buffer, or during some
5206 * subsequent window during which we had it unlocked, we'll have to unlock
5207 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5208 * unfortunate, especially since we'll now have to recheck whether the
5209 * tuple has been locked or updated under us, but hopefully it won't
5210 * happen very often.
5211 */
5212 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5213 {
5215 visibilitymap_pin(relation, block, &vmbuffer);
5217 goto l3;
5218 }
5219
5220 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5221 old_infomask = tuple->t_data->t_infomask;
5222
5223 /*
5224 * If this is the first possibly-multixact-able operation in the current
5225 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5226 * certain that the transaction will never become a member of any older
5227 * MultiXactIds than that. (We have to do this even if we end up just
5228 * using our own TransactionId below, since some other backend could
5229 * incorporate our XID into a MultiXact immediately afterwards.)
5230 */
5232
5233 /*
5234 * Compute the new xmax and infomask to store into the tuple. Note we do
5235 * not modify the tuple just yet, because that would leave it in the wrong
5236 * state if multixact.c elogs.
5237 */
5239 GetCurrentTransactionId(), mode, false,
5240 &xid, &new_infomask, &new_infomask2);
5241
5243
5244 /*
5245 * Store transaction information of xact locking the tuple.
5246 *
5247 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5248 * possibly generating a useless combo CID. Moreover, if we're locking a
5249 * previously updated tuple, it's important to preserve the Cmax.
5250 *
5251 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5252 * we would break the HOT chain.
5253 */
5256 tuple->t_data->t_infomask |= new_infomask;
5257 tuple->t_data->t_infomask2 |= new_infomask2;
5260 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5261
5262 /*
5263 * Make sure there is no forward chain link in t_ctid. Note that in the
5264 * cases where the tuple has been updated, we must not overwrite t_ctid,
5265 * because it was set by the updater. Moreover, if the tuple has been
5266 * updated, we need to follow the update chain to lock the new versions of
5267 * the tuple as well.
5268 */
5270 tuple->t_data->t_ctid = *tid;
5271
5272 /* Clear only the all-frozen bit on visibility map if needed */
5273 if (PageIsAllVisible(page) &&
5274 visibilitymap_clear(relation, block, vmbuffer,
5276 cleared_all_frozen = true;
5277
5278
5279 MarkBufferDirty(*buffer);
5280
5281 /*
5282 * XLOG stuff. You might think that we don't need an XLOG record because
5283 * there is no state change worth restoring after a crash. You would be
5284 * wrong however: we have just written either a TransactionId or a
5285 * MultiXactId that may never have been seen on disk before, and we need
5286 * to make sure that there are XLOG entries covering those ID numbers.
5287 * Else the same IDs might be re-used after a crash, which would be
5288 * disastrous if this page made it to disk before the crash. Essentially
5289 * we have to enforce the WAL log-before-data rule even in this case.
5290 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5291 * entries for everything anyway.)
5292 */
5293 if (RelationNeedsWAL(relation))
5294 {
5297
5300
5301 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5302 xlrec.xmax = xid;
5303 xlrec.infobits_set = compute_infobits(new_infomask,
5304 tuple->t_data->t_infomask2);
5307
5308 /* we don't decode row locks atm, so no need to log the origin */
5309
5311
5312 PageSetLSN(page, recptr);
5313 }
5314
5316
5317 result = TM_Ok;
5318
5321
5323 if (BufferIsValid(vmbuffer))
5324 ReleaseBuffer(vmbuffer);
5325
5326 /*
5327 * Don't update the visibility map here. Locking a tuple doesn't change
5328 * visibility info.
5329 */
5330
5331 /*
5332 * Now that we have successfully marked the tuple as locked, we can
5333 * release the lmgr tuple lock, if we had it.
5334 */
5335 if (have_tuple_lock)
5336 UnlockTupleTuplock(relation, tid, mode);
5337
5338 return result;
5339}
#define TUPLOCK_from_mxstatus(status)
Definition heapam.c:216
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition heapam.c:7884
static TM_Result heap_lock_updated_tuple(Relation rel, uint16 prior_infomask, TransactionId prior_raw_xmax, const ItemPointerData *prior_ctid, TransactionId xid, LockTupleMode mode)
Definition heapam.c:6123
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition heapam.c:4605
#define XLH_LOCK_ALL_FROZEN_CLEARED
#define XLOG_HEAP_LOCK
Definition heapam_xlog.h:39
#define SizeOfHeapLock
#define HEAP_KEYS_UPDATED
static bool HEAP_XMAX_IS_SHR_LOCKED(uint16 infomask)
static bool HEAP_XMAX_IS_EXCL_LOCKED(uint16 infomask)
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition lmgr.c:739
@ XLTW_Lock
Definition lmgr.h:29
bool log_lock_failures
Definition lock.c:54
@ LockWaitSkip
Definition lockoptions.h:41
@ LockWaitError
Definition lockoptions.h:43
@ LockTupleShare
Definition lockoptions.h:54
@ LockTupleKeyShare
Definition lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1113
static PgChecksumMode mode
@ TM_WouldBlock
Definition tableam.h:103
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, fb(), get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failures, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
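
The sketch below is not generated from the source; it shows how a caller in the spirit of heapam_tuple_lock() might invoke this routine. The tid and relation variables, the choice of lock mode, and the use of GetCurrentCommandId() are illustrative assumptions; verify the exact heap_lock_tuple() prototype in heapam.h before relying on the parameter order.

    HeapTupleData locktup;
    Buffer      buf = InvalidBuffer;
    TM_FailureData tmfd;
    TM_Result   res;

    /* tid identifies the row version to lock; relation is already open */
    ItemPointerCopy(tid, &locktup.t_self);
    res = heap_lock_tuple(relation, &locktup,
                          GetCurrentCommandId(false),
                          LockTupleExclusive,   /* strongest row-level mode */
                          LockWaitBlock,        /* wait for conflicting lockers */
                          true,                 /* follow the update chain */
                          &buf, &tmfd);

    /* the buffer is returned pinned (not locked); release it when done */
    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
    if (res != TM_Ok)
        elog(NOTICE, "could not lock tuple: result %d", (int) res);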

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2421 of file heapam.c.

2423{
2426 int i;
2427 int ndone;
2429 Page page;
2430 Buffer vmbuffer = InvalidBuffer;
2431 bool needwal;
2435 bool starting_with_empty_page = false;
2436 int npages = 0;
2437 int npages_used = 0;
2438
2439 /* currently not needed (thus unsupported) for heap_multi_insert() */
2441
2442 AssertHasSnapshotForToast(relation);
2443
2444 needwal = RelationNeedsWAL(relation);
2447
2448 /* Toast and set header data in all the slots */
2449 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2450 for (i = 0; i < ntuples; i++)
2451 {
2452 HeapTuple tuple;
2453
2454 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2455 slots[i]->tts_tableOid = RelationGetRelid(relation);
2456 tuple->t_tableOid = slots[i]->tts_tableOid;
2457 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2458 options);
2459 }
2460
2461 /*
2462 * We're about to do the actual inserts -- but check for conflict first,
2463 * to minimize the possibility of having to roll back work we've just
2464 * done.
2465 *
2466 * A check here does not definitively prevent a serialization anomaly;
2467 * that check MUST be done at least past the point of acquiring an
2468 * exclusive buffer content lock on every buffer that will be affected,
2469 * and MAY be done after all inserts are reflected in the buffers and
2470 * those locks are released; otherwise there is a race condition. Since
2471 * multiple buffers can be locked and unlocked in the loop below, and it
2472 * would not be feasible to identify and lock all of those buffers before
2473 * the loop, we must do a final check at the end.
2474 *
2475 * The check here could be omitted with no loss of correctness; it is
2476 * present strictly as an optimization.
2477 *
2478 * For heap inserts, we only need to check for table-level SSI locks. Our
2479 * new tuples can't possibly conflict with existing tuple locks, and heap
2480 * page locks are only consolidated versions of tuple locks; they do not
2481 * lock "gaps" as index page locks do. So we don't need to specify a
2482 * buffer when making the call, which makes for a faster check.
2483 */
2485
2486 ndone = 0;
2487 while (ndone < ntuples)
2488 {
2489 Buffer buffer;
2490 bool all_visible_cleared = false;
2491 bool all_frozen_set = false;
2492 int nthispage;
2493
2495
2496 /*
2497 * Compute number of pages needed to fit the to-be-inserted tuples in
2498 * the worst case. This will be used to determine how much to extend
2499 * the relation by in RelationGetBufferForTuple(), if needed. If we
2500 * filled a prior page from scratch, we can just update our last
2501 * computation, but if we started with a partially filled page,
2502 * recompute from scratch, the number of potentially required pages
2503 * can vary due to tuples needing to fit onto the page, page headers
2504 * etc.
2505 */
2506 if (ndone == 0 || !starting_with_empty_page)
2507 {
2508 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2510 npages_used = 0;
2511 }
2512 else
2513 npages_used++;
2514
2515 /*
2516 * Find buffer where at least the next tuple will fit. If the page is
2517 * all-visible, this will also pin the requisite visibility map page.
2518 *
2519 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2520 * empty page. See all_frozen_set below.
2521 */
2522 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2523 InvalidBuffer, options, bistate,
2524 &vmbuffer, NULL,
2525 npages - npages_used);
2526 page = BufferGetPage(buffer);
2527
2529
2531 {
2532 all_frozen_set = true;
2533 /* Lock the vmbuffer before entering the critical section */
2535 }
2536
2537 /* NO EREPORT(ERROR) from here till changes are logged */
2539
2540 /*
2541 * RelationGetBufferForTuple has ensured that the first tuple fits.
2542 * Put that on the page, and then as many other tuples as fit.
2543 */
2544 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2545
2546 /*
2547 * For logical decoding we need combo CIDs to properly decode the
2548 * catalog.
2549 */
2550 if (needwal && need_cids)
2551 log_heap_new_cid(relation, heaptuples[ndone]);
2552
2553 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2554 {
2556
2557 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2558 break;
2559
2560 RelationPutHeapTuple(relation, buffer, heaptup, false);
2561
2562 /*
2563 * For logical decoding we need combo CIDs to properly decode the
2564 * catalog.
2565 */
2566 if (needwal && need_cids)
2567 log_heap_new_cid(relation, heaptup);
2568 }
2569
2570 /*
2571 * If the page is all visible, need to clear that, unless we're only
2572 * going to add further frozen rows to it.
2573 *
2574 * If we're only adding already frozen rows to a previously empty
2575 * page, mark it as all-frozen and update the visibility map. We're
2576 * already holding a pin on the vmbuffer.
2577 */
2579 {
2580 all_visible_cleared = true;
2581 PageClearAllVisible(page);
2582 visibilitymap_clear(relation,
2583 BufferGetBlockNumber(buffer),
2584 vmbuffer, VISIBILITYMAP_VALID_BITS);
2585 }
2586 else if (all_frozen_set)
2587 {
2588 PageSetAllVisible(page);
2590 vmbuffer,
2593 relation->rd_locator);
2594 }
2595
2596 /*
2597 * XXX Should we set PageSetPrunable on this page? See heap_insert()
2598 */
2599
2600 MarkBufferDirty(buffer);
2601
2602 /* XLOG stuff */
2603 if (needwal)
2604 {
2608 char *tupledata;
2609 int totaldatalen;
2610 char *scratchptr = scratch.data;
2611 bool init;
2612 int bufflags = 0;
2613
2614 /*
2615 * If the page was previously empty, we can reinit the page
2616 * instead of restoring the whole thing.
2617 */
2619
2620 /* allocate xl_heap_multi_insert struct from the scratch area */
2623
2624 /*
2625 * Allocate the offsets array, unless we're reinitializing the page;
2626 * in that case the tuples are stored in order starting at
2627 * FirstOffsetNumber and we don't need to store the offsets
2628 * explicitly.
2629 */
2630 if (!init)
2631 scratchptr += nthispage * sizeof(OffsetNumber);
2632
2633 /* the rest of the scratch space is used for tuple data */
2634 tupledata = scratchptr;
2635
2636 /* check that the mutually exclusive flags are not both set */
2638
2639 xlrec->flags = 0;
2642
2643 /*
2644 * We don't have to worry about including a conflict xid in the
2645 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
2646 * visibility rules.
2647 */
2648 if (all_frozen_set)
2650
2651 xlrec->ntuples = nthispage;
2652
2653 /*
2654 * Write out an xl_multi_insert_tuple and the tuple data itself
2655 * for each tuple.
2656 */
2657 for (i = 0; i < nthispage; i++)
2658 {
2660 xl_multi_insert_tuple *tuphdr;
2661 int datalen;
2662
2663 if (!init)
2664 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2665 /* xl_multi_insert_tuple needs two-byte alignment. */
2667 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2668
2669 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2670 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2671 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2672
2673 /* write bitmap [+ padding] [+ oid] + data */
2674 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2676 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2677 datalen);
2678 tuphdr->datalen = datalen;
2679 scratchptr += datalen;
2680 }
2681 totaldatalen = scratchptr - tupledata;
2682 Assert((scratchptr - scratch.data) < BLCKSZ);
2683
2684 if (need_tuple_data)
2686
2687 /*
2688 * Signal that this is the last xl_heap_multi_insert record
2689 * emitted by this call to heap_multi_insert(). Needed for logical
2690 * decoding so it knows when to clean up temporary data.
2691 */
2692 if (ndone + nthispage == ntuples)
2694
2695 if (init)
2696 {
2697 info |= XLOG_HEAP_INIT_PAGE;
2699 }
2700
2701 /*
2702 * If we're doing logical decoding, include the new tuple data
2703 * even if we take a full-page image of the page.
2704 */
2705 if (need_tuple_data)
2707
2709 XLogRegisterData(xlrec, tupledata - scratch.data);
2711 if (all_frozen_set)
2712 XLogRegisterBuffer(1, vmbuffer, 0);
2713
2714 XLogRegisterBufData(0, tupledata, totaldatalen);
2715
2716 /* filtering by origin on a row level is much more efficient */
2718
2719 recptr = XLogInsert(RM_HEAP2_ID, info);
2720
2721 PageSetLSN(page, recptr);
2722 if (all_frozen_set)
2723 {
2724 Assert(BufferIsDirty(vmbuffer));
2725 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2726 }
2727 }
2728
2730
2731 if (all_frozen_set)
2732 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2733
2734 UnlockReleaseBuffer(buffer);
2735 ndone += nthispage;
2736
2737 /*
2738 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2739 * likely that we'll insert into subsequent heap pages that are likely
2740 * to use the same vm page.
2741 */
2742 }
2743
2744 /* We're done with inserting all tuples, so release the last vmbuffer. */
2745 if (vmbuffer != InvalidBuffer)
2746 ReleaseBuffer(vmbuffer);
2747
2748 /*
2749 * We're done with the actual inserts. Check for conflicts again, to
2750 * ensure that all rw-conflicts in to these inserts are detected. Without
2751 * this final check, a sequential scan of the heap may have locked the
2752 * table after the "before" check, missing one opportunity to detect the
2753 * conflict, and then scanned the table before the new tuples were there,
2754 * missing the other chance to detect the conflict.
2755 *
2756 * For heap inserts, we only need to check for table-level SSI locks. Our
2757 * new tuples can't possibly conflict with existing tuple locks, and heap
2758 * page locks are only consolidated versions of tuple locks; they do not
2759 * lock "gaps" as index page locks do. So we don't need to specify a
2760 * buffer when making the call.
2761 */
2763
2764 /*
2765 * If tuples are cacheable, mark them for invalidation from the caches in
2766 * case we abort. Note it is OK to do this after releasing the buffer,
2767 * because the heaptuples data structure is all in local memory, not in
2768 * the shared buffer.
2769 */
2770 if (IsCatalogRelation(relation))
2771 {
2772 for (i = 0; i < ntuples; i++)
2774 }
2775
2776 /* copy t_self fields back to the caller's slots */
2777 for (i = 0; i < ntuples; i++)
2778 slots[i]->tts_tid = heaptuples[i]->t_self;
2779
2780 pgstat_count_heap_insert(relation, ntuples);
2781}
bool BufferIsDirty(Buffer buffer)
Definition bufmgr.c:3024
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
static void PageSetAllVisible(Page page)
Definition bufpage.h:433
#define MAXALIGN(LEN)
Definition c.h:826
#define SHORTALIGN(LEN)
Definition c.h:822
size_t Size
Definition c.h:619
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition heapam.c:2389
#define HEAP_INSERT_FROZEN
Definition heapam.h:38
#define SizeOfHeapMultiInsert
#define XLOG_HEAP2_MULTI_INSERT
Definition heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition heapam_xlog.h:79
#define SizeOfMultiInsertTuple
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition rel.h:389
#define HEAP_DEFAULT_FILLFACTOR
Definition rel.h:360
#define init()
RelFileLocator rd_locator
Definition rel.h:57
void visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_ALL_VISIBLE

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsDirty(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), fb(), GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, IsCatalogRelation(), ItemPointerGetOffsetNumber(), LockBuffer(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), RelationData::rd_locator, REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, xl_multi_insert_tuple::t_hoff, xl_multi_insert_tuple::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_set_vmbits(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
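
A minimal usage sketch, assuming the caller has already materialized ntuples tuples into the slots array (as CatalogTuplesMultiInsertWithInfo() does); the option flag shown is illustrative, and 0 is equally valid:

    BulkInsertState bistate = GetBulkInsertState();

    heap_multi_insert(relation, slots, ntuples,
                      GetCurrentCommandId(true),    /* cid stamped on the new tuples */
                      HEAP_INSERT_SKIP_FSM,         /* bulk loads often skip the FSM */
                      bistate);
    FreeBulkInsertState(bistate);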

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( PruneFreezeParams params,
PruneFreezeResult presult,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)
extern

Definition at line 819 of file pruneheap.c.

824{
825 Buffer buffer = params->buffer;
826 Page page = BufferGetPage(buffer);
828 bool do_freeze;
829 bool do_prune;
830 bool do_hint_prune;
833
834 /* Initialize prstate */
835 prune_freeze_setup(params,
837 presult, &prstate);
838
839 /*
840 * Examine all line pointers and tuple visibility information to determine
841 * which line pointers should change state and which tuples may be frozen.
842 * Prepare queue of state changes to later be executed in a critical
843 * section.
844 */
846 buffer, &prstate, off_loc);
847
848 /*
849 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
850 * checking tuple visibility information in prune_freeze_plan() may have
851 * caused an FPI to be emitted.
852 */
854
855 do_prune = prstate.nredirected > 0 ||
856 prstate.ndead > 0 ||
857 prstate.nunused > 0;
858
859 /*
860 * Even if we don't prune anything, if we found a new value for the
861 * pd_prune_xid field or the page was marked full, we will update the hint
862 * bit.
863 */
864 do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
865 PageIsFull(page);
866
867 /*
868 * Decide if we want to go ahead with freezing according to the freeze
869 * plans we prepared, or not.
870 */
871 do_freeze = heap_page_will_freeze(params->relation, buffer,
873 do_prune,
875 &prstate);
876
877 /*
878 * While scanning the line pointers, we did not clear
879 * all_visible/all_frozen when encountering LP_DEAD items because we
880 * wanted the decision whether or not to freeze the page to be unaffected
881 * by the short-term presence of LP_DEAD items. These LP_DEAD items are
882 * effectively assumed to be LP_UNUSED items in the making. It doesn't
883 * matter which vacuum heap pass (initial pass or final pass) ends up
884 * setting the page all-frozen, as long as the ongoing VACUUM does it.
885 *
886 * Now that we finished determining whether or not to freeze the page,
887 * update all_visible and all_frozen so that they reflect the true state
888 * of the page for setting PD_ALL_VISIBLE and VM bits.
889 */
890 if (prstate.lpdead_items > 0)
891 prstate.all_visible = prstate.all_frozen = false;
892
893 Assert(!prstate.all_frozen || prstate.all_visible);
894
895 /* Any error while applying the changes is critical */
897
898 if (do_hint_prune)
899 {
900 /*
901 * Update the page's pd_prune_xid field to either zero, or the lowest
902 * XID of any soon-prunable tuple.
903 */
904 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
905
906 /*
907 * Also clear the "page is full" flag, since there's no point in
908 * repeating the prune/defrag process until something else happens to
909 * the page.
910 */
911 PageClearFull(page);
912
913 /*
914 * If that's all we had to do to the page, this is a non-WAL-logged
915 * hint. If we are going to freeze or prune the page, we will mark
916 * the buffer dirty below.
917 */
918 if (!do_freeze && !do_prune)
919 MarkBufferDirtyHint(buffer, true);
920 }
921
922 if (do_prune || do_freeze)
923 {
924 /* Apply the planned item changes and repair page fragmentation. */
925 if (do_prune)
926 {
927 heap_page_prune_execute(buffer, false,
928 prstate.redirected, prstate.nredirected,
929 prstate.nowdead, prstate.ndead,
930 prstate.nowunused, prstate.nunused);
931 }
932
933 if (do_freeze)
934 heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
935
936 MarkBufferDirty(buffer);
937
938 /*
939 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
940 */
941 if (RelationNeedsWAL(params->relation))
942 {
943 /*
944 * The snapshotConflictHorizon for the whole record should be the
945 * most conservative of all the horizons calculated for any of the
946 * possible modifications. If this record will prune tuples, any
947 * transactions on the standby older than the youngest xmax of the
948 * most recently removed tuple this record will prune will
949 * conflict. If this record will freeze tuples, any transactions
950 * on the standby with xids older than the youngest tuple this
951 * record will freeze will conflict.
952 */
954
955 if (TransactionIdFollows(prstate.frz_conflict_horizon,
956 prstate.latest_xid_removed))
957 conflict_xid = prstate.frz_conflict_horizon;
958 else
959 conflict_xid = prstate.latest_xid_removed;
960
961 log_heap_prune_and_freeze(params->relation, buffer,
962 InvalidBuffer, /* vmbuffer */
963 0, /* vmflags */
965 true, params->reason,
966 prstate.frozen, prstate.nfrozen,
967 prstate.redirected, prstate.nredirected,
968 prstate.nowdead, prstate.ndead,
969 prstate.nowunused, prstate.nunused);
970 }
971 }
972
974
975 /* Copy information back for caller */
976 presult->ndeleted = prstate.ndeleted;
977 presult->nnewlpdead = prstate.ndead;
978 presult->nfrozen = prstate.nfrozen;
979 presult->live_tuples = prstate.live_tuples;
980 presult->recently_dead_tuples = prstate.recently_dead_tuples;
981 presult->all_visible = prstate.all_visible;
982 presult->all_frozen = prstate.all_frozen;
983 presult->hastup = prstate.hastup;
984
985 /*
986 * For callers planning to update the visibility map, the conflict horizon
987 * for that record must be the newest xmin on the page. However, if the
988 * page is completely frozen, there can be no conflict and the
989 * vm_conflict_horizon should remain InvalidTransactionId. This includes
990 * the case that we just froze all the tuples; the prune-freeze record
991 * included the conflict XID already so the caller doesn't need it.
992 */
993 if (presult->all_frozen)
994 presult->vm_conflict_horizon = InvalidTransactionId;
995 else
996 presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
997
998 presult->lpdead_items = prstate.lpdead_items;
999 /* the presult->deadoffsets array was already filled in */
1000
1001 if (prstate.attempt_freeze)
1002 {
1003 if (presult->nfrozen > 0)
1004 {
1005 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
1006 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
1007 }
1008 else
1009 {
1010 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1011 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1012 }
1013 }
1014}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5565
static void PageClearFull(Page page)
Definition bufpage.h:422
static bool PageIsFull(const PageData *page)
Definition bufpage.h:412
int64_t int64
Definition c.h:543
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition heapam.c:7469
WalUsage pgWalUsage
Definition instrument.c:22
static bool heap_page_will_freeze(Relation relation, Buffer buffer, bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition pruneheap.c:663
static void prune_freeze_plan(Oid reloid, Buffer buffer, PruneState *prstate, OffsetNumber *off_loc)
Definition pruneheap.c:458
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2167
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, PruneFreezeResult *presult, PruneState *prstate)
Definition pruneheap.c:330
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition pruneheap.c:1671
PruneReason reason
Definition heapam.h:245
Relation relation
Definition heapam.h:238
int64 wal_fpi
Definition instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297

References Assert, PruneFreezeParams::buffer, BufferGetPage(), END_CRIT_SECTION, fb(), heap_freeze_prepared_tuples(), heap_page_prune_execute(), heap_page_will_freeze(), InvalidBuffer, InvalidTransactionId, log_heap_prune_and_freeze(), MarkBufferDirty(), MarkBufferDirtyHint(), PageClearFull(), PageIsFull(), pgWalUsage, prune_freeze_plan(), prune_freeze_setup(), PruneFreezeParams::reason, PruneFreezeParams::relation, RelationGetRelid, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdFollows(), and WalUsage::wal_fpi.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
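
For contrast with the PRUNE_ON_ACCESS call shown in heap_page_prune_opt() below, here is a hedged sketch of VACUUM-style usage that also requests freezing. The rel, buf, vistest and cutoffs variables, and the initial values of the relfrozenxid/relminmxid trackers, are assumptions standing in for the caller's state (lazy_scan_prune() keeps the equivalents in its per-relation vacuum state):

    PruneFreezeParams params = {
        .relation = rel,
        .buffer = buf,                      /* caller holds a cleanup lock */
        .reason = PRUNE_VACUUM_SCAN,
        .options = HEAP_PAGE_PRUNE_FREEZE,  /* consider freezing as well as pruning */
        .vistest = vistest,
        .cutoffs = &cutoffs,
    };
    PruneFreezeResult presult;
    OffsetNumber off_loc = InvalidOffsetNumber;
    TransactionId new_relfrozen_xid = cutoffs.OldestXmin;   /* running trackers, */
    MultiXactId   new_relmin_mxid = cutoffs.OldestMxact;    /* maintained by the caller */

    heap_page_prune_and_freeze(&params, &presult, &off_loc,
                               &new_relfrozen_xid, &new_relmin_mxid);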

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)
extern

Definition at line 1671 of file pruneheap.c.

1675{
1676 Page page = BufferGetPage(buffer);
1677 OffsetNumber *offnum;
1679
1680 /* Shouldn't be called unless there's something to do */
1681 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1682
1683 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1684 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1685
1686 /* Update all redirected line pointers */
1687 offnum = redirected;
1688 for (int i = 0; i < nredirected; i++)
1689 {
1690 OffsetNumber fromoff = *offnum++;
1691 OffsetNumber tooff = *offnum++;
1694
1695#ifdef USE_ASSERT_CHECKING
1696
1697 /*
1698 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1699 * must be the first item from a HOT chain. If the item has tuple
1700 * storage then it can't be a heap-only tuple. Otherwise we are just
1701 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1702 * has been pruned at least once before now.
1703 */
1705 {
1707
1708 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1710 }
1711 else
1712 {
1713 /* We shouldn't need to redundantly set the redirect */
1715 }
1716
1717 /*
1718 * The item that we're about to set as an LP_REDIRECT (the 'from'
1719 * item) will point to an existing item (the 'to' item) that is
1720 * already a heap-only tuple. There can be at most one LP_REDIRECT
1721 * item per HOT chain.
1722 *
1723 * We need to keep around an LP_REDIRECT item (after original
1724 * non-heap-only root tuple gets pruned away) so that it's always
1725 * possible for VACUUM to easily figure out what TID to delete from
1726 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1727 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1728 * tuple can.
1729 *
1730 * This check may miss problems, e.g. the target of a redirect could
1731 * be marked as unused subsequently. The page_verify_redirects() check
1732 * below will catch such problems.
1733 */
1734 tolp = PageGetItemId(page, tooff);
1736 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1738#endif
1739
1741 }
1742
1743 /* Update all now-dead line pointers */
1744 offnum = nowdead;
1745 for (int i = 0; i < ndead; i++)
1746 {
1747 OffsetNumber off = *offnum++;
1748 ItemId lp = PageGetItemId(page, off);
1749
1750#ifdef USE_ASSERT_CHECKING
1751
1752 /*
1753 * An LP_DEAD line pointer must be left behind when the original item
1754 * (which is dead to everybody) could still be referenced by a TID in
1755 * an index. This should never be necessary with any individual
1756 * heap-only tuple item, though. (It's not clear how much of a problem
1757 * that would be, but there is no reason to allow it.)
1758 */
1759 if (ItemIdHasStorage(lp))
1760 {
1762 htup = (HeapTupleHeader) PageGetItem(page, lp);
1764 }
1765 else
1766 {
1767 /* Whole HOT chain becomes dead */
1769 }
1770#endif
1771
1773 }
1774
1775 /* Update all now-unused line pointers */
1776 offnum = nowunused;
1777 for (int i = 0; i < nunused; i++)
1778 {
1779 OffsetNumber off = *offnum++;
1780 ItemId lp = PageGetItemId(page, off);
1781
1782#ifdef USE_ASSERT_CHECKING
1783
1784 if (lp_truncate_only)
1785 {
1786 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1788 }
1789 else
1790 {
1791 /*
1792 * When heap_page_prune_and_freeze() was called, mark_unused_now
1793 * may have been passed as true, which allows would-be LP_DEAD
1794 * items to be made LP_UNUSED instead. This is only possible if
1795 * the relation has no indexes. If there are any dead items, then
1796 * mark_unused_now was not true and every item being marked
1797 * LP_UNUSED must refer to a heap-only tuple.
1798 */
1799 if (ndead > 0)
1800 {
1802 htup = (HeapTupleHeader) PageGetItem(page, lp);
1804 }
1805 else
1807 }
1808
1809#endif
1810
1812 }
1813
1814 if (lp_truncate_only)
1816 else
1817 {
1818 /*
1819 * Finally, repair any fragmentation, and update the page's hint bit
1820 * about whether it has free pointers.
1821 */
1823
1824 /*
1825 * Now that the page has been modified, assert that redirect items
1826 * still point to valid targets.
1827 */
1829 }
1830}
void PageRepairFragmentation(Page page)
Definition bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
#define ItemIdSetRedirect(itemId, link)
Definition itemid.h:152
#define ItemIdSetDead(itemId)
Definition itemid.h:164
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void page_verify_redirects(Page page)
Definition pruneheap.c:1847

References Assert, BufferGetPage(), fb(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
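
A hedged sketch of the calling pattern visible in heap_page_prune_and_freeze() above: the planned changes are applied inside a critical section and the buffer is then marked dirty (WAL logging, which the real callers also perform inside the critical section, is omitted here). The redirected/nowdead/nowunused arrays and their counts are assumed to have been prepared earlier, e.g. by prune_freeze_plan():

    START_CRIT_SECTION();

    heap_page_prune_execute(buffer, false,      /* not lp_truncate_only */
                            redirected, nredirected,
                            nowdead, ndead,
                            nowunused, nunused);
    MarkBufferDirty(buffer);

    END_CRIT_SECTION();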

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)
extern

Definition at line 209 of file pruneheap.c.

210{
211 Page page = BufferGetPage(buffer);
213 GlobalVisState *vistest;
215
216 /*
217 * We can't write WAL in recovery mode, so there's no point trying to
218 * clean the page. The primary will likely issue a cleaning WAL record
219 * soon anyway, so this is no particular loss.
220 */
221 if (RecoveryInProgress())
222 return;
223
224 /*
225 * First check whether there's any chance there's something to prune,
226 * determining the appropriate horizon is a waste if there's no prune_xid
227 * (i.e. no updates/deletes left potentially dead tuples around).
228 */
229 prune_xid = ((PageHeader) page)->pd_prune_xid;
231 return;
232
233 /*
234 * Check whether prune_xid indicates that there may be dead rows that can
235 * be cleaned up.
236 */
237 vistest = GlobalVisTestFor(relation);
238
240 return;
241
242 /*
243 * We prune when a previous UPDATE failed to find enough space on the page
244 * for a new tuple version, or when free space falls below the relation's
245 * fill-factor target (but not less than 10%).
246 *
247 * Checking free space here is questionable since we aren't holding any
248 * lock on the buffer; in the worst case we could get a bogus answer. It's
249 * unlikely to be *seriously* wrong, though, since reading either pd_lower
250 * or pd_upper is probably atomic. Avoiding taking a lock seems more
251 * important than sometimes getting a wrong answer in what is after all
252 * just a heuristic estimate.
253 */
256 minfree = Max(minfree, BLCKSZ / 10);
257
258 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
259 {
260 /* OK, try to get exclusive buffer lock */
262 return;
263
264 /*
265 * Now that we have buffer lock, get accurate information about the
266 * page's free space, and recheck the heuristic about whether to
267 * prune.
268 */
269 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
270 {
273
274 /*
275 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
276 * regardless of whether or not the relation has indexes, since we
277 * cannot safely determine that during on-access pruning with the
278 * current implementation.
279 */
280 PruneFreezeParams params = {
281 .relation = relation,
282 .buffer = buffer,
283 .reason = PRUNE_ON_ACCESS,
284 .options = 0,
285 .vistest = vistest,
286 .cutoffs = NULL,
287 };
288
290 NULL, NULL);
291
292 /*
293 * Report the number of tuples reclaimed to pgstats. This is
294 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
295 *
296 * We derive the number of dead tuples like this to avoid totally
297 * forgetting about items that were set to LP_DEAD, since they
298 * still need to be cleaned up by VACUUM. We only want to count
299 * heap-only tuples that just became LP_UNUSED in our report,
300 * which don't.
301 *
302 * VACUUM doesn't have to compensate in the same way when it
303 * tracks ndeleted, since it will set the same LP_DEAD items to
304 * LP_UNUSED separately.
305 */
306 if (presult.ndeleted > presult.nnewlpdead)
308 presult.ndeleted - presult.nnewlpdead);
309 }
310
311 /* And release buffer lock */
313
314 /*
315 * We avoid reuse of any free space created on the page by unrelated
316 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
317 * free space should be reused by UPDATEs to *this* page.
318 */
319 }
320}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6690
#define Max(x, y)
Definition c.h:991
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition procarray.c:4243
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:819
bool RecoveryInProgress(void)
Definition xlog.c:6460

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), fb(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), PruneFreezeParams::relation, RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().
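
A minimal sketch of opportunistic use on a read path, under the assumption that the caller holds only a pin (no lock) on the buffer; the function takes the cleanup lock itself when pruning looks worthwhile:

    Buffer      buf = ReadBuffer(rel, blkno);   /* pin only, no lock yet */

    heap_page_prune_opt(rel, buf);
    LockBuffer(buf, BUFFER_LOCK_SHARE);         /* now lock for the actual read */
    /* ... read tuples from the page ... */
    UnlockReleaseBuffer(buf);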

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7416 of file heapam.c.

7418{
7419 Page page = BufferGetPage(buffer);
7420
7421 for (int i = 0; i < ntuples; i++)
7422 {
7423 HeapTupleFreeze *frz = tuples + i;
7424 ItemId itemid = PageGetItemId(page, frz->offset);
7425 HeapTupleHeader htup;
7426
7427 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7428
7429 /* Deliberately avoid relying on tuple hint bits here */
7430 if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
7431 {
7433
7435 if (unlikely(!TransactionIdDidCommit(xmin)))
7436 ereport(ERROR,
7438 errmsg_internal("uncommitted xmin %u needs to be frozen",
7439 xmin)));
7440 }
7441
7442 /*
7443 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7444 * left behind by transactions that were in progress during a crash,
7445 * so we can only check that xmax didn't commit
7446 */
7447 if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
7448 {
7450
7453 ereport(ERROR,
7455 errmsg_internal("cannot freeze committed xmax %u",
7456 xmax)));
7457 }
7458 }
7459}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition heapam.h:138
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition heapam.h:137
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
#define ERRCODE_DATA_CORRUPTED
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdIsNormal(xid)
Definition transam.h:42

References Assert, BufferGetPage(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_will_freeze().
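
A hedged sketch of the pattern used around freezing: the checks run before the critical section in which the prepared freeze plans are applied, while it is still safe to ERROR. The frozen array and nfrozen count are assumed to come from earlier heap_prepare_freeze_tuple() calls:

    /* paranoid data-corruption checks, done before entering the critical section */
    heap_pre_freeze_checks(buffer, frozen, nfrozen);

    START_CRIT_SECTION();
    heap_freeze_prepared_tuples(buffer, frozen, nfrozen);
    MarkBufferDirty(buffer);
    END_CRIT_SECTION();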

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)
extern

Definition at line 7143 of file heapam.c.

7147{
7148 bool xmin_already_frozen = false,
7149 xmax_already_frozen = false;
7150 bool freeze_xmin = false,
7151 replace_xvac = false,
7152 replace_xmax = false,
7153 freeze_xmax = false;
7154 TransactionId xid;
7155
7156 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7157 frz->t_infomask2 = tuple->t_infomask2;
7158 frz->t_infomask = tuple->t_infomask;
7159 frz->frzflags = 0;
7160 frz->checkflags = 0;
7161
7162 /*
7163 * Process xmin, while keeping track of whether it's already frozen, or
7164 * will become frozen iff our freeze plan is executed by caller (could be
7165 * neither).
7166 */
7167 xid = HeapTupleHeaderGetXmin(tuple);
7168 if (!TransactionIdIsNormal(xid))
7169 xmin_already_frozen = true;
7170 else
7171 {
7172 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7173 ereport(ERROR,
7175 errmsg_internal("found xmin %u from before relfrozenxid %u",
7176 xid, cutoffs->relfrozenxid)));
7177
7178 /* Will set freeze_xmin flags in freeze plan below */
7180
7181 /* Verify that xmin committed if and when freeze plan is executed */
7182 if (freeze_xmin)
7184 }
7185
7186 /*
7187 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7188 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7189 */
7190 xid = HeapTupleHeaderGetXvac(tuple);
7191 if (TransactionIdIsNormal(xid))
7192 {
7194 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7195
7196 /*
7197 * For Xvac, we always freeze proactively. This allows totally_frozen
7198 * tracking to ignore xvac.
7199 */
7200 replace_xvac = pagefrz->freeze_required = true;
7201
7202 /* Will set replace_xvac flags in freeze plan below */
7203 }
7204
7205 /* Now process xmax */
7206 xid = frz->xmax;
7207 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7208 {
7209 /* Raw xmax is a MultiXactId */
7211 uint16 flags;
7212
7213 /*
7214 * We will either remove xmax completely (in the "freeze_xmax" path),
7215 * process xmax by replacing it (in the "replace_xmax" path), or
7216 * perform no-op xmax processing. The only constraint is that the
7217 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7218 */
7219 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7220 &flags, pagefrz);
7221
7222 if (flags & FRM_NOOP)
7223 {
7224 /*
7225 * xmax is a MultiXactId, and nothing about it changes for now.
7226 * This is the only case where 'freeze_required' won't have been
7227 * set for us by FreezeMultiXactId, as well as the only case where
7228 * neither freeze_xmax nor replace_xmax are set (given a multi).
7229 *
7230 * This is a no-op, but the call to FreezeMultiXactId might have
7231 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7232 * for us (the "freeze page" variants, specifically). That'll
7233 * make it safe for our caller to freeze the page later on, while
7234 * leaving this particular xmax undisturbed.
7235 *
7236 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7237 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7238 * job. A call to heap_tuple_should_freeze for this same tuple
7239 * will take place below if 'freeze_required' isn't set already.
7240 * (This repeats work from FreezeMultiXactId, but allows "no
7241 * freeze" tracker maintenance to happen in only one place.)
7242 */
7245 }
7246 else if (flags & FRM_RETURN_IS_XID)
7247 {
7248 /*
7249 * xmax will become an updater Xid (original MultiXact's updater
7250 * member Xid will be carried forward as a simple Xid in Xmax).
7251 */
7253
7254 /*
7255 * NB -- some of these transformations are only valid because we
7256 * know the return Xid is a tuple updater (i.e. not merely a
7257 * locker.) Also note that the only reason we don't explicitly
7258 * worry about HEAP_KEYS_UPDATED is because it lives in
7259 * t_infomask2 rather than t_infomask.
7260 */
7261 frz->t_infomask &= ~HEAP_XMAX_BITS;
7262 frz->xmax = newxmax;
7263 if (flags & FRM_MARK_COMMITTED)
7264 frz->t_infomask |= HEAP_XMAX_COMMITTED;
7265 replace_xmax = true;
7266 }
7267 else if (flags & FRM_RETURN_IS_MULTI)
7268 {
7271
7272 /*
7273 * xmax is an old MultiXactId that we have to replace with a new
7274 * MultiXactId, to carry forward two or more original member XIDs.
7275 */
7277
7278 /*
7279 * We can't use GetMultiXactIdHintBits directly on the new multi
7280 * here; that routine initializes the masks to all zeroes, which
7281 * would lose other bits we need. Doing it this way ensures all
7282 * unrelated bits remain untouched.
7283 */
7284 frz->t_infomask &= ~HEAP_XMAX_BITS;
7285 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7287 frz->t_infomask |= newbits;
7288 frz->t_infomask2 |= newbits2;
7289 frz->xmax = newxmax;
7290 replace_xmax = true;
7291 }
7292 else
7293 {
7294 /*
7295 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7296 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7297 */
7298 Assert(flags & FRM_INVALIDATE_XMAX);
7300
7301 /* Will set freeze_xmax flags in freeze plan below */
7302 freeze_xmax = true;
7303 }
7304
7305 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7306 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7307 }
7308 else if (TransactionIdIsNormal(xid))
7309 {
7310 /* Raw xmax is normal XID */
7311 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7312 ereport(ERROR,
7314 errmsg_internal("found xmax %u from before relfrozenxid %u",
7315 xid, cutoffs->relfrozenxid)));
7316
7317 /* Will set freeze_xmax flags in freeze plan below */
7319
7320 /*
7321 * Verify that xmax aborted if and when freeze plan is executed,
7322 * provided it's from an update. (A lock-only xmax can be removed
7323 * independent of this, since the lock is released at xact end.)
7324 */
7326 frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7327 }
7328 else if (!TransactionIdIsValid(xid))
7329 {
7330 /* Raw xmax is InvalidTransactionId XID */
7331 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7332 xmax_already_frozen = true;
7333 }
7334 else
7335 ereport(ERROR,
7337 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7338 xid, tuple->t_infomask)));
7339
7340 if (freeze_xmin)
7341 {
7343
7344 frz->t_infomask |= HEAP_XMIN_FROZEN;
7345 }
7346 if (replace_xvac)
7347 {
7348 /*
7349 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7350 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7351 * transaction succeeded.
7352 */
7353 Assert(pagefrz->freeze_required);
7354 if (tuple->t_infomask & HEAP_MOVED_OFF)
7355 frz->frzflags |= XLH_INVALID_XVAC;
7356 else
7357 frz->frzflags |= XLH_FREEZE_XVAC;
7358 }
7359 if (replace_xmax)
7360 {
7362 Assert(pagefrz->freeze_required);
7363
7364 /* Already set replace_xmax flags in freeze plan earlier */
7365 }
7366 if (freeze_xmax)
7367 {
7369
7370 frz->xmax = InvalidTransactionId;
7371
7372 /*
7373 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7374 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7375 * Also get rid of the HEAP_KEYS_UPDATED bit.
7376 */
7377 frz->t_infomask &= ~HEAP_XMAX_BITS;
7378 frz->t_infomask |= HEAP_XMAX_INVALID;
7379 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7380 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7381 }
7382
7383 /*
7384 * Determine if this tuple is already totally frozen, or will become
7385 * totally frozen (provided caller executes freeze plans for the page)
7386 */
7389
7390 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7392 {
7393 /*
7394 * So far no previous tuple from the page made freezing mandatory.
7395 * Does this tuple force caller to freeze the entire page?
7396 */
7397 pagefrz->freeze_required =
7398 heap_tuple_should_freeze(tuple, cutoffs,
7399 &pagefrz->NoFreezePageRelfrozenXid,
7400 &pagefrz->NoFreezePageRelminMxid);
7401 }
7402
7403 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7405}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition heapam.c:7535
#define FRM_RETURN_IS_XID
Definition heapam.c:6742
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition heapam.c:6793
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7954
#define FRM_MARK_COMMITTED
Definition heapam.c:6744
#define FRM_NOOP
Definition heapam.c:6740
#define FRM_RETURN_IS_MULTI
Definition heapam.c:6743
#define FRM_INVALIDATE_XMAX
Definition heapam.c:6741
#define HEAP_MOVED_OFF
#define HEAP_XMIN_FROZEN
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_COMMITTED
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2765
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
MultiXactId NoFreezePageRelminMxid
Definition heapam.h:220
TransactionId NoFreezePageRelfrozenXid
Definition heapam.h:219
TransactionId OldestXmin
Definition vacuum.h:279
MultiXactId OldestMxact
Definition vacuum.h:280
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
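
For illustration, the prepare/execute split this routine belongs to can be sketched as follows. This is a rough sketch only: the page loop, the unconditional execution of the plans, and the omission of the critical section and WAL logging are simplifying assumptions, and the exact argument lists of heap_prepare_freeze_tuple() and heap_execute_freeze_tuple() should be checked against heapam.h.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"

static void
freeze_page_sketch(Page page, const struct VacuumCutoffs *cutoffs,
                   HeapPageFreeze *pagefrz)
{
    HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    int         nfrozen = 0;
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);
        HeapTupleHeader htup;
        bool        totally_frozen;

        if (!ItemIdIsNormal(lp))
            continue;
        htup = (HeapTupleHeader) PageGetItem(page, lp);

        /* returns true when a usable freeze plan was stored in frozen[nfrozen] */
        if (heap_prepare_freeze_tuple(htup, cutoffs, pagefrz,
                                      &frozen[nfrozen], &totally_frozen))
            frozen[nfrozen++].offset = offnum;
    }

    /*
     * A real caller consults pagefrz->freeze_required (and its own policy)
     * before executing the plans, and does so inside a critical section
     * with WAL logging; both are omitted in this sketch.
     */
    for (int i = 0; i < nfrozen; i++)
    {
        ItemId      lp = PageGetItemId(page, frozen[i].offset);
        HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, lp);

        heap_execute_freeze_tuple(htup, &frozen[i]);
    }
}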

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)
extern

Definition at line 614 of file heapam.c.

 615{
 616 HeapScanDesc scan = (HeapScanDesc) sscan;
 617 Buffer buffer = scan->rs_cbuf;
 618 BlockNumber block = scan->rs_cblock;
 619 Snapshot snapshot;
 620 Page page;
 621 int lines;
 622 bool all_visible;
 623 bool check_serializable;
 624
625 Assert(BufferGetBlockNumber(buffer) == block);
626
 627 /* ensure we're not accidentally being used when not in pagemode */
 628 Assert(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE);
 629 snapshot = scan->rs_base.rs_snapshot;
630
631 /*
632 * Prune and repair fragmentation for the whole page, if possible.
633 */
634 heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
635
636 /*
637 * We must hold share lock on the buffer content while examining tuple
638 * visibility. Afterwards, however, the tuples we have found to be
639 * visible are guaranteed good as long as we hold the buffer pin.
640 */
 641 LockBuffer(buffer, BUFFER_LOCK_SHARE);
 642
643 page = BufferGetPage(buffer);
644 lines = PageGetMaxOffsetNumber(page);
645
646 /*
647 * If the all-visible flag indicates that all tuples on the page are
648 * visible to everyone, we can skip the per-tuple visibility tests.
649 *
650 * Note: In hot standby, a tuple that's already visible to all
651 * transactions on the primary might still be invisible to a read-only
652 * transaction in the standby. We partly handle this problem by tracking
653 * the minimum xmin of visible tuples as the cut-off XID while marking a
654 * page all-visible on the primary and WAL log that along with the
655 * visibility map SET operation. In hot standby, we wait for (or abort)
 656 * all transactions that might not see one or more tuples on
657 * the page. That's how index-only scans work fine in hot standby. A
658 * crucial difference between index-only scans and heap scans is that the
 659 * index-only scan completely relies on the visibility map, whereas a heap
660 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
661 * the page-level flag can be trusted in the same way, because it might
662 * get propagated somehow without being explicitly WAL-logged, e.g. via a
663 * full page write. Until we can prove that beyond doubt, let's check each
664 * tuple for visibility the hard way.
665 */
666 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
 667 check_serializable =
 668 CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
 669
670 /*
671 * We call page_collect_tuples() with constant arguments, to get the
672 * compiler to constant fold the constant arguments. Separate calls with
673 * constant arguments, rather than variables, are needed on several
674 * compilers to actually perform constant folding.
675 */
676 if (likely(all_visible))
677 {
 678 if (likely(!check_serializable))
 679 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
680 block, lines, true, false);
681 else
682 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
683 block, lines, true, true);
684 }
685 else
686 {
 687 if (likely(!check_serializable))
 688 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
689 block, lines, false, false);
690 else
691 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
692 block, lines, false, true);
693 }
694
 695 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 696}
#define likely(x)
Definition c.h:411
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition heapam.c:520
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition predicate.c:3989
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition pruneheap.c:209
uint32 rs_ntuples
Definition heapam.h:99
BlockNumber rs_cblock
Definition heapam.h:69
bool takenDuringRecovery
Definition snapshot.h:180

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), fb(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().
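
A rough sketch of the consuming side is shown below. It assumes the caller has already pinned the current block into rs_cbuf/rs_cblock, and that the scan descriptor exposes a per-page array of visible offsets (rs_vistuples, an assumption here) alongside rs_ntuples; the block-advancement logic of heapgettup_pagemode() is omitted.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"

static void
consume_current_page(HeapScanDesc scan)
{
    Page        page;

    /* collects the offsets of visible tuples into the scan descriptor */
    heap_prepare_pagescan(&scan->rs_base);

    page = BufferGetPage(scan->rs_cbuf);

    /* tuples found visible stay good as long as the buffer pin is held */
    for (uint32 i = 0; i < scan->rs_ntuples; i++)
    {
        OffsetNumber off = scan->rs_vistuples[i];
        ItemId      lp = PageGetItemId(page, off);
        HeapTupleHeader tup = (HeapTupleHeader) PageGetItem(page, lp);

        /* ... process tup ... */
        (void) tup;
    }
}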

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)
extern

Definition at line 1316 of file heapam.c.

1318{
1319 HeapScanDesc scan = (HeapScanDesc) sscan;
1320
1321 if (set_params)
1322 {
1323 if (allow_strat)
1324 scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1325 else
1326 scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1327
1328 if (allow_sync)
1329 scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1330 else
1331 scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1332
1333 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1334 IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1335 scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1336 else
1337 scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1338 }
1339
1340 /*
1341 * unpin scan buffers
1342 */
1343 if (BufferIsValid(scan->rs_cbuf))
1344 {
1345 ReleaseBuffer(scan->rs_cbuf);
1346 scan->rs_cbuf = InvalidBuffer;
1347 }
1348
1349 /*
1350 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1351 * additional data vs a normal HeapScan
1352 */
1353
1354 /*
1355 * The read stream is reset on rescan. This must be done before
1356 * initscan(), as some state referred to by read_stream_reset() is reset
1357 * in initscan().
1358 */
1359 if (scan->rs_read_stream)
1360 read_stream_reset(scan->rs_read_stream);
1361
1362 /*
1363 * reinitialize scan descriptor
1364 */
1365 initscan(scan, key, true);
1366}
void read_stream_reset(ReadStream *stream)
@ SO_ALLOW_STRAT
Definition tableam.h:58
@ SO_ALLOW_SYNC
Definition tableam.h:60

References BufferIsValid(), fb(), initscan(), InvalidBuffer, IsMVCCSnapshot, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.
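
For illustration, scanning a heap twice with heap_rescan() looks roughly like the sketch below. Callers normally reach this through the tableam wrappers (table_rescan()); the direct calls, an already-pushed active snapshot, and the absence of error handling are assumptions of this sketch.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

static void
scan_twice(Relation rel)
{
    TableScanDesc scan;
    HeapTuple   tuple;

    scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                          SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* first pass over the relation */
    }

    /* restart from the beginning, keeping the existing keys and flags */
    heap_rescan(scan, NULL, false, false, false, false);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* second pass */
    }

    heap_endscan(scan);
}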

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)
extern

Definition at line 1487 of file heapam.c.

1489{
1495
1496 /*
1497 * For relations without any pages, we can simply leave the TID range
1498 * unset. There will be no tuples to scan, therefore no tuples outside
1499 * the given TID range.
1500 */
1501 if (scan->rs_nblocks == 0)
1502 return;
1503
1504 /*
1505 * Set up some ItemPointers which point to the first and last possible
1506 * tuples in the heap.
1507 */
1510
1511 /*
1512 * If the given maximum TID is below the highest possible TID in the
1513 * relation, then restrict the range to that, otherwise we scan to the end
1514 * of the relation.
1515 */
1518
1519 /*
1520 * If the given minimum TID is above the lowest possible TID in the
1521 * relation, then restrict the range to only scan for TIDs above that.
1522 */
1525
1526 /*
1527 * Check for an empty range and protect against would-be negative results
1528 * from the numBlks calculation below.
1529 */
1531 {
1532 /* Set an empty range of blocks to scan */
1534 return;
1535 }
1536
1537 /*
1538 * Calculate the first block and the number of blocks we must scan. We
1539 * could be more aggressive here and perform some more validation to try
1540 * and further narrow the scope of blocks to scan by checking if the
1541 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1542 * advance startBlk by one. Likewise, if highestItem has an offset of 0
1543 * we could scan one fewer blocks. However, such an optimization does not
1544 * seem worth troubling over, currently.
1545 */
1547
1550
1551 /* Set the start block and number of blocks to scan */
1553
1554 /* Finally, set the TID range in sscan */
1555 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1556 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1557}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition heapam.c:498
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition itemptr.h:93
#define MaxOffsetNumber
Definition off.h:28
BlockNumber rs_nblocks
Definition heapam.h:61

References fb(), FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, and HeapScanDescData::rs_nblocks.
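
As a usage sketch, a TID-range scan built directly on the heapam calls might look like the following. In practice this is reached through table_beginscan_tidrange() and its slot-based fetch wrapper; the direct flag choice, active snapshot, and slot handling here are assumptions.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/snapmgr.h"

static void
scan_tid_range(Relation rel, ItemPointer mintid, ItemPointer maxtid)
{
    TableScanDesc scan;
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                          SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);

    /* clamps the requested range to the relation and sets the block limits */
    heap_set_tidrange(scan, mintid, maxtid);

    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* ... process slot ... */
    }

    heap_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
}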

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)
extern

Definition at line 498 of file heapam.c.

499{
 500 HeapScanDesc scan = (HeapScanDesc) sscan;
 501
502 Assert(!scan->rs_inited); /* else too late to change */
503 /* else rs_startblock is significant */
 504 Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
 505
506 /* Check startBlk is valid (but allow case of zero blocks...) */
507 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
508
509 scan->rs_startblock = startBlk;
510 scan->rs_numblocks = numBlks;
511}
BlockNumber rs_startblock
Definition heapam.h:62
BlockNumber rs_numblocks
Definition heapam.h:63

References Assert, fb(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
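
A minimal sketch of restricting a scan to a block range, in the spirit of heapam_index_build_range_scan(), is shown below. Per the asserts above, the limits must be set before the first tuple is fetched and syncscan must not be enabled; the active snapshot and flag choice are assumptions of the sketch.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

static void
scan_block_range(Relation rel, BlockNumber start_block, BlockNumber nblocks)
{
    TableScanDesc scan;
    HeapTuple   tuple;

    scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                          SO_ALLOW_PAGEMODE);   /* note: no SO_ALLOW_SYNC */

    heap_setscanlimits(scan, start_block, nblocks);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* only tuples from the requested block range are returned */
    }

    heap_endscan(scan);
}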

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)
extern

Definition at line 7899 of file heapam.c.

7900{
7901 TransactionId xid;
7902
7903 /*
7904 * If xmin is a normal transaction ID, this tuple is definitely not
7905 * frozen.
7906 */
7907 xid = HeapTupleHeaderGetXmin(tuple);
7908 if (TransactionIdIsNormal(xid))
7909 return true;
7910
7911 /*
7912 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7913 */
7914 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7915 {
7916 MultiXactId multi;
7917
7918 multi = HeapTupleHeaderGetRawXmax(tuple);
7919 if (MultiXactIdIsValid(multi))
7920 return true;
7921 }
7922 else
7923 {
7924 xid = HeapTupleHeaderGetRawXmax(tuple);
7925 if (TransactionIdIsNormal(xid))
7926 return true;
7927 }
7928
7929 if (tuple->t_infomask & HEAP_MOVED)
7930 {
7931 xid = HeapTupleHeaderGetXvac(tuple);
7932 if (TransactionIdIsNormal(xid))
7933 return true;
7934 }
7935
7936 return false;
7937}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_would_be_all_visible().
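
For illustration, a caller can use this to decide whether a locked heap page is already fully frozen, loosely in the spirit of heap_page_would_be_all_visible(). The treatment of non-normal line pointers and the assumption that the caller holds an adequate buffer lock are simplifications of this sketch.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"

static bool
page_fully_frozen(Page page)
{
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);
        HeapTupleHeader tup;

        if (!ItemIdIsNormal(lp))
            continue;           /* unused/dead/redirect line pointers */

        tup = (HeapTupleHeader) PageGetItem(page, lp);
        if (heap_tuple_needs_eventual_freeze(tup))
            return false;       /* some XID/MXID still needs freezing */
    }
    return true;
}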

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs *  cutoffs,
TransactionId *  NoFreezePageRelfrozenXid,
MultiXactId *  NoFreezePageRelminMxid 
)
extern

Definition at line 7954 of file heapam.c.

7958{
7959 TransactionId xid;
7960 MultiXactId multi;
7961 bool freeze = false;
7962
7963 /* First deal with xmin */
7964 xid = HeapTupleHeaderGetXmin(tuple);
7965 if (TransactionIdIsNormal(xid))
7966 {
7968 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7969 *NoFreezePageRelfrozenXid = xid;
7970 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7971 freeze = true;
7972 }
7973
7974 /* Now deal with xmax */
7976 multi = InvalidMultiXactId;
7977 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7978 multi = HeapTupleHeaderGetRawXmax(tuple);
7979 else
7980 xid = HeapTupleHeaderGetRawXmax(tuple);
7981
7982 if (TransactionIdIsNormal(xid))
7983 {
7985 /* xmax is a non-permanent XID */
7986 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7987 *NoFreezePageRelfrozenXid = xid;
7988 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7989 freeze = true;
7990 }
7991 else if (!MultiXactIdIsValid(multi))
7992 {
7993 /* xmax is a permanent XID or invalid MultiXactId/XID */
7994 }
7995 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7996 {
7997 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7998 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7999 *NoFreezePageRelminMxid = multi;
8000 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
8001 freeze = true;
8002 }
8003 else
8004 {
8005 /* xmax is a MultiXactId that may have an updater XID */
8006 MultiXactMember *members;
8007 int nmembers;
8008
8010 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
8011 *NoFreezePageRelminMxid = multi;
8012 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
8013 freeze = true;
8014
8015 /* need to check whether any member of the mxact is old */
8016 nmembers = GetMultiXactIdMembers(multi, &members, false,
8018
8019 for (int i = 0; i < nmembers; i++)
8020 {
8021 xid = members[i].xid;
8023 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8024 *NoFreezePageRelfrozenXid = xid;
8025 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
8026 freeze = true;
8027 }
8028 if (nmembers > 0)
8029 pfree(members);
8030 }
8031
8032 if (tuple->t_infomask & HEAP_MOVED)
8033 {
8034 xid = HeapTupleHeaderGetXvac(tuple);
8035 if (TransactionIdIsNormal(xid))
8036 {
8038 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8039 *NoFreezePageRelfrozenXid = xid;
8040 /* heap_prepare_freeze_tuple forces xvac freezing */
8041 freeze = true;
8042 }
8043 }
8044
8045 return freeze;
8046}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2779
#define InvalidMultiXactId
Definition multixact.h:25
TransactionId xid
Definition multixact.h:57

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
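
A rough sketch of a "no freeze" path, in the spirit of lazy_scan_noprune(), is shown below: the caller maintains the page-level relfrozenxid/relminmxid trackers and notices when some tuple would force freezing under the given cutoffs. The tracker initialization (normally derived from OldestXmin/OldestMxact in per-VACUUM state) and the handling of non-normal line pointers are assumptions of the sketch.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"

static bool
page_can_skip_freezing(Page page, const struct VacuumCutoffs *cutoffs,
                       TransactionId *NoFreezePageRelfrozenXid,
                       MultiXactId *NoFreezePageRelminMxid)
{
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);
        HeapTupleHeader tup;

        if (!ItemIdIsNormal(lp))
            continue;

        tup = (HeapTupleHeader) PageGetItem(page, lp);

        /* ratchets the trackers back; true means the cutoffs demand freezing */
        if (heap_tuple_should_freeze(tup, cutoffs,
                                     NoFreezePageRelfrozenXid,
                                     NoFreezePageRelminMxid))
            return false;
    }
    return true;
}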

◆ heap_update()

TM_Result heap_update ( Relation  relation,
const ItemPointerData *  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData *  tmfd,
LockTupleMode *  lockmode,
TU_UpdateIndexes *  update_indexes 
)
extern

Definition at line 3320 of file heapam.c.

3324{
3325 TM_Result result;
3333 ItemId lp;
3337 bool old_key_copied = false;
3338 Page page;
3339 BlockNumber block;
3341 Buffer buffer,
3342 newbuf,
3343 vmbuffer = InvalidBuffer,
3345 bool need_toast;
3347 pagefree;
3348 bool have_tuple_lock = false;
3349 bool iscombo;
3350 bool use_hot_update = false;
3351 bool summarized_update = false;
3352 bool key_intact;
3353 bool all_visible_cleared = false;
3354 bool all_visible_cleared_new = false;
3355 bool checked_lockers;
3356 bool locker_remains;
3357 bool id_has_external = false;
3364
3366
3367 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3370
3371 AssertHasSnapshotForToast(relation);
3372
3373 /*
3374 * Forbid this during a parallel operation, lest it allocate a combo CID.
3375 * Other workers might need that combo CID for visibility checks, and we
3376 * have no provision for broadcasting it to them.
3377 */
3378 if (IsInParallelMode())
3379 ereport(ERROR,
3381 errmsg("cannot update tuples during a parallel operation")));
3382
3383#ifdef USE_ASSERT_CHECKING
3385#endif
3386
3387 /*
3388 * Fetch the list of attributes to be checked for various operations.
3389 *
3390 * For HOT considerations, this is wasted effort if we fail to update or
3391 * have to put the new tuple on a different page. But we must compute the
3392 * list before obtaining buffer lock --- in the worst case, if we are
3393 * doing an update on one of the relevant system catalogs, we could
3394 * deadlock if we try to fetch the list later. In any case, the relcache
3395 * caches the data so this is usually pretty cheap.
3396 *
3397 * We also need columns used by the replica identity and columns that are
3398 * considered the "key" of rows in the table.
3399 *
3400 * Note that we get copies of each bitmap, so we need not worry about
3401 * relcache flush happening midway through.
3402 */
3415
3417 INJECTION_POINT("heap_update-before-pin", NULL);
3418 buffer = ReadBuffer(relation, block);
3419 page = BufferGetPage(buffer);
3420
3421 /*
3422 * Before locking the buffer, pin the visibility map page if it appears to
3423 * be necessary. Since we haven't got the lock yet, someone else might be
3424 * in the middle of changing this, so we'll need to recheck after we have
3425 * the lock.
3426 */
3427 if (PageIsAllVisible(page))
3428 visibilitymap_pin(relation, block, &vmbuffer);
3429
3431
3433
3434 /*
3435 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3436 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3437 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3438 * of which indicates concurrent pruning.
3439 *
3440 * Failing with TM_Updated would be most accurate. However, unlike other
3441 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3442 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3443 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3444 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3445 * TM_Updated and TM_Deleted affects only the wording of error messages.
3446 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3447 * the specification of when tmfd->ctid is valid. Second, it creates
3448 * error log evidence that we took this branch.
3449 *
3450 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3451 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3452 * unrelated row, we'll fail with "duplicate key value violates unique".
3453 * XXX if otid is the live, newer version of the newtup row, we'll discard
3454 * changes originating in versions of this catalog row after the version
3455 * the caller got from syscache. See syscache-update-pruned.spec.
3456 */
3457 if (!ItemIdIsNormal(lp))
3458 {
3460
3461 UnlockReleaseBuffer(buffer);
3463 if (vmbuffer != InvalidBuffer)
3464 ReleaseBuffer(vmbuffer);
3465 tmfd->ctid = *otid;
3466 tmfd->xmax = InvalidTransactionId;
3467 tmfd->cmax = InvalidCommandId;
3469
3474 /* modified_attrs not yet initialized */
3476 return TM_Deleted;
3477 }
3478
3479 /*
3480 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3481 * properly.
3482 */
3483 oldtup.t_tableOid = RelationGetRelid(relation);
3484 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3485 oldtup.t_len = ItemIdGetLength(lp);
3486 oldtup.t_self = *otid;
3487
3488 /* the new tuple is ready, except for this: */
3489 newtup->t_tableOid = RelationGetRelid(relation);
3490
3491 /*
3492 * Determine columns modified by the update. Additionally, identify
3493 * whether any of the unmodified replica identity key attributes in the
3494 * old tuple is externally stored or not. This is required because for
3495 * such attributes the flattened value won't be WAL logged as part of the
3496 * new tuple so we must include it as part of the old_key_tuple. See
3497 * ExtractReplicaIdentity.
3498 */
3500 id_attrs, &oldtup,
3502
3503 /*
3504 * If we're not updating any "key" column, we can grab a weaker lock type.
3505 * This allows for more concurrency when we are running simultaneously
3506 * with foreign key checks.
3507 *
3508 * Note that if a column gets detoasted while executing the update, but
3509 * the value ends up being the same, this test will fail and we will use
3510 * the stronger lock. This is acceptable; the important case to optimize
3511 * is updates that don't manipulate key columns, not those that
3512 * serendipitously arrive at the same key values.
3513 */
3515 {
3516 *lockmode = LockTupleNoKeyExclusive;
3518 key_intact = true;
3519
3520 /*
3521 * If this is the first possibly-multixact-able operation in the
3522 * current transaction, set my per-backend OldestMemberMXactId
3523 * setting. We can be certain that the transaction will never become a
3524 * member of any older MultiXactIds than that. (We have to do this
3525 * even if we end up just using our own TransactionId below, since
3526 * some other backend could incorporate our XID into a MultiXact
3527 * immediately afterwards.)
3528 */
3530 }
3531 else
3532 {
3533 *lockmode = LockTupleExclusive;
3535 key_intact = false;
3536 }
3537
3538 /*
3539 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3540 * otid may very well point at newtup->t_self, which we will overwrite
3541 * with the new tuple's location, so there's great risk of confusion if we
3542 * use otid anymore.
3543 */
3544
3545l2:
3546 checked_lockers = false;
3547 locker_remains = false;
3548 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3549
3550 /* see below about the "no wait" case */
3551 Assert(result != TM_BeingModified || wait);
3552
3553 if (result == TM_Invisible)
3554 {
3555 UnlockReleaseBuffer(buffer);
3556 ereport(ERROR,
3558 errmsg("attempted to update invisible tuple")));
3559 }
3560 else if (result == TM_BeingModified && wait)
3561 {
3564 bool can_continue = false;
3565
3566 /*
3567 * XXX note that we don't consider the "no wait" case here. This
3568 * isn't a problem currently because no caller uses that case, but it
3569 * should be fixed if such a caller is introduced. It wasn't a
3570 * problem previously because this code would always wait, but now
3571 * that some tuple locks do not conflict with one of the lock modes we
3572 * use, it is possible that this case is interesting to handle
3573 * specially.
3574 *
3575 * This may cause failures with third-party code that calls
3576 * heap_update directly.
3577 */
3578
3579 /* must copy state data before unlocking buffer */
3581 infomask = oldtup.t_data->t_infomask;
3582
3583 /*
3584 * Now we have to do something about the existing locker. If it's a
3585 * multi, sleep on it; we might be awakened before it is completely
3586 * gone (or even not sleep at all in some cases); we need to preserve
3587 * it as locker, unless it is gone completely.
3588 *
3589 * If it's not a multi, we need to check for sleeping conditions
3590 * before actually going to sleep. If the update doesn't conflict
3591 * with the locks, we just continue without sleeping (but making sure
3592 * it is preserved).
3593 *
3594 * Before sleeping, we need to acquire tuple lock to establish our
3595 * priority for the tuple (see heap_lock_tuple). LockTuple will
3596 * release us when we are next-in-line for the tuple. Note we must
3597 * not acquire the tuple lock until we're sure we're going to sleep;
3598 * otherwise we're open for race conditions with other transactions
3599 * holding the tuple lock which sleep on us.
3600 *
3601 * If we are forced to "start over" below, we keep the tuple lock;
3602 * this arranges that we stay at the head of the line while rechecking
3603 * tuple state.
3604 */
3606 {
3608 int remain;
3609 bool current_is_member = false;
3610
3612 *lockmode, &current_is_member))
3613 {
3615
3616 /*
3617 * Acquire the lock, if necessary (but skip it when we're
3618 * requesting a lock and already have one; avoids deadlock).
3619 */
3620 if (!current_is_member)
3621 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3623
3624 /* wait for multixact */
3626 relation, &oldtup.t_self, XLTW_Update,
3627 &remain);
3628 checked_lockers = true;
3629 locker_remains = remain != 0;
3631
3632 /*
3633 * If xwait had just locked the tuple then some other xact
3634 * could update this tuple before we get to this point. Check
3635 * for xmax change, and start over if so.
3636 */
3637 if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3638 infomask) ||
3640 xwait))
3641 goto l2;
3642 }
3643
3644 /*
3645 * Note that the multixact may not be done by now. It could have
3646 * surviving members; our own xact or other subxacts of this
3647 * backend, and also any other concurrent transaction that locked
3648 * the tuple with LockTupleKeyShare if we only got
3649 * LockTupleNoKeyExclusive. If this is the case, we have to be
3650 * careful to mark the updated tuple with the surviving members in
3651 * Xmax.
3652 *
3653 * Note that there could have been another update in the
3654 * MultiXact. In that case, we need to check whether it committed
3655 * or aborted. If it aborted we are safe to update it again;
3656 * otherwise there is an update conflict, and we have to return
3657 * TableTuple{Deleted, Updated} below.
3658 *
3659 * In the LockTupleExclusive case, we still need to preserve the
3660 * surviving members: those would include the tuple locks we had
3661 * before this one, which are important to keep in case this
3662 * subxact aborts.
3663 */
3664 if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3666 else
3668
3669 /*
3670 * There was no UPDATE in the MultiXact; or it aborted. No
3671 * TransactionIdIsInProgress() call needed here, since we called
3672 * MultiXactIdWait() above.
3673 */
3676 can_continue = true;
3677 }
3679 {
3680 /*
3681 * The only locker is ourselves; we can avoid grabbing the tuple
3682 * lock here, but must preserve our locking information.
3683 */
3684 checked_lockers = true;
3685 locker_remains = true;
3686 can_continue = true;
3687 }
3689 {
3690 /*
3691 * If it's just a key-share locker, and we're not changing the key
3692 * columns, we don't need to wait for it to end; but we need to
3693 * preserve it as locker.
3694 */
3695 checked_lockers = true;
3696 locker_remains = true;
3697 can_continue = true;
3698 }
3699 else
3700 {
3701 /*
3702 * Wait for regular transaction to end; but first, acquire tuple
3703 * lock.
3704 */
3706 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3708 XactLockTableWait(xwait, relation, &oldtup.t_self,
3709 XLTW_Update);
3710 checked_lockers = true;
3712
3713 /*
3714 * xwait is done, but if xwait had just locked the tuple then some
3715 * other xact could update this tuple before we get to this point.
3716 * Check for xmax change, and start over if so.
3717 */
3718 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3721 goto l2;
3722
3723 /* Otherwise check if it committed or aborted */
3724 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3725 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3726 can_continue = true;
3727 }
3728
3729 if (can_continue)
3730 result = TM_Ok;
3731 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3732 result = TM_Updated;
3733 else
3734 result = TM_Deleted;
3735 }
3736
3737 /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3738 if (result != TM_Ok)
3739 {
3740 Assert(result == TM_SelfModified ||
3741 result == TM_Updated ||
3742 result == TM_Deleted ||
3743 result == TM_BeingModified);
3744 Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3745 Assert(result != TM_Updated ||
3746 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3747 }
3748
3749 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3750 {
3751 /* Perform additional check for transaction-snapshot mode RI updates */
3753 result = TM_Updated;
3754 }
3755
3756 if (result != TM_Ok)
3757 {
3758 tmfd->ctid = oldtup.t_data->t_ctid;
3759 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3760 if (result == TM_SelfModified)
3761 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3762 else
3763 tmfd->cmax = InvalidCommandId;
3764 UnlockReleaseBuffer(buffer);
3765 if (have_tuple_lock)
3766 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3767 if (vmbuffer != InvalidBuffer)
3768 ReleaseBuffer(vmbuffer);
3770
3777 return result;
3778 }
3779
3780 /*
3781 * If we didn't pin the visibility map page and the page has become all
3782 * visible while we were busy locking the buffer, or during some
3783 * subsequent window during which we had it unlocked, we'll have to unlock
3784 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3785 * bit unfortunate, especially since we'll now have to recheck whether the
3786 * tuple has been locked or updated under us, but hopefully it won't
3787 * happen very often.
3788 */
3789 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3790 {
3792 visibilitymap_pin(relation, block, &vmbuffer);
3794 goto l2;
3795 }
3796
3797 /* Fill in transaction status data */
3798
3799 /*
3800 * If the tuple we're updating is locked, we need to preserve the locking
3801 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3802 */
3804 oldtup.t_data->t_infomask,
3805 oldtup.t_data->t_infomask2,
3806 xid, *lockmode, true,
3809
3810 /*
3811 * And also prepare an Xmax value for the new copy of the tuple. If there
3812 * was no xmax previously, or there was one but all lockers are now gone,
3813 * then use InvalidTransactionId; otherwise, get the xmax from the old
3814 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3815 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3816 */
3817 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3818 HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3821 else
3823
3825 {
3828 }
3829 else
3830 {
3831 /*
3832 * If we found a valid Xmax for the new tuple, then the infomask bits
3833 * to use on the new tuple depend on what was there on the old one.
3834 * Note that since we're doing an update, the only possibility is that
3835 * the lockers had FOR KEY SHARE lock.
3836 */
3837 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3838 {
3841 }
3842 else
3843 {
3846 }
3847 }
3848
3849 /*
3850 * Prepare the new tuple with the appropriate initial values of Xmin and
3851 * Xmax, as well as initial infomask bits as computed above.
3852 */
3853 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3854 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3855 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3857 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3858 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3860
3861 /*
3862 * Replace cid with a combo CID if necessary. Note that we already put
3863 * the plain cid into the new tuple.
3864 */
3866
3867 /*
3868 * If the toaster needs to be activated, OR if the new tuple will not fit
3869 * on the same page as the old, then we need to release the content lock
3870 * (but not the pin!) on the old tuple's buffer while we are off doing
3871 * TOAST and/or table-file-extension work. We must mark the old tuple to
3872 * show that it's locked, else other processes may try to update it
3873 * themselves.
3874 *
3875 * We need to invoke the toaster if there are already any out-of-line
3876 * toasted values present, or if the new tuple is over-threshold.
3877 */
3878 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3879 relation->rd_rel->relkind != RELKIND_MATVIEW)
3880 {
3881 /* toast table entries should never be recursively toasted */
3884 need_toast = false;
3885 }
3886 else
3889 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3890
3892
3893 newtupsize = MAXALIGN(newtup->t_len);
3894
3896 {
3900 bool cleared_all_frozen = false;
3901
3902 /*
3903 * To prevent concurrent sessions from updating the tuple, we have to
3904 * temporarily mark it locked, while we release the page-level lock.
3905 *
3906 * To satisfy the rule that any xid potentially appearing in a buffer
3907 * written out to disk must first be WAL logged, we unfortunately have to
3908 * WAL log this temporary modification. We can reuse xl_heap_lock for this
3909 * purpose. If we crash/error before following through with the
3910 * actual update, xmax will be of an aborted transaction, allowing
3911 * other sessions to proceed.
3912 */
3913
3914 /*
3915 * Compute xmax / infomask appropriate for locking the tuple. This has
3916 * to be done separately from the combo that's going to be used for
3917 * updating, because the potentially created multixact would otherwise
3918 * be wrong.
3919 */
3921 oldtup.t_data->t_infomask,
3922 oldtup.t_data->t_infomask2,
3923 xid, *lockmode, false,
3926
3928
3930
3931 /* Clear obsolete visibility flags ... */
3932 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3933 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3935 /* ... and store info about transaction updating this tuple */
3938 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3939 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3941
3942 /* temporarily make it look not-updated, but locked */
3943 oldtup.t_data->t_ctid = oldtup.t_self;
3944
3945 /*
3946 * Clear all-frozen bit on visibility map if needed. We could
3947 * immediately reset ALL_VISIBLE, but given that the WAL logging
3948 * overhead would be unchanged, that doesn't seem necessarily
3949 * worthwhile.
3950 */
3951 if (PageIsAllVisible(page) &&
3952 visibilitymap_clear(relation, block, vmbuffer,
3954 cleared_all_frozen = true;
3955
3956 MarkBufferDirty(buffer);
3957
3958 if (RelationNeedsWAL(relation))
3959 {
3962
3965
3966 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3968 xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3969 oldtup.t_data->t_infomask2);
3970 xlrec.flags =
3974 PageSetLSN(page, recptr);
3975 }
3976
3978
3980
3981 /*
3982 * Let the toaster do its thing, if needed.
3983 *
3984 * Note: below this point, heaptup is the data we actually intend to
3985 * store into the relation; newtup is the caller's original untoasted
3986 * data.
3987 */
3988 if (need_toast)
3989 {
3990 /* Note we always use WAL and FSM during updates */
3992 newtupsize = MAXALIGN(heaptup->t_len);
3993 }
3994 else
3995 heaptup = newtup;
3996
3997 /*
3998 * Now, do we need a new page for the tuple, or not? This is a bit
3999 * tricky since someone else could have added tuples to the page while
4000 * we weren't looking. We have to recheck the available space after
4001 * reacquiring the buffer lock. But don't bother to do that if the
4002 * former amount of free space is still not enough; it's unlikely
4003 * there's more free now than before.
4004 *
4005 * What's more, if we need to get a new page, we will need to acquire
4006 * buffer locks on both old and new pages. To avoid deadlock against
4007 * some other backend trying to get the same two locks in the other
4008 * order, we must be consistent about the order we get the locks in.
4009 * We use the rule "lock the lower-numbered page of the relation
4010 * first". To implement this, we must do RelationGetBufferForTuple
4011 * while not holding the lock on the old page, and we must rely on it
4012 * to get the locks on both pages in the correct order.
4013 *
4014 * Another consideration is that we need visibility map page pin(s) if
4015 * we will have to clear the all-visible flag on either page. If we
4016 * call RelationGetBufferForTuple, we rely on it to acquire any such
4017 * pins; but if we don't, we have to handle that here. Hence we need
4018 * a loop.
4019 */
4020 for (;;)
4021 {
4022 if (newtupsize > pagefree)
4023 {
4024 /* It doesn't fit, must use RelationGetBufferForTuple. */
4025 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
4026 buffer, 0, NULL,
4027 &vmbuffer_new, &vmbuffer,
4028 0);
4029 /* We're all done. */
4030 break;
4031 }
4032 /* Acquire VM page pin if needed and we don't have it. */
4033 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4034 visibilitymap_pin(relation, block, &vmbuffer);
4035 /* Re-acquire the lock on the old tuple's page. */
4037 /* Re-check using the up-to-date free space */
4039 if (newtupsize > pagefree ||
4040 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
4041 {
4042 /*
4043 * Rats, it doesn't fit anymore, or somebody just now set the
4044 * all-visible flag. We must now unlock and loop to avoid
4045 * deadlock. Fortunately, this path should seldom be taken.
4046 */
4048 }
4049 else
4050 {
4051 /* We're all done. */
4052 newbuf = buffer;
4053 break;
4054 }
4055 }
4056 }
4057 else
4058 {
4059 /* No TOAST work needed, and it'll fit on same page */
4060 newbuf = buffer;
4061 heaptup = newtup;
4062 }
4063
4064 /*
4065 * We're about to do the actual update -- check for conflict first, to
4066 * avoid possibly having to roll back work we've just done.
4067 *
4068 * This is safe without a recheck as long as there is no possibility of
4069 * another process scanning the pages between this check and the update
4070 * being visible to the scan (i.e., exclusive buffer content lock(s) are
4071 * continuously held from this point until the tuple update is visible).
4072 *
4073 * For the new tuple the only check needed is at the relation level, but
4074 * since both tuples are in the same relation and the check for oldtup
4075 * will include checking the relation level, there is no benefit to a
4076 * separate check for the new tuple.
4077 */
4078 CheckForSerializableConflictIn(relation, &oldtup.t_self,
4079 BufferGetBlockNumber(buffer));
4080
4081 /*
4082 * At this point newbuf and buffer are both pinned and locked, and newbuf
4083 * has enough space for the new tuple. If they are the same buffer, only
4084 * one pin is held.
4085 */
4086
4087 if (newbuf == buffer)
4088 {
4089 /*
4090 * Since the new tuple is going into the same page, we might be able
4091 * to do a HOT update. Check if any of the index columns have been
4092 * changed.
4093 */
4095 {
4096 use_hot_update = true;
4097
4098 /*
4099 * If none of the columns that are used in hot-blocking indexes
4100 * were updated, we can apply HOT, but we do still need to check
4101 * if we need to update the summarizing indexes, and update those
4102 * indexes if the columns were updated, or we may fail to detect
4103 * e.g. value bound changes in BRIN minmax indexes.
4104 */
4106 summarized_update = true;
4107 }
4108 }
4109 else
4110 {
4111 /* Set a hint that the old page could use prune/defrag */
4112 PageSetFull(page);
4113 }
4114
4115 /*
4116 * Compute replica identity tuple before entering the critical section so
4117 * we don't PANIC upon a memory allocation failure.
4118 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4119 * logged. Pass old key required as true only if the replica identity key
4120 * columns are modified or it has external data.
4121 */
4126
4127 /* NO EREPORT(ERROR) from here till changes are logged */
4129
4130 /*
4131 * If this transaction commits, the old tuple will become DEAD sooner or
4132 * later. Set flag that this page is a candidate for pruning once our xid
4133 * falls below the OldestXmin horizon. If the transaction finally aborts,
4134 * the subsequent page pruning will be a no-op and the hint will be
4135 * cleared.
4136 *
4137 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4138 * there would be a prunable tuple in the newbuf; but for now we choose
4139 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4140 * sync if this decision changes.
4141 */
4142 PageSetPrunable(page, xid);
4143
4144 if (use_hot_update)
4145 {
4146 /* Mark the old tuple as HOT-updated */
4148 /* And mark the new tuple as heap-only */
4150 /* Mark the caller's copy too, in case different from heaptup */
4152 }
4153 else
4154 {
4155 /* Make sure tuples are correctly marked as not-HOT */
4159 }
4160
4161 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4162
4163
4164 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4165 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
4166 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4167 /* ... and store info about transaction updating this tuple */
4170 oldtup.t_data->t_infomask |= infomask_old_tuple;
4171 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4173
4174 /* record address of new tuple in t_ctid of old one */
4175 oldtup.t_data->t_ctid = heaptup->t_self;
4176
4177 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4178 if (PageIsAllVisible(BufferGetPage(buffer)))
4179 {
4180 all_visible_cleared = true;
4182 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4183 vmbuffer, VISIBILITYMAP_VALID_BITS);
4184 }
4185 if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
4186 {
4191 }
4192
4193 if (newbuf != buffer)
4195 MarkBufferDirty(buffer);
4196
4197 /* XLOG stuff */
4198 if (RelationNeedsWAL(relation))
4199 {
4201
4202 /*
4203 * For logical decoding we need combo CIDs to properly decode the
4204 * catalog.
4205 */
4207 {
4208 log_heap_new_cid(relation, &oldtup);
4209 log_heap_new_cid(relation, heaptup);
4210 }
4211
4212 recptr = log_heap_update(relation, buffer,
4217 if (newbuf != buffer)
4218 {
4220 }
4222 }
4223
4225
4226 if (newbuf != buffer)
4229
4230 /*
4231 * Mark old tuple for invalidation from system caches at next command
4232 * boundary, and mark the new tuple for invalidation in case we abort. We
4233 * have to do this before releasing the buffer because oldtup is in the
4234 * buffer. (heaptup is all in local memory, but it's necessary to process
4235 * both tuple versions in one call to inval.c so we can avoid redundant
4236 * sinval messages.)
4237 */
4239
4240 /* Now we can release the buffer(s) */
4241 if (newbuf != buffer)
4243 ReleaseBuffer(buffer);
4246 if (BufferIsValid(vmbuffer))
4247 ReleaseBuffer(vmbuffer);
4248
4249 /*
4250 * Release the lmgr tuple lock, if we had it.
4251 */
4252 if (have_tuple_lock)
4253 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4254
4255 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4256
4257 /*
4258 * If heaptup is a private copy, release it. Don't forget to copy t_self
4259 * back to the caller's image, too.
4260 */
4261 if (heaptup != newtup)
4262 {
4263 newtup->t_self = heaptup->t_self;
4265 }
4266
4267 /*
4268 * If it is a HOT update, the update may still need to update summarized
4269 * indexes, lest we fail to update those summaries and get incorrect
4270 * results (for example, minmax bounds of the block may change with this
4271 * update).
4272 */
4273 if (use_hot_update)
4274 {
4277 else
4279 }
4280 else
4282
4285
4292
4293 return TM_Ok;
4294}
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:916
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:581
static void PageSetFull(Page page)
Definition bufpage.h:417
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition heapam.c:4474
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition heapam.c:8927
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition heapam.c:7668
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
#define HEAP2_XACT_MASK
#define HEAP_XMAX_LOCK_ONLY
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
#define HEAP_XACT_MASK
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
#define HEAP_UPDATED
#define HEAP_XMAX_KEYSHR_LOCK
#define INJECTION_POINT(name, arg)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition relcache.c:5298
@ INDEX_ATTR_BITMAP_KEY
Definition relcache.h:69
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition relcache.h:72
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition relcache.h:73
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition relcache.h:71
bool RelationSupportsSysCache(Oid relid)
Definition syscache.c:762
@ TU_Summarizing
Definition tableam.h:119
@ TU_All
Definition tableam.h:116
@ TU_None
Definition tableam.h:113
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188

References Assert, AssertHasSnapshotForToast(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
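
A minimal caller, modeled on simple_heap_update(), is sketched below: it updates the tuple at otid with the current command ID, waits for any conflicting transaction, and reports concurrency failures as errors. The abbreviated error handling and the omission of the subsequent index maintenance driven by update_indexes are assumptions of this sketch.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"

static void
update_tuple_sketch(Relation rel, ItemPointer otid, HeapTuple newtup)
{
    TM_Result   result;
    TM_FailureData tmfd;
    LockTupleMode lockmode;
    TU_UpdateIndexes update_indexes;

    result = heap_update(rel, otid, newtup,
                         GetCurrentCommandId(true), InvalidSnapshot,
                         true,      /* wait for any conflicting xact */
                         &tmfd, &lockmode, &update_indexes);

    switch (result)
    {
        case TM_Ok:
            /* caller must still maintain index entries per update_indexes */
            break;
        case TM_SelfModified:
            elog(ERROR, "tuple already updated by self");
            break;
        case TM_Updated:
        case TM_Deleted:
            elog(ERROR, "tuple concurrently modified");
            break;
        default:
            elog(ERROR, "unexpected heap_update result: %u", result);
            break;
    }
}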

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)
extern

Definition at line 624 of file vacuumlazy.c.

626{
628 bool verbose,
629 instrument,
630 skipwithvm,
638 TimestampTz starttime = 0;
640 startwritetime = 0;
643 ErrorContextCallback errcallback;
644 char **indnames = NULL;
646
647 verbose = (params.options & VACOPT_VERBOSE) != 0;
648 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
649 params.log_vacuum_min_duration >= 0));
650 if (instrument)
651 {
653 if (track_io_timing)
654 {
657 }
658 }
659
660 /* Used for instrumentation and stats report */
661 starttime = GetCurrentTimestamp();
662
664 RelationGetRelid(rel));
667 params.is_wraparound
670 else
673
674 /*
675 * Setup error traceback support for ereport() first. The idea is to set
676 * up an error context callback to display additional information on any
677 * error during a vacuum. During different phases of vacuum, we update
 678 * the state so that the error context callback always displays current
679 * information.
680 *
681 * Copy the names of heap rel into local memory for error reporting
682 * purposes, too. It isn't always safe to assume that we can get the name
683 * of each rel. It's convenient for code in lazy_scan_heap to always use
684 * these temp copies.
685 */
688 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
689 vacrel->relname = pstrdup(RelationGetRelationName(rel));
690 vacrel->indname = NULL;
692 vacrel->verbose = verbose;
693 errcallback.callback = vacuum_error_callback;
694 errcallback.arg = vacrel;
695 errcallback.previous = error_context_stack;
696 error_context_stack = &errcallback;
697
698 /* Set up high level stuff about rel and its indexes */
699 vacrel->rel = rel;
701 &vacrel->indrels);
702 vacrel->bstrategy = bstrategy;
703 if (instrument && vacrel->nindexes > 0)
704 {
705 /* Copy index names used by instrumentation (not error reporting) */
706 indnames = palloc_array(char *, vacrel->nindexes);
707 for (int i = 0; i < vacrel->nindexes; i++)
709 }
710
711 /*
712 * The index_cleanup param either disables index vacuuming and cleanup or
713 * forces it to go ahead when we would otherwise apply the index bypass
714 * optimization. The default is 'auto', which leaves the final decision
715 * up to lazy_vacuum().
716 *
 717 * The truncate param allows the user to avoid attempting relation truncation,
718 * though it can't force truncation to happen.
719 */
722 params.truncate != VACOPTVALUE_AUTO);
723
724 /*
 725 * While VacuumFailsafeActive is reset to false before calling this, we
726 * still need to reset it here due to recursive calls.
727 */
728 VacuumFailsafeActive = false;
729 vacrel->consider_bypass_optimization = true;
730 vacrel->do_index_vacuuming = true;
731 vacrel->do_index_cleanup = true;
732 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
734 {
735 /* Force disable index vacuuming up-front */
736 vacrel->do_index_vacuuming = false;
737 vacrel->do_index_cleanup = false;
738 }
739 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
740 {
741 /* Force index vacuuming. Note that failsafe can still bypass. */
742 vacrel->consider_bypass_optimization = false;
743 }
744 else
745 {
746 /* Default/auto, make all decisions dynamically */
748 }
749
750 /* Initialize page counters explicitly (be tidy) */
751 vacrel->scanned_pages = 0;
752 vacrel->eager_scanned_pages = 0;
753 vacrel->removed_pages = 0;
754 vacrel->new_frozen_tuple_pages = 0;
755 vacrel->lpdead_item_pages = 0;
756 vacrel->missed_dead_pages = 0;
757 vacrel->nonempty_pages = 0;
758 /* dead_items_alloc allocates vacrel->dead_items later on */
759
760 /* Allocate/initialize output statistics state */
761 vacrel->new_rel_tuples = 0;
762 vacrel->new_live_tuples = 0;
763 vacrel->indstats = (IndexBulkDeleteResult **)
764 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
765
766 /* Initialize remaining counters (be tidy) */
767 vacrel->num_index_scans = 0;
768 vacrel->num_dead_items_resets = 0;
769 vacrel->total_dead_items_bytes = 0;
770 vacrel->tuples_deleted = 0;
771 vacrel->tuples_frozen = 0;
772 vacrel->lpdead_items = 0;
773 vacrel->live_tuples = 0;
774 vacrel->recently_dead_tuples = 0;
775 vacrel->missed_dead_tuples = 0;
776
777 vacrel->vm_new_visible_pages = 0;
778 vacrel->vm_new_visible_frozen_pages = 0;
779 vacrel->vm_new_frozen_pages = 0;
780
781 /*
782 * Get cutoffs that determine which deleted tuples are considered DEAD,
783 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
784 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
785 * happen in this order to ensure that the OldestXmin cutoff field works
786 * as an upper bound on the XIDs stored in the pages we'll actually scan
787 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
788 *
789 * Next acquire vistest, a related cutoff that's used in pruning. We use
790 * vistest in combination with OldestXmin to ensure that
791 * heap_page_prune_and_freeze() always removes any deleted tuple whose
792 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
793 * whether a tuple should be frozen or removed. (In the future we might
794 * want to teach lazy_scan_prune to recompute vistest from time to time,
795 * to increase the number of dead tuples it can prune away.)
796 */
797 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
799 vacrel->vistest = GlobalVisTestFor(rel);
800
801 /* Initialize state used to track oldest extant XID/MXID */
802 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
803 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
804
805 /*
806 * Initialize state related to tracking all-visible page skipping. This is
807 * very important to determine whether or not it is safe to advance the
808 * relfrozenxid/relminmxid.
809 */
810 vacrel->skippedallvis = false;
811 skipwithvm = true;
813 {
814 /*
815 * Force aggressive mode, and disable skipping blocks using the
816 * visibility map (even those set all-frozen)
817 */
818 vacrel->aggressive = true;
819 skipwithvm = false;
820 }
821
822 vacrel->skipwithvm = skipwithvm;
823
824 /*
825 * Set up eager scan tracking state. This must happen after determining
826 * whether or not the vacuum must be aggressive, because only normal
827 * vacuums use the eager scan algorithm.
828 */
830
831 /* Report the vacuum mode: 'normal' or 'aggressive' */
833 vacrel->aggressive
836
837 if (verbose)
838 {
839 if (vacrel->aggressive)
841 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
842 vacrel->dbname, vacrel->relnamespace,
843 vacrel->relname)));
844 else
846 (errmsg("vacuuming \"%s.%s.%s\"",
847 vacrel->dbname, vacrel->relnamespace,
848 vacrel->relname)));
849 }
850
851 /*
852 * Allocate dead_items memory using dead_items_alloc. This handles
853 * parallel VACUUM initialization as part of allocating shared memory
854 * space used for dead_items. (But do a failsafe precheck first, to
855 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
856 * is already dangerously old.)
857 */
860
861 /*
862 * Call lazy_scan_heap to perform all required heap pruning, index
863 * vacuuming, and heap vacuuming (plus related processing)
864 */
866
867 /*
868 * Save dead items max_bytes and update the memory usage statistics before
869 * cleanup, as they are freed in parallel vacuum cases during
870 * dead_items_cleanup().
871 */
872 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
873 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
874
875 /*
876 * Free resources managed by dead_items_alloc. This ends parallel mode in
877 * passing when necessary.
878 */
881
882 /*
883 * Update pg_class entries for each of rel's indexes where appropriate.
884 *
885 * Unlike the later update to rel's pg_class entry, this is not critical.
886 * Maintains relpages/reltuples statistics used by the planner only.
887 */
888 if (vacrel->do_index_cleanup)
890
891 /* Done with rel's indexes */
892 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
893
894 /* Optionally truncate rel */
897
898 /* Pop the error context stack */
899 error_context_stack = errcallback.previous;
900
901 /* Report that we are now doing final cleanup */
904
905 /*
906 * Prepare to update rel's pg_class entry.
907 *
908 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
909 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
910 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
911 */
912 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
913 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
914 vacrel->cutoffs.relfrozenxid,
915 vacrel->NewRelfrozenXid));
916 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
917 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
918 vacrel->cutoffs.relminmxid,
919 vacrel->NewRelminMxid));
920 if (vacrel->skippedallvis)
921 {
922 /*
923 * Must keep original relfrozenxid in a non-aggressive VACUUM that
924 * chose to skip an all-visible page range. The state that tracks new
925 * values will have missed unfrozen XIDs from the pages we skipped.
926 */
927 Assert(!vacrel->aggressive);
928 vacrel->NewRelfrozenXid = InvalidTransactionId;
929 vacrel->NewRelminMxid = InvalidMultiXactId;
930 }
931
932 /*
933 * For safety, clamp relallvisible to be not more than what we're setting
934 * pg_class.relpages to
935 */
936 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
940
941 /*
942 * An all-frozen block _must_ be all-visible. As such, clamp the count of
943 * all-frozen blocks to the count of all-visible blocks. This matches the
944 * clamping of relallvisible above.
945 */
948
949 /*
950 * Now actually update rel's pg_class entry.
951 *
952 * In principle new_live_tuples could be -1 indicating that we (still)
953 * don't know the tuple count. In practice that can't happen, since we
954 * scan every page that isn't skipped using the visibility map.
955 */
956 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
958 vacrel->nindexes > 0,
959 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
961
962 /*
963 * Report results to the cumulative stats system, too.
964 *
965 * Deliberately avoid telling the stats system about LP_DEAD items that
966 * remain in the table due to VACUUM bypassing index and heap vacuuming.
967 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
968 * It seems like a good idea to err on the side of not vacuuming again too
969 * soon in cases where the failsafe prevented significant amounts of heap
970 * vacuuming.
971 */
973 Max(vacrel->new_live_tuples, 0),
974 vacrel->recently_dead_tuples +
975 vacrel->missed_dead_tuples,
976 starttime);
978
979 if (instrument)
980 {
982
983 if (verbose || params.log_vacuum_min_duration == 0 ||
986 {
987 long secs_dur;
988 int usecs_dur;
989 WalUsage walusage;
990 BufferUsage bufferusage;
992 char *msgfmt;
993 int32 diff;
994 double read_rate = 0,
995 write_rate = 0;
999
1001 memset(&walusage, 0, sizeof(WalUsage));
1003 memset(&bufferusage, 0, sizeof(BufferUsage));
1005
1006 total_blks_hit = bufferusage.shared_blks_hit +
1007 bufferusage.local_blks_hit;
1008 total_blks_read = bufferusage.shared_blks_read +
1009 bufferusage.local_blks_read;
1011 bufferusage.local_blks_dirtied;
1012
1014 if (verbose)
1015 {
1016 /*
1017 * Aggressiveness already reported earlier, in dedicated
1018 * VACUUM VERBOSE ereport
1019 */
1020 Assert(!params.is_wraparound);
1021 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1022 }
1023 else if (params.is_wraparound)
1024 {
1025 /*
1026 * While it's possible for a VACUUM to be both is_wraparound
1027 * and !aggressive, that's just a corner-case -- is_wraparound
1028 * implies aggressive. Produce distinct output for the corner
1029 * case all the same, just in case.
1030 */
1031 if (vacrel->aggressive)
1032 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1033 else
1034 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 }
1036 else
1037 {
1038 if (vacrel->aggressive)
1039 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1040 else
1041 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 }
1044 vacrel->dbname,
1045 vacrel->relnamespace,
1046 vacrel->relname,
1047 vacrel->num_index_scans);
1048 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1049 vacrel->removed_pages,
1051 vacrel->scanned_pages,
1052 orig_rel_pages == 0 ? 100.0 :
1053 100.0 * vacrel->scanned_pages /
1055 vacrel->eager_scanned_pages);
1057 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1058 vacrel->tuples_deleted,
1059 (int64) vacrel->new_rel_tuples,
1060 vacrel->recently_dead_tuples);
1061 if (vacrel->missed_dead_tuples > 0)
1063 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1064 vacrel->missed_dead_tuples,
1065 vacrel->missed_dead_pages);
1067 vacrel->cutoffs.OldestXmin);
1069 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1070 vacrel->cutoffs.OldestXmin, diff);
1072 {
1073 diff = (int32) (vacrel->NewRelfrozenXid -
1074 vacrel->cutoffs.relfrozenxid);
1076 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1077 vacrel->NewRelfrozenXid, diff);
1078 }
1079 if (minmulti_updated)
1080 {
1081 diff = (int32) (vacrel->NewRelminMxid -
1082 vacrel->cutoffs.relminmxid);
1084 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1085 vacrel->NewRelminMxid, diff);
1086 }
1087 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1088 vacrel->new_frozen_tuple_pages,
1089 orig_rel_pages == 0 ? 100.0 :
1090 100.0 * vacrel->new_frozen_tuple_pages /
1092 vacrel->tuples_frozen);
1093
1095 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1096 vacrel->vm_new_visible_pages,
1097 vacrel->vm_new_visible_frozen_pages +
1098 vacrel->vm_new_frozen_pages,
1099 vacrel->vm_new_frozen_pages);
1100 if (vacrel->do_index_vacuuming)
1101 {
1102 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1103 appendStringInfoString(&buf, _("index scan not needed: "));
1104 else
1105 appendStringInfoString(&buf, _("index scan needed: "));
1106
1107 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1108 }
1109 else
1110 {
1112 appendStringInfoString(&buf, _("index scan bypassed: "));
1113 else
1114 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1115
1116 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1117 }
1119 vacrel->lpdead_item_pages,
1120 orig_rel_pages == 0 ? 100.0 :
1121 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1122 vacrel->lpdead_items);
1123 for (int i = 0; i < vacrel->nindexes; i++)
1124 {
1125 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1126
1127 if (!istat)
1128 continue;
1129
1131 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1132 indnames[i],
1133 istat->num_pages,
1134 istat->pages_newly_deleted,
1135 istat->pages_deleted,
1136 istat->pages_free);
1137 }
1139 {
1140 /*
1141 * We bypass the changecount mechanism because this value is
1142 * only updated by the calling process. We also rely on the
1143 * above call to pgstat_progress_end_command() to not clear
1144 * the st_progress_param array.
1145 */
1146 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1148 }
1149 if (track_io_timing)
1150 {
1151 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1152 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1153
1154 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1155 read_ms, write_ms);
1156 }
1157 if (secs_dur > 0 || usecs_dur > 0)
1158 {
1160 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1162 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1163 }
1164 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1167 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1172 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1173 walusage.wal_records,
1174 walusage.wal_fpi,
1175 walusage.wal_bytes,
1176 walusage.wal_fpi_bytes,
1177 walusage.wal_buffers_full);
1178
1179 /*
1180 * Report the dead items memory usage.
1181 *
1182 * The num_dead_items_resets counter increases when we reset the
1183 * collected dead items, so the counter is non-zero if at least
1184 * one dead item is collected, even if index vacuuming is
1185 * disabled.
1186 */
1188 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1189 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1190 vacrel->num_dead_items_resets),
1191 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1192 vacrel->num_dead_items_resets,
1193 (double) dead_items_max_bytes / (1024 * 1024));
1194 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1195
1196 ereport(verbose ? INFO : LOG,
1197 (errmsg_internal("%s", buf.data)));
1198 pfree(buf.data);
1199 }
1200 }
1201
1202 /* Cleanup index statistics and index names */
1203 for (int i = 0; i < vacrel->nindexes; i++)
1204 {
1205 if (vacrel->indstats[i])
1206 pfree(vacrel->indstats[i]);
1207
1208 if (instrument)
1209 pfree(indnames[i]);
1210 }
1211}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1721
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition bufmgr.c:176
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
#define ngettext(s, p, n)
Definition c.h:1170
int32_t int32
Definition c.h:542
int64 TimestampTz
Definition timestamp.h:39
ErrorContextCallback * error_context_stack
Definition elog.c:95
#define _(x)
Definition elog.c:91
#define LOG
Definition elog.h:31
#define INFO
Definition elog.h:34
#define palloc0_object(type)
Definition fe_memutils.h:75
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:285
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:245
#define NoLock
Definition lockdefs.h:34
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1242
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3516
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:383
static int verbose
const void * data
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
int64 PgStat_Counter
Definition pgstat.h:67
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define RelationGetNamespace(relation)
Definition rel.h:555
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
BlockNumber pages_deleted
Definition genam.h:88
BlockNumber pages_newly_deleted
Definition genam.h:87
BlockNumber pages_free
Definition genam.h:89
BlockNumber num_pages
Definition genam.h:83
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
int nworkers
Definition vacuum.h:251
VacOptValue truncate
Definition vacuum.h:236
bits32 options
Definition vacuum.h:219
int log_vacuum_min_duration
Definition vacuum.h:227
bool is_wraparound
Definition vacuum.h:226
VacOptValue index_cleanup
Definition vacuum.h:235
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static TransactionId ReadNextTransactionId(void)
Definition transam.h:377
bool track_cost_delay_timing
Definition vacuum.c:82
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2362
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2405
bool VacuumFailsafeActive
Definition vacuum.c:110
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
#define VACOPT_VERBOSE
Definition vacuum.h:182
@ VACOPTVALUE_AUTO
Definition vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition vacuumlazy.c:497
static void vacuum_error_callback(void *arg)
static void lazy_truncate_heap(LVRelState *vacrel)
static bool should_attempt_truncation(LVRelState *vacrel)
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:223
static void lazy_scan_heap(LVRelState *vacrel)
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg(), errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)
extern

Definition at line 9334 of file heapam.c.

9337{
9338 TransactionId xid;
9340
9341 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9342 return;
9343
9344 /*
9345 * Check to see whether the tuple has been written to by a concurrent
9346 * transaction, either to create it not visible to us, or to delete it
9347 * while it is visible to us. The "visible" bool indicates whether the
9348 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9349 * is going on with it.
9350 *
9351 * In the event of a concurrently inserted tuple that also happens to have
9352 * been concurrently updated (by a separate transaction), the xmin of the
9353 * tuple will be used -- not the updater's xid.
9354 */
9356 switch (htsvResult)
9357 {
9358 case HEAPTUPLE_LIVE:
9359 if (visible)
9360 return;
9361 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9362 break;
9365 if (visible)
9366 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9367 else
9368 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9369
9371 {
9372 /* This is like the HEAPTUPLE_DEAD case */
9373 Assert(!visible);
9374 return;
9375 }
9376 break;
9378 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9379 break;
9380 case HEAPTUPLE_DEAD:
9381 Assert(!visible);
9382 return;
9383 default:
9384
9385 /*
9386 * The only way to get to this default clause is if a new value is
9387 * added to the enum type without adding it to this switch
9388 * statement. That's a bug, so elog.
9389 */
9390 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9391
9392 /*
9393 * In spite of having all enum values covered and calling elog on
9394 * this default, some compilers think this is a code path which
9395 * allows xid to be used below without initialization. Silence
9396 * that warning.
9397 */
9399 }
9400
9403
9404 /*
9405 * Find top level xid. Bail out if xid is too early to be a conflict, or
9406 * if it's our own xid.
9407 */
9409 return;
9412 return;
9413
9414 CheckForSerializableConflictOut(relation, xid, snapshot);
9415}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition predicate.c:4021
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition subtrans.c:162
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:442

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, fb(), GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
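
A minimal sketch (not taken from the PostgreSQL tree) of how callers such as heap_fetch() pair a snapshot visibility test with this serializable-conflict check. The helper name fetch_with_ssi_check is hypothetical; the caller is assumed to hold at least a share lock on the buffer.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Hypothetical sketch: visibility check followed by the SSI conflict check */
static bool
fetch_with_ssi_check(Relation rel, Snapshot snapshot,
                     HeapTuple tuple, Buffer buffer)
{
    bool        valid;

    /* snapshot-type-appropriate visibility test first */
    valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);

    /*
     * Report a possible rw-conflict whether or not the tuple was visible:
     * an invisible tuple written by a concurrent transaction is exactly the
     * case SSI needs to learn about.
     */
    HeapCheckForSerializableConflictOut(valid, rel, tuple, buffer, snapshot);

    return valid;
}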

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)
extern

Definition at line 1365 of file heapam_visibility.c.

1366{
1367 TransactionId xmax;
1368
1369 /* if there's no valid Xmax, then there's obviously no update either */
1370 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1371 return true;
1372
1373 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1374 return true;
1375
1376 /* invalid xmax means no update */
1378 return true;
1379
1380 /*
1381 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1382 * necessarily have been updated
1383 */
1384 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1385 return false;
1386
1387 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1388 xmax = HeapTupleGetUpdateXid(tuple);
1389
1390 /* not LOCKED_ONLY, so it has to have an xmax */
1392
1394 return false;
1395 if (TransactionIdIsInProgress(xmax))
1396 return false;
1397 if (TransactionIdDidCommit(xmax))
1398 return false;
1399
1400 /*
1401 * not current, not in progress, not committed -- must have aborted or
1402 * crashed
1403 */
1404 return true;
1405}
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1404

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
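
A minimal sketch, in the spirit of heap_get_latest_tid(), of using this check to decide whether a tuple's t_ctid is a real forward link. The helper follow_update_chain and its arguments are hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/itemptr.h"

/* Hypothetical sketch: stop following the update chain at a lock-only xmax */
static bool
follow_update_chain(HeapTuple tup, ItemPointer next_tid)
{
    HeapTupleHeader htup = tup->t_data;

    if (HeapTupleHeaderIsOnlyLocked(htup))
        return false;           /* locked (or aborted updater), not updated */

    if (ItemPointerEquals(&tup->t_self, &htup->t_ctid))
        return false;           /* points at itself: no newer version */

    *next_tid = htup->t_ctid;   /* follow to the newer row version */
    return true;
}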

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
GlobalVisState vistest 
)
extern

Definition at line 1310 of file heapam_visibility.c.

1311{
1312 HeapTupleHeader tuple = htup->t_data;
1313
1315 Assert(htup->t_tableOid != InvalidOid);
1316
1317 /*
1318 * If the inserting transaction is marked invalid, then it aborted, and
1319 * the tuple is definitely dead. If it's marked neither committed nor
1320 * invalid, then we assume it's still alive (since the presumption is that
1321 * all relevant hint bits were just set moments ago).
1322 */
1323 if (!HeapTupleHeaderXminCommitted(tuple))
1324 return HeapTupleHeaderXminInvalid(tuple);
1325
1326 /*
1327 * If the inserting transaction committed, but any deleting transaction
1328 * aborted, the tuple is still alive.
1329 */
1330 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1331 return false;
1332
1333 /*
1334 * If the XMAX is just a lock, the tuple is still alive.
1335 */
1337 return false;
1338
1339 /*
1340 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1341 * know without checking pg_multixact.
1342 */
1343 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1344 return false;
1345
1346 /* If deleter isn't known to have committed, assume it's still running. */
1347 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1348 return false;
1349
1350 /* Deleter committed, so tuple is dead if the XID is old enough. */
1351 return GlobalVisTestIsRemovableXid(vistest,
1353}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define InvalidOid

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().
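
A minimal sketch of the cheap "surely dead" test used while walking a HOT chain, similar in spirit to heap_hot_search_buffer()'s all_dead tracking. The helper name is hypothetical; note that a false result only means "not provably dead from hint bits", not "visible".

#include "postgres.h"

#include "access/heapam.h"
#include "utils/snapmgr.h"

/* Hypothetical sketch: hint-bit-only deadness test against the GlobalVis horizon */
static bool
chain_member_is_surely_dead(Relation rel, HeapTuple tup)
{
    GlobalVisState *vistest = GlobalVisTestFor(rel);

    return HeapTupleIsSurelyDead(tup, vistest);
}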

◆ HeapTupleSatisfiesMVCCBatch()

int HeapTupleSatisfiesMVCCBatch ( Snapshot  snapshot,
Buffer  buffer,
int  ntups,
BatchMVCCState batchmvcc,
OffsetNumber vistuples_dense 
)
extern

Definition at line 1617 of file heapam_visibility.c.

1621{
1622 int nvis = 0;
1623
1624 Assert(IsMVCCSnapshot(snapshot));
1625
1626 for (int i = 0; i < ntups; i++)
1627 {
1628 bool valid;
1629 HeapTuple tup = &batchmvcc->tuples[i];
1630
1631 valid = HeapTupleSatisfiesMVCC(tup, snapshot, buffer);
1632 batchmvcc->visible[i] = valid;
1633
1634 if (likely(valid))
1635 {
1636 vistuples_dense[nvis] = tup->t_self.ip_posid;
1637 nvis++;
1638 }
1639 }
1640
1641 return nvis;
1642}
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)

References Assert, fb(), HeapTupleSatisfiesMVCC(), i, IsMVCCSnapshot, and likely.

Referenced by page_collect_tuples().
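
A minimal sketch of a page_collect_tuples()-style caller: gather every normal item on one heap page into the batch state, then run all MVCC checks in a single call. The helper name is hypothetical; it assumes an MVCC snapshot, a pinned and share-locked buffer, and that BatchMVCCState's tuples[]/visible[] arrays hold at least a page's worth of entries.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

/* Hypothetical sketch: batched per-page MVCC visibility */
static int
collect_visible_offsets(Relation rel, Snapshot snapshot, Buffer buffer,
                        OffsetNumber *vistuples)
{
    Page        page = BufferGetPage(buffer);
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
    OffsetNumber off;
    BatchMVCCState batchmvcc;
    int         ntups = 0;

    /* gather every normal item on the page into the batch state */
    for (off = FirstOffsetNumber; off <= maxoff; off++)
    {
        ItemId      lpp = PageGetItemId(page, off);
        HeapTuple   tup;

        if (!ItemIdIsNormal(lpp))
            continue;

        tup = &batchmvcc.tuples[ntups];
        tup->t_data = (HeapTupleHeader) PageGetItem(page, lpp);
        tup->t_len = ItemIdGetLength(lpp);
        tup->t_tableOid = RelationGetRelid(rel);
        ItemPointerSet(&tup->t_self, BufferGetBlockNumber(buffer), off);
        ntups++;
    }

    /* one call checks all ntups tuples; vistuples[] gets the visible offsets */
    return HeapTupleSatisfiesMVCCBatch(snapshot, buffer, ntups,
                                       &batchmvcc, vistuples);
}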

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)
extern

Definition at line 440 of file heapam_visibility.c.

442{
443 HeapTupleHeader tuple = htup->t_data;
444
446 Assert(htup->t_tableOid != InvalidOid);
447
449 {
451 return TM_Invisible;
452
453 else if (!HeapTupleCleanMoved(tuple, buffer))
454 return TM_Invisible;
456 {
457 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
458 return TM_Invisible; /* inserted after scan started */
459
460 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
461 return TM_Ok;
462
464 {
465 TransactionId xmax;
466
467 xmax = HeapTupleHeaderGetRawXmax(tuple);
468
469 /*
470 * Careful here: even though this tuple was created by our own
471 * transaction, it might be locked by other transactions, if
472 * the original version was key-share locked when we updated
473 * it.
474 */
475
476 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
477 {
478 if (MultiXactIdIsRunning(xmax, true))
479 return TM_BeingModified;
480 else
481 return TM_Ok;
482 }
483
484 /*
485 * If the locker is gone, then there is nothing of interest
486 * left in this Xmax; otherwise, report the tuple as
487 * locked/updated.
488 */
489 if (!TransactionIdIsInProgress(xmax))
490 return TM_Ok;
491 return TM_BeingModified;
492 }
493
494 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
495 {
496 TransactionId xmax;
497
498 xmax = HeapTupleGetUpdateXid(tuple);
499
500 /* not LOCKED_ONLY, so it has to have an xmax */
502
503 /* deleting subtransaction must have aborted */
505 {
507 false))
508 return TM_BeingModified;
509 return TM_Ok;
510 }
511 else
512 {
513 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
514 return TM_SelfModified; /* updated after scan started */
515 else
516 return TM_Invisible; /* updated before scan started */
517 }
518 }
519
521 {
522 /* deleting subtransaction must have aborted */
523 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
525 return TM_Ok;
526 }
527
528 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
529 return TM_SelfModified; /* updated after scan started */
530 else
531 return TM_Invisible; /* updated before scan started */
532 }
534 return TM_Invisible;
536 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
538 else
539 {
540 /* it must have aborted or crashed */
541 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
543 return TM_Invisible;
544 }
545 }
546
547 /* by here, the inserting transaction has committed */
548
549 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
550 return TM_Ok;
551
552 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
553 {
555 return TM_Ok;
556 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
557 return TM_Updated; /* updated by other */
558 else
559 return TM_Deleted; /* deleted by other */
560 }
561
562 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
563 {
564 TransactionId xmax;
565
567 return TM_Ok;
568
570 {
572 return TM_BeingModified;
573
575 return TM_Ok;
576 }
577
578 xmax = HeapTupleGetUpdateXid(tuple);
579 if (!TransactionIdIsValid(xmax))
580 {
582 return TM_BeingModified;
583 }
584
585 /* not LOCKED_ONLY, so it has to have an xmax */
587
589 {
590 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
591 return TM_SelfModified; /* updated after scan started */
592 else
593 return TM_Invisible; /* updated before scan started */
594 }
595
597 return TM_BeingModified;
598
599 if (TransactionIdDidCommit(xmax))
600 {
601 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
602 return TM_Updated;
603 else
604 return TM_Deleted;
605 }
606
607 /*
608 * By here, the update in the Xmax is either aborted or crashed, but
609 * what about the other members?
610 */
611
613 {
614 /*
615 * There's no member, even just a locker, alive anymore, so we can
616 * mark the Xmax as invalid.
617 */
618 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
620 return TM_Ok;
621 }
622 else
623 {
624 /* There are lockers running */
625 return TM_BeingModified;
626 }
627 }
628
630 {
632 return TM_BeingModified;
633 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
634 return TM_SelfModified; /* updated after scan started */
635 else
636 return TM_Invisible; /* updated before scan started */
637 }
638
640 return TM_BeingModified;
641
643 {
644 /* it must have aborted or crashed */
645 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
647 return TM_Ok;
648 }
649
650 /* xmax transaction committed */
651
653 {
654 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
656 return TM_Ok;
657 }
658
659 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
661 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
662 return TM_Updated; /* updated by other */
663 else
664 return TM_Deleted; /* deleted by other */
665}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
static bool HeapTupleCleanMoved(HeapTupleHeader tuple, Buffer buffer)
#define HEAP_XMIN_COMMITTED
#define HEAP_XMIN_INVALID
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:463

References Assert, HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().
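
A minimal sketch of interpreting the TM_Result before modifying a tuple, loosely following heap_update()/heap_delete(). The helper can_modify_now is hypothetical; real callers must handle TM_BeingModified by waiting and TM_Updated/TM_Deleted by following or reporting the conflict. An exclusive content lock on the buffer is assumed.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"

/* Hypothetical sketch: classify a target tuple before UPDATE/DELETE */
static bool
can_modify_now(HeapTuple tup, CommandId cid, Buffer buffer)
{
    TM_Result   res = HeapTupleSatisfiesUpdate(tup, cid, buffer);

    switch (res)
    {
        case TM_Ok:
            return true;        /* free to update or delete */
        case TM_SelfModified:   /* already changed by this command */
        case TM_Invisible:      /* not visible to us at all */
            return false;
        case TM_BeingModified:  /* a real caller would wait for the locker */
        case TM_Updated:
        case TM_Deleted:
        default:
            return false;       /* concurrent activity; caller must handle */
    }
}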

◆ HeapTupleSatisfiesVacuum()

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)
extern

Definition at line 1076 of file heapam_visibility.c.

1077{
1078 HeapTupleHeader tuple = htup->t_data;
1079
1081 Assert(htup->t_tableOid != InvalidOid);
1083
1085
1086 /*
1087 * Has inserting transaction committed?
1088 *
1089 * If the inserting transaction aborted, then the tuple was never visible
1090 * to any other transaction, so we can delete it immediately.
1091 */
1092 if (!HeapTupleHeaderXminCommitted(tuple))
1093 {
1094 if (HeapTupleHeaderXminInvalid(tuple))
1095 return HEAPTUPLE_DEAD;
1096 else if (!HeapTupleCleanMoved(tuple, buffer))
1097 return HEAPTUPLE_DEAD;
1099 {
1100 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1102 /* only locked? run infomask-only check first, for performance */
1106 /* inserted and then deleted by same xact */
1109 /* deleting subtransaction must have aborted */
1111 }
1113 {
1114 /*
1115 * It'd be possible to discern between INSERT/DELETE in progress
1116 * here by looking at xmax - but that doesn't seem beneficial for
1117 * the majority of callers and even detrimental for some. We'd
1118 * rather have callers look at/wait for xmin than xmax. It's
1119 * always correct to return INSERT_IN_PROGRESS because that's
1120 * what's happening from the view of other backends.
1121 */
1123 }
1125 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1127 else
1128 {
1129 /*
1130 * Not in Progress, Not Committed, so either Aborted or crashed
1131 */
1132 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1134 return HEAPTUPLE_DEAD;
1135 }
1136
1137 /*
1138 * At this point the xmin is known committed, but we might not have
1139 * been able to set the hint bit yet; so we can no longer Assert that
1140 * it's set.
1141 */
1142 }
1143
1144 /*
1145 * Okay, the inserter committed, so it was good at some point. Now what
1146 * about the deleting transaction?
1147 */
1148 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1149 return HEAPTUPLE_LIVE;
1150
1152 {
1153 /*
1154 * "Deleting" xact really only locked it, so the tuple is live in any
1155 * case. However, we should make sure that either XMAX_COMMITTED or
1156 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1157 * examining the tuple for future xacts.
1158 */
1159 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1160 {
1161 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1162 {
1163 /*
1164 * If it's a pre-pg_upgrade tuple, the multixact cannot
1165 * possibly be running; otherwise have to check.
1166 */
1167 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1169 true))
1170 return HEAPTUPLE_LIVE;
1172 }
1173 else
1174 {
1176 return HEAPTUPLE_LIVE;
1177 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1179 }
1180 }
1181
1182 /*
1183 * We don't really care whether xmax did commit, abort or crash. We
1184 * know that xmax did lock the tuple, but it did not and will never
1185 * actually update it.
1186 */
1187
1188 return HEAPTUPLE_LIVE;
1189 }
1190
1191 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1192 {
1194
1195 /* already checked above */
1197
1198 /* not LOCKED_ONLY, so it has to have an xmax */
1200
1201 if (TransactionIdIsInProgress(xmax))
1203 else if (TransactionIdDidCommit(xmax))
1204 {
1205 /*
1206 * The multixact might still be running due to lockers. Need to
1207 * allow for pruning if below the xid horizon regardless --
1208 * otherwise we could end up with a tuple where the updater has to
1209 * be removed due to the horizon, but is not pruned away. It's
1210 * not a problem to prune that tuple, because any remaining
1211 * lockers will also be present in newer tuple versions.
1212 */
1213 *dead_after = xmax;
1215 }
1216 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1217 {
1218 /*
1219 * Not in Progress, Not Committed, so either Aborted or crashed.
1220 * Mark the Xmax as invalid.
1221 */
1223 }
1224
1225 return HEAPTUPLE_LIVE;
1226 }
1227
1228 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1229 {
1233 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1235 else
1236 {
1237 /*
1238 * Not in Progress, Not Committed, so either Aborted or crashed
1239 */
1240 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1242 return HEAPTUPLE_LIVE;
1243 }
1244
1245 /*
1246 * At this point the xmax is known committed, but we might not have
1247 * been able to set the hint bit yet; so we can no longer Assert that
1248 * it's set.
1249 */
1250 }
1251
1252 /*
1253 * Deleter committed, allow caller to check if it was recent enough that
1254 * some open transactions could still see the tuple.
1255 */
1258}

References Assert, fb(), HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
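
A minimal sketch of resolving the dead_after horizon against a fixed OldestXmin cutoff, roughly what the HeapTupleSatisfiesVacuum() wrapper does. The helper name vacuum_status_for is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/transam.h"

/* Hypothetical sketch: turn RECENTLY_DEAD into DEAD once past the cutoff */
static HTSV_Result
vacuum_status_for(HeapTuple tup, Buffer buffer, TransactionId OldestXmin)
{
    TransactionId dead_after = InvalidTransactionId;
    HTSV_Result res;

    res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);

    /* deleter committed; the tuple is removable only below the cutoff */
    if (res == HEAPTUPLE_RECENTLY_DEAD &&
        TransactionIdIsValid(dead_after) &&
        TransactionIdPrecedes(dead_after, OldestXmin))
        res = HEAPTUPLE_DEAD;

    return res;
}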

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)
extern

Definition at line 1655 of file heapam_visibility.c.

1656{
1657 switch (snapshot->snapshot_type)
1658 {
1659 case SNAPSHOT_MVCC:
1660 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1661 case SNAPSHOT_SELF:
1662 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1663 case SNAPSHOT_ANY:
1664 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1665 case SNAPSHOT_TOAST:
1666 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1667 case SNAPSHOT_DIRTY:
1668 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1670 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1672 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1673 }
1674
1675 return false; /* keep compiler quiet */
1676}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition snapshot.h:70
@ SNAPSHOT_SELF
Definition snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition snapshot.h:114
@ SNAPSHOT_MVCC
Definition snapshot.h:46
@ SNAPSHOT_ANY
Definition snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition snapshot.h:105
@ SNAPSHOT_DIRTY
Definition snapshot.h:98
SnapshotType snapshot_type
Definition snapshot.h:140

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
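
A minimal sketch, similar to heapam_tuple_satisfies_snapshot(), of checking one already-fetched tuple against a snapshot. The content lock is held for the duration of the check because hint bits may be set; the helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Hypothetical sketch: single-tuple visibility under a share content lock */
static bool
tuple_visible_in_snapshot(HeapTuple tup, Snapshot snapshot, Buffer buffer)
{
    bool        res;

    LockBuffer(buffer, BUFFER_LOCK_SHARE);
    res = HeapTupleSatisfiesVisibility(tup, snapshot, buffer);
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

    return res;
}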

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)
extern

Definition at line 141 of file heapam_visibility.c.

143{
144 /*
145 * The uses from heapam.c rely on being able to perform the hint bit
146 * updates, which can only be guaranteed if we are holding an exclusive
147 * lock on the buffer - which all callers are doing.
148 */
150
151 SetHintBits(tuple, buffer, infomask, xid);
152}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2997

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferIsLockedByMeInMode(), fb(), and SetHintBits().

Referenced by UpdateXmaxHintBits().
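
A minimal sketch in the style of UpdateXmaxHintBits(): record the outcome of a tuple's deleting transaction as a hint bit once that outcome is known. Since HeapTupleSetHintBits() asserts an exclusive buffer lock, the caller is assumed to hold one; the helper name mark_xmax_committed is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/transam.h"
#include "storage/procarray.h"

/* Hypothetical sketch: set XMAX hint bits only when the xact's fate is known */
static void
mark_xmax_outcome(HeapTupleHeader tuple, Buffer buffer, TransactionId xmax)
{
    if (TransactionIdDidCommit(xmax))
        HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, xmax);
    else if (!TransactionIdIsInProgress(xmax))
    {
        /* neither committed nor in progress: it aborted or crashed */
        HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                             InvalidTransactionId);
    }
}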

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
Buffer  vmbuffer,
uint8  vmflags,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)
extern

Definition at line 2167 of file pruneheap.c.

2176{
2179 uint8 info;
2181
2182 /* The following local variables hold data registered in the WAL record: */
2186 xlhp_prune_items dead_items;
2189 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2191
2193
2194 xlrec.flags = 0;
2196
2197 /*
2198 * We can avoid an FPI of the heap page if the only modification we are
2199 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2200 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2201 * heap page with this record's LSN. Recovery skips records <= the stamped
2202 * LSN, so this could lead to skipping an earlier FPI needed to repair a
2203 * torn page.
2204 */
2205 if (!do_prune &&
2206 nfrozen == 0 &&
2209
2210 /*
2211 * Prepare data for the buffer. The arrays are not actually in the
2212 * buffer, but we pretend that they are. When XLogInsert stores a full
2213 * page image, the arrays can be omitted.
2214 */
2217
2218 if (do_set_vm)
2219 XLogRegisterBuffer(1, vmbuffer, 0);
2220
2221 if (nfrozen > 0)
2222 {
2223 int nplans;
2224
2226
2227 /*
2228 * Prepare deduplicated representation for use in the WAL record. This
2229 * destructively sorts frozen tuples array in-place.
2230 */
2231 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2232
2233 freeze_plans.nplans = nplans;
2235 offsetof(xlhp_freeze_plans, plans));
2236 XLogRegisterBufData(0, plans,
2237 sizeof(xlhp_freeze_plan) * nplans);
2238 }
2239 if (nredirected > 0)
2240 {
2242
2243 redirect_items.ntargets = nredirected;
2246 XLogRegisterBufData(0, redirected,
2247 sizeof(OffsetNumber[2]) * nredirected);
2248 }
2249 if (ndead > 0)
2250 {
2251 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2252
2253 dead_items.ntargets = ndead;
2254 XLogRegisterBufData(0, &dead_items,
2256 XLogRegisterBufData(0, dead,
2257 sizeof(OffsetNumber) * ndead);
2258 }
2259 if (nunused > 0)
2260 {
2262
2263 unused_items.ntargets = nunused;
2266 XLogRegisterBufData(0, unused,
2267 sizeof(OffsetNumber) * nunused);
2268 }
2269 if (nfrozen > 0)
2271 sizeof(OffsetNumber) * nfrozen);
2272
2273 /*
2274 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2275 * flag above.
2276 */
2278 {
2279 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2281 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2282 }
2284 xlrec.flags |= XLHP_IS_CATALOG_REL;
2287 if (cleanup_lock)
2288 xlrec.flags |= XLHP_CLEANUP_LOCK;
2289 else
2290 {
2291 Assert(nredirected == 0 && ndead == 0);
2292 /* also, any items in 'unused' must've been LP_DEAD previously */
2293 }
2297
2298 switch (reason)
2299 {
2300 case PRUNE_ON_ACCESS:
2302 break;
2303 case PRUNE_VACUUM_SCAN:
2305 break;
2308 break;
2309 default:
2310 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2311 break;
2312 }
2313 recptr = XLogInsert(RM_HEAP2_ID, info);
2314
2315 if (do_set_vm)
2316 {
2317 Assert(BufferIsDirty(vmbuffer));
2318 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2319 }
2320
2321 /*
2322 * See comment at the top of the function about regbuf_flags_heap for
2323 * details on when we can advance the page LSN.
2324 */
2325 if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2326 {
2327 Assert(BufferIsDirty(buffer));
2329 }
2330}
#define XLHP_HAS_CONFLICT_HORIZON
#define XLHP_HAS_FREEZE_PLANS
#define XLHP_VM_ALL_VISIBLE
#define SizeOfHeapPrune
#define XLHP_HAS_NOW_UNUSED_ITEMS
#define XLHP_VM_ALL_FROZEN
#define XLHP_HAS_REDIRECTIONS
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
#define XLHP_HAS_DEAD_ITEMS
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition pruneheap.c:2088
#define XLogHintBitIsNeeded()
Definition xlog.h:122
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33

References Assert, BufferGetPage(), BufferIsDirty(), data, elog, ERROR, fb(), heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, VISIBILITYMAP_VALID_BITS, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLHP_VM_ALL_FROZEN, XLHP_VM_ALL_VISIBLE, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogHintBitIsNeeded, XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().
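
A minimal sketch, loosely following lazy_vacuum_heap_page(), of WAL-logging a prune step that only turned previously LP_DEAD items into LP_UNUSED (no redirects, no freezing, no visibility-map change). The page modifications and MarkBufferDirty() are assumed to have already happened in the same critical section; the helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/* Hypothetical sketch: emit WAL for an LP_DEAD -> LP_UNUSED cleanup pass */
static void
log_unused_items(Relation relation, Buffer buffer,
                 OffsetNumber *unused, int nunused,
                 TransactionId conflict_xid)
{
    if (!RelationNeedsWAL(relation))
        return;

    log_heap_prune_and_freeze(relation, buffer,
                              InvalidBuffer, 0,     /* no VM update */
                              conflict_xid,
                              false,                /* no cleanup lock held */
                              PRUNE_VACUUM_CLEANUP,
                              NULL, 0,              /* nothing frozen */
                              NULL, 0,              /* no redirects */
                              NULL, 0,              /* no new LP_DEAD items */
                              unused, nunused);
}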

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)
extern

Definition at line 2112 of file heapam.c.

2113{
2114 if (bistate->current_buf != InvalidBuffer)
2115 ReleaseBuffer(bistate->current_buf);
2116 bistate->current_buf = InvalidBuffer;
2117
2118 /*
2119 * Despite the name, we also reset bulk relation extension state.
2120 * Otherwise we can end up erroring out due to looking for free space in
2121 * ->next_free of one partition, even though ->next_free was set when
2122 * extending another partition. It could obviously also be bad for
2123 * efficiency to look at existing blocks at offsets from another
2124 * partition, even if we don't error out.
2125 */
2126 bistate->next_free = InvalidBlockNumber;
2127 bistate->last_free = InvalidBlockNumber;
2128}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
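
A minimal sketch of a CopyFrom()-style loop that reuses one BulkInsertState across partitions but releases the pinned buffer (and the bulk extension hints) whenever the target partition changes. The helper bulk_insert_rows and its parameters are hypothetical.

#include "postgres.h"

#include "access/heapam.h"

/* Hypothetical sketch: partition-aware bulk insertion */
static void
bulk_insert_rows(Relation *parts, const int *part_for_row,
                 HeapTuple *tuples, int ntuples, CommandId cid)
{
    BulkInsertState bistate = GetBulkInsertState();
    int         prev_part = -1;
    int         i;

    for (i = 0; i < ntuples; i++)
    {
        int         part = part_for_row[i];

        /*
         * Switching partitions: the old pin and ->next_free hints belong to
         * the previous relation, so drop them before inserting elsewhere.
         */
        if (prev_part != -1 && part != prev_part)
            ReleaseBulkInsertStatePin(bistate);
        prev_part = part;

        heap_insert(parts[part], tuples[i], cid, 0, bistate);
    }

    FreeBulkInsertState(bistate);
}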

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)
extern

Definition at line 5557 of file reorderbuffer.c.

5561{
5564 ForkNumber forkno;
5565 BlockNumber blockno;
5566 bool updated_mapping = false;
5567
5568 /*
5569 * Return unresolved if tuplecid_data is not valid. That's because when
5570 * streaming in-progress transactions we may run into tuples with the CID
5571 * before actually decoding them. Think e.g. about INSERT followed by
5572 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5573 * INSERT. So in such cases, we assume the CID is from the future
5574 * command.
5575 */
5576 if (tuplecid_data == NULL)
5577 return false;
5578
5579 /* be careful about padding */
5580 memset(&key, 0, sizeof(key));
5581
5582 Assert(!BufferIsLocal(buffer));
5583
5584 /*
5585 * get relfilelocator from the buffer, no convenient way to access it
5586 * other than that.
5587 */
5588 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5589
5590 /* tuples can only be in the main fork */
5591 Assert(forkno == MAIN_FORKNUM);
5592 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5593
5594 ItemPointerCopy(&htup->t_self,
5595 &key.tid);
5596
5597restart:
5600
5601 /*
5602 * failed to find a mapping, check whether the table was rewritten and
5603 * apply mapping if so, but only do that once - there can be no new
5604 * mappings while we are in here since we have to hold a lock on the
5605 * relation.
5606 */
5607 if (ent == NULL && !updated_mapping)
5608 {
5610 /* now check but don't update for a mapping again */
5611 updated_mapping = true;
5612 goto restart;
5613 }
5614 else if (ent == NULL)
5615 return false;
5616
5617 if (cmin)
5618 *cmin = ent->cmin;
5619 if (cmax)
5620 *cmax = ent->cmax;
5621 return true;
5622}
#define BufferIsLocal(buffer)
Definition buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_FIND
Definition hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition snapmgr.c:163

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().
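
A minimal sketch of the HeapTupleSatisfiesHistoricMVCC()-style use: map a catalog tuple back to the cmin of the transaction being decoded, treating an unresolved mapping as "from a future command". The helper inserted_before_cursor and curcid are hypothetical; passing NULL for the cmax output is allowed by the definition above.

#include "postgres.h"

#include "access/heapam.h"
#include "utils/hsearch.h"

/* Hypothetical sketch: was this catalog tuple inserted before the cursor CID? */
static bool
inserted_before_cursor(HTAB *tuplecid_data, Snapshot snapshot,
                       HeapTuple htup, Buffer buffer, CommandId curcid)
{
    CommandId   cmin;

    if (!ResolveCminCmaxDuringDecoding(tuplecid_data, snapshot, htup,
                                       buffer, &cmin, NULL))
        return false;           /* unresolved: assume a future command */

    return cmin < curcid;
}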

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 3274 of file heapam.c.

3275{
3276 TM_Result result;
3277 TM_FailureData tmfd;
3278
3279 result = heap_delete(relation, tid,
3281 true /* wait for commit */ ,
3282 &tmfd, false /* changingPart */ );
3283 switch (result)
3284 {
3285 case TM_SelfModified:
3286 /* Tuple was already updated in current command? */
3287 elog(ERROR, "tuple already updated by self");
3288 break;
3289
3290 case TM_Ok:
3291 /* done successfully */
3292 break;
3293
3294 case TM_Updated:
3295 elog(ERROR, "tuple concurrently updated");
3296 break;
3297
3298 case TM_Deleted:
3299 elog(ERROR, "tuple concurrently deleted");
3300 break;
3301
3302 default:
3303 elog(ERROR, "unrecognized heap_delete status: %u", result);
3304 break;
3305 }
3306}
TM_Result heap_delete(Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition heapam.c:2851

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
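
A minimal sketch of deleting an already-located row by TID, as CatalogTupleDelete() and toast_delete_datum() do; the helper name is hypothetical. Any concurrent update or delete of the same row is reported as an error, which is why this interface suits only catalog-style, low-contention access.

#include "postgres.h"

#include "access/heapam.h"

/* Hypothetical sketch: simple deletion of a tuple we have already fetched */
static void
delete_located_tuple(Relation rel, HeapTuple tup)
{
    /* either succeeds or ereports "tuple concurrently updated/deleted" */
    simple_heap_delete(rel, &tup->t_self);
}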

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)
extern

Definition at line 2793 of file heapam.c.

2794{
2795 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2796}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition heapam.c:2150

References fb(), GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().
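
A minimal sketch of forming and inserting one tuple with simple_heap_insert(). Note that only the heap is written; index entries must be added separately, which is why catalog code goes through CatalogTupleInsert() instead. The helper insert_one_row is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "utils/rel.h"

/* Hypothetical sketch: heap-only insertion of a freshly formed tuple */
static void
insert_one_row(Relation rel, Datum *values, bool *isnull)
{
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

    simple_heap_insert(rel, tup);
    heap_freetuple(tup);
}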

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)
extern

Definition at line 4564 of file heapam.c.

4566{
4567 TM_Result result;
4568 TM_FailureData tmfd;
4569 LockTupleMode lockmode;
4570
4571 result = heap_update(relation, otid, tup,
4573 true /* wait for commit */ ,
4574 &tmfd, &lockmode, update_indexes);
4575 switch (result)
4576 {
4577 case TM_SelfModified:
4578 /* Tuple was already updated in current command? */
4579 elog(ERROR, "tuple already updated by self");
4580 break;
4581
4582 case TM_Ok:
4583 /* done successfully */
4584 break;
4585
4586 case TM_Updated:
4587 elog(ERROR, "tuple concurrently updated");
4588 break;
4589
4590 case TM_Deleted:
4591 elog(ERROR, "tuple concurrently deleted");
4592 break;
4593
4594 default:
4595 elog(ERROR, "unrecognized heap_update status: %u", result);
4596 break;
4597 }
4598}
TM_Result heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition heapam.c:3320

References elog, ERROR, fb(), GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
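
A minimal sketch of a CatalogTupleUpdate()-style row replacement. simple_heap_update() reports through update_indexes whether new index entries are needed (it can be TU_None after a HOT update), leaving the index maintenance decision to the caller. The helper replace_row_version is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"

/* Hypothetical sketch: update in place and report whether indexes need entries */
static bool
replace_row_version(Relation rel, ItemPointer otid, HeapTuple newtup)
{
    TU_UpdateIndexes update_indexes;

    simple_heap_update(rel, otid, newtup, &update_indexes);

    /* caller should insert new index entries unless the update was HOT */
    return update_indexes != TU_None;
}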