PostgreSQL Source Code git master
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "commands/vacuum.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h (graph not reproduced): it shows which files directly or indirectly include this file.

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeParams
 
struct  PruneFreezeResult
 
struct  BatchMVCCState
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 

Typedefs

typedef struct BulkInsertStateData * BulkInsertState
 
typedef struct GlobalVisState GlobalVisState
 
typedef struct TupleTableSlot TupleTableSlot
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData * HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData * BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeParams PruneFreezeParams
 
typedef struct PruneFreezeResult PruneFreezeResult
 
typedef struct BatchMVCCState BatchMVCCState
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, const ItemPointerData *tid)
 
void heap_abort_speculative (Relation relation, const ItemPointerData *tid)
 
TM_Result heap_update (Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, const ItemPointerData *tid)
 
void simple_heap_update (Relation relation, const ItemPointerData *otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, GlobalVisState *vistest)
 
int HeapTupleSatisfiesMVCCBatch (Snapshot snapshot, Buffer buffer, int ntups, BatchMVCCState *batchmvcc, OffsetNumber *vistuples_dense)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 39 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 40 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 44 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 43 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 51 of file heapam.h.

Typedef Documentation

◆ BatchMVCCState

◆ BitmapHeapScanDesc

◆ BitmapHeapScanDescData

◆ BulkInsertState

◆ GlobalVisState

Definition at line 47 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

Definition at line 102 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeParams

◆ PruneFreezeResult

◆ TupleTableSlot

Definition at line 48 of file heapam.h.

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

125{
126 HEAPTUPLE_DEAD, /* tuple is dead and deletable */
127 HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
128 HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
129 HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
130 HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
131 } HTSV_Result;
HTSV_Result
Definition heapam.h:125
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:129
@ HEAPTUPLE_LIVE
Definition heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:130
@ HEAPTUPLE_DEAD
Definition heapam.h:126

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 226 of file heapam.h.

227{
228 PRUNE_ON_ACCESS, /* on-access pruning */
229 PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
230 PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
231 } PruneReason;
PruneReason
Definition heapam.h:227
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:230
@ PRUNE_ON_ACCESS
Definition heapam.h:228
@ PRUNE_VACUUM_SCAN
Definition heapam.h:229

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)
extern

Definition at line 2091 of file heapam.c.

2092{
2093 if (bistate->current_buf != InvalidBuffer)
2094 ReleaseBuffer(bistate->current_buf);
2095 FreeAccessStrategy(bistate->strategy);
2096 pfree(bistate);
2097}
#define InvalidBuffer
Definition buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5501
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition freelist.c:643
void pfree(void *pointer)
Definition mcxt.c:1616
BufferAccessStrategy strategy
Definition hio.h:31
Buffer current_buf
Definition hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), MergePartitionsMoveRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )
extern

Definition at line 2074 of file heapam.c.

2075{
2076 BulkInsertState bistate;
2077
2080 bistate->current_buf = InvalidBuffer;
2081 bistate->next_free = InvalidBlockNumber;
2082 bistate->last_free = InvalidBlockNumber;
2083 bistate->already_extended_by = 0;
2084 return bistate;
2085}
#define InvalidBlockNumber
Definition block.h:33
@ BAS_BULKWRITE
Definition bufmgr.h:39
#define palloc_object(type)
Definition fe_memutils.h:74
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
struct BulkInsertStateData * BulkInsertState
Definition heapam.h:46
BlockNumber last_free
Definition hio.h:49
uint32 already_extended_by
Definition hio.h:50
BlockNumber next_free
Definition hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc_object, and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), MergePartitionsMoveRows(), and transientrel_startup().
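
Callers such as CopyFrom() pair GetBulkInsertState() and FreeBulkInsertState() around a batch of inserts. A minimal sketch, assuming rel, tuples, ntuples and cid are set up by the caller (example_bulk_insert is illustrative, not from the source):

#include "postgres.h"
#include "access/heapam.h"

/* Hedged sketch of the bulk-insert lifecycle; error handling omitted. */
static void
example_bulk_insert(Relation rel, HeapTuple *tuples, int ntuples, CommandId cid)
{
    BulkInsertState bistate = GetBulkInsertState();

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

    FreeBulkInsertState(bistate);
}

Reusing one BulkInsertState across the whole batch keeps the current target buffer pinned and uses a BAS_BULKWRITE access strategy, so repeated inserts avoid re-finding free space for every tuple.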

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6254 of file heapam.c.

6255{
6257 ItemId lp;
6258 HeapTupleData tp;
6259 Page page;
6260 BlockNumber block;
6261 Buffer buffer;
6262
6264
6265 block = ItemPointerGetBlockNumber(tid);
6266 buffer = ReadBuffer(relation, block);
6267 page = BufferGetPage(buffer);
6268
6270
6271 /*
6272 * Page can't be all visible, we just inserted into it, and are still
6273 * running.
6274 */
6275 Assert(!PageIsAllVisible(page));
6276
6279
6280 tp.t_tableOid = RelationGetRelid(relation);
6281 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6282 tp.t_len = ItemIdGetLength(lp);
6283 tp.t_self = *tid;
6284
6285 /*
6286 * Sanity check that the tuple really is a speculatively inserted tuple,
6287 * inserted by us.
6288 */
6289 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6290 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6291 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6292 elog(ERROR, "attempted to kill a non-speculative tuple");
6294
6295 /*
6296 * No need to check for serializable conflicts here. There is never a
6297 * need for a combo CID, either. No need to extract replica identity, or
6298 * do anything special with infomask bits.
6299 */
6300
6302
6303 /*
6304 * The tuple will become DEAD immediately. Flag that this page is a
6305 * candidate for pruning by setting xmin to TransactionXmin. While not
6306 * immediately prunable, it is the oldest xid we can cheaply determine
6307 * that's safe against wraparound / being older than the table's
6308 * relfrozenxid. To defend against the unlikely case of a new relation
6309 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6310 * if so (vacuum can't subsequently move relfrozenxid to beyond
6311 * TransactionXmin, so there's no race here).
6312 */
6314 {
6315 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6317
6318 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6319 prune_xid = relfrozenxid;
6320 else
6323 }
6324
6325 /* store transaction information of xact deleting the tuple */
6328
6329 /*
6330 * Set the tuple header xmin to InvalidTransactionId. This makes the
6331 * tuple immediately invisible everyone. (In particular, to any
6332 * transactions waiting on the speculative token, woken up later.)
6333 */
6335
6336 /* Clear the speculative insertion token too */
6337 tp.t_data->t_ctid = tp.t_self;
6338
6339 MarkBufferDirty(buffer);
6340
6341 /*
6342 * XLOG stuff
6343 *
6344 * The WAL records generated here match heap_delete(). The same recovery
6345 * routines are used.
6346 */
6347 if (RelationNeedsWAL(relation))
6348 {
6351
6353 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
6354 tp.t_data->t_infomask2);
6356 xlrec.xmax = xid;
6357
6361
6362 /* No replica identity & replication origin logged */
6363
6365
6366 PageSetLSN(page, recptr);
6367 }
6368
6370
6372
6373 if (HeapTupleHasExternal(&tp))
6374 {
6375 Assert(!IsToastRelation(relation));
6376 heap_toast_delete(relation, &tp, true);
6377 }
6378
6379 /*
6380 * Never need to mark tuple for invalidation, since catalogs don't support
6381 * speculative insertion
6382 */
6383
6384 /* Now we can release the buffer */
6385 ReleaseBuffer(buffer);
6386
6387 /* count deletion, as we counted the insertion too */
6388 pgstat_count_heap_delete(relation);
6389}
uint32 BlockNumber
Definition block.h:31
int Buffer
Definition buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3056
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:864
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:428
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:353
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
PageData * Page
Definition bufpage.h:81
#define PageSetPrunable(page, xid)
Definition bufpage.h:446
#define Assert(condition)
Definition c.h:873
uint32 TransactionId
Definition c.h:666
bool IsToastRelation(Relation relation)
Definition catalog.c:206
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition heapam.c:2797
#define XLOG_HEAP_DELETE
Definition heapam_xlog.h:34
#define SizeOfHeapDelete
#define XLH_DELETE_IS_SUPER
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
#define HEAP_XMAX_BITS
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
#define HEAP_MOVED
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition itemptr.h:83
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationNeedsWAL(relation)
Definition rel.h:637
TransactionId TransactionXmin
Definition snapmgr.c:159
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
TransactionId t_xmin
union HeapTupleHeaderData::@49 t_choice
ItemPointerData t_ctid
HeapTupleFields t_heap
Form_pg_class rd_rel
Definition rel.h:111
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
TransactionId GetCurrentTransactionId(void)
Definition xact.c:455
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:245
void XLogBeginInsert(void)
Definition xloginsert.c:152
#define REGBUF_STANDARD
Definition xloginsert.h:35

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)
extern

Definition at line 1163 of file heapam.c.

1167{
1168 HeapScanDesc scan;
1169
1170 /*
1171 * increment relation ref count while scanning relation
1172 *
1173 * This is just to make really sure the relcache entry won't go away while
1174 * the scan has a pointer to it. Caller should be holding the rel open
1175 * anyway, so this is redundant in all normal scenarios...
1176 */
1178
1179 /*
1180 * allocate and initialize scan descriptor
1181 */
1182 if (flags & SO_TYPE_BITMAPSCAN)
1183 {
1185
1186 /*
1187 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1188 * does not have, so no special initializations required here.
1189 */
1190 scan = (HeapScanDesc) bscan;
1191 }
1192 else
1194
1195 scan->rs_base.rs_rd = relation;
1196 scan->rs_base.rs_snapshot = snapshot;
1197 scan->rs_base.rs_nkeys = nkeys;
1198 scan->rs_base.rs_flags = flags;
1199 scan->rs_base.rs_parallel = parallel_scan;
1200 scan->rs_strategy = NULL; /* set in initscan */
1201 scan->rs_cbuf = InvalidBuffer;
1202
1203 /*
1204 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1205 */
1206 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1208
1209 /* Check that a historic snapshot is not used for non-catalog tables */
1210 if (snapshot &&
1211 IsHistoricMVCCSnapshot(snapshot) &&
1213 {
1214 ereport(ERROR,
1216 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
1217 RelationGetRelationName(relation))));
1218 }
1219
1220 /*
1221 * For seqscan and sample scans in a serializable transaction, acquire a
1222 * predicate lock on the entire relation. This is required not only to
1223 * lock all the matching tuples, but also to conflict with new insertions
1224 * into the table. In an indexscan, we take page locks on the index pages
1225 * covering the range specified in the scan qual, but in a heap scan there
1226 * is nothing more fine-grained to lock. A bitmap scan is a different
1227 * story, there we have already scanned the index and locked the index
1228 * pages covering the predicate. But in that case we still have to lock
1229 * any matching heap tuples. For sample scan we could optimize the locking
1230 * to be at least page-level granularity, but we'd need to add per-tuple
1231 * locking for that.
1232 */
1234 {
1235 /*
1236 * Ensure a missing snapshot is noticed reliably, even if the
1237 * isolation mode means predicate locking isn't performed (and
1238 * therefore the snapshot isn't used here).
1239 */
1240 Assert(snapshot);
1241 PredicateLockRelation(relation, snapshot);
1242 }
1243
1244 /* we only need to set this up once */
1245 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1246
1247 /*
1248 * Allocate memory to keep track of page allocation for parallel workers
1249 * when doing a parallel scan.
1250 */
1251 if (parallel_scan != NULL)
1253 else
1255
1256 /*
1257 * we do this here instead of in initscan() because heap_rescan also calls
1258 * initscan() and we don't want to allocate memory again
1259 */
1260 if (nkeys > 0)
1261 scan->rs_base.rs_key = palloc_array(ScanKeyData, nkeys);
1262 else
1263 scan->rs_base.rs_key = NULL;
1264
1265 initscan(scan, key, false);
1266
1267 scan->rs_read_stream = NULL;
1268
1269 /*
1270 * Set up a read stream for sequential scans and TID range scans. This
1271 * should be done after initscan() because initscan() allocates the
1272 * BufferAccessStrategy object passed to the read stream API.
1273 */
1274 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1276 {
1278
1279 if (scan->rs_base.rs_parallel)
1281 else
1283
1284 /* ---
1285 * It is safe to use batchmode as the only locks taken by `cb`
1286 * are never taken while waiting for IO:
1287 * - SyncScanLock is used in the non-parallel case
1288 * - in the parallel case, only spinlocks and atomics are used
1289 * ---
1290 */
1293 scan->rs_strategy,
1294 scan->rs_base.rs_rd,
1296 cb,
1297 scan,
1298 0);
1299 }
1300 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1301 {
1304 scan->rs_strategy,
1305 scan->rs_base.rs_rd,
1308 scan,
1309 sizeof(TBMIterateResult));
1310 }
1311
1312
1313 return (TableScanDesc) scan;
1314}
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:251
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:291
static BlockNumber bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, void *per_buffer_data)
Definition heapam.c:316
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition heapam.c:356
struct HeapScanDescData * HeapScanDesc
Definition heapam.h:102
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition predicate.c:2574
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition read_stream.h:77
#define READ_STREAM_DEFAULT
Definition read_stream.h:21
#define READ_STREAM_SEQUENTIAL
Definition read_stream.h:36
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition rel.h:693
void RelationIncrementReferenceCount(Relation rel)
Definition relcache.c:2182
@ MAIN_FORKNUM
Definition relpath.h:58
#define IsHistoricMVCCSnapshot(snapshot)
Definition snapmgr.h:59
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:55
BufferAccessStrategy rs_strategy
Definition heapam.h:73
Buffer rs_cbuf
Definition heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition heapam.h:95
HeapTupleData rs_ctup
Definition heapam.h:75
ReadStream * rs_read_stream
Definition heapam.h:78
TableScanDescData rs_base
Definition heapam.h:58
Relation rs_rd
Definition relscan.h:35
uint32 rs_flags
Definition relscan.h:63
struct ScanKeyData * rs_key
Definition relscan.h:38
struct SnapshotData * rs_snapshot
Definition relscan.h:36
struct ParallelTableScanDescData * rs_parallel
Definition relscan.h:65
@ SO_TYPE_TIDRANGESCAN
Definition tableam.h:53
@ SO_TYPE_SAMPLESCAN
Definition tableam.h:51
@ SO_TYPE_SEQSCAN
Definition tableam.h:49
@ SO_TYPE_BITMAPSCAN
Definition tableam.h:50

References Assert, bitmapheap_stream_read_next(), ereport, errcode(), errmsg(), ERROR, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsHistoricMVCCSnapshot, IsMVCCSnapshot, MAIN_FORKNUM, palloc_array, palloc_object, PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelationName, RelationGetRelid, RelationIncrementReferenceCount(), RelationIsAccessibleInLogicalDecoding, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
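
For reference, a minimal forward sequential scan built from heap_beginscan(), heap_getnext() and heap_endscan(). This is a sketch only: the flag combination mirrors what the heap table AM normally passes for a seqscan, and example_seqscan is an illustrative name, not part of the source:

#include "postgres.h"
#include "access/heapam.h"

/* Hedged sketch: visit every tuple visible under 'snapshot'. */
static void
example_seqscan(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan;
    HeapTuple   tuple;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                          SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* the returned tuple is only valid until the next heap_getnext() call */
    }

    heap_endscan(scan);
}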

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
const ItemPointerData tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
bool  changingPart 
)
extern

Definition at line 2842 of file heapam.c.

2845{
2846 TM_Result result;
2848 ItemId lp;
2849 HeapTupleData tp;
2850 Page page;
2851 BlockNumber block;
2852 Buffer buffer;
2853 Buffer vmbuffer = InvalidBuffer;
2854 TransactionId new_xmax;
2857 bool have_tuple_lock = false;
2858 bool iscombo;
2859 bool all_visible_cleared = false;
2860 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2861 bool old_key_copied = false;
2862
2864
2865 AssertHasSnapshotForToast(relation);
2866
2867 /*
2868 * Forbid this during a parallel operation, lest it allocate a combo CID.
2869 * Other workers might need that combo CID for visibility checks, and we
2870 * have no provision for broadcasting it to them.
2871 */
2872 if (IsInParallelMode())
2873 ereport(ERROR,
2875 errmsg("cannot delete tuples during a parallel operation")));
2876
2877 block = ItemPointerGetBlockNumber(tid);
2878 buffer = ReadBuffer(relation, block);
2879 page = BufferGetPage(buffer);
2880
2881 /*
2882 * Before locking the buffer, pin the visibility map page if it appears to
2883 * be necessary. Since we haven't got the lock yet, someone else might be
2884 * in the middle of changing this, so we'll need to recheck after we have
2885 * the lock.
2886 */
2887 if (PageIsAllVisible(page))
2888 visibilitymap_pin(relation, block, &vmbuffer);
2889
2891
2894
2895 tp.t_tableOid = RelationGetRelid(relation);
2896 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2897 tp.t_len = ItemIdGetLength(lp);
2898 tp.t_self = *tid;
2899
2900l1:
2901
2902 /*
2903 * If we didn't pin the visibility map page and the page has become all
2904 * visible while we were busy locking the buffer, we'll have to unlock and
2905 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2906 * unfortunate, but hopefully shouldn't happen often.
2907 */
2908 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2909 {
2911 visibilitymap_pin(relation, block, &vmbuffer);
2913 }
2914
2915 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2916
2917 if (result == TM_Invisible)
2918 {
2919 UnlockReleaseBuffer(buffer);
2920 ereport(ERROR,
2922 errmsg("attempted to delete invisible tuple")));
2923 }
2924 else if (result == TM_BeingModified && wait)
2925 {
2928
2929 /* must copy state data before unlocking buffer */
2932
2933 /*
2934 * Sleep until concurrent transaction ends -- except when there's a
2935 * single locker and it's our own transaction. Note we don't care
2936 * which lock mode the locker has, because we need the strongest one.
2937 *
2938 * Before sleeping, we need to acquire tuple lock to establish our
2939 * priority for the tuple (see heap_lock_tuple). LockTuple will
2940 * release us when we are next-in-line for the tuple.
2941 *
2942 * If we are forced to "start over" below, we keep the tuple lock;
2943 * this arranges that we stay at the head of the line while rechecking
2944 * tuple state.
2945 */
2947 {
2948 bool current_is_member = false;
2949
2952 {
2954
2955 /*
2956 * Acquire the lock, if necessary (but skip it when we're
2957 * requesting a lock and already have one; avoids deadlock).
2958 */
2959 if (!current_is_member)
2962
2963 /* wait for multixact */
2965 relation, &(tp.t_self), XLTW_Delete,
2966 NULL);
2968
2969 /*
2970 * If xwait had just locked the tuple then some other xact
2971 * could update this tuple before we get to this point. Check
2972 * for xmax change, and start over if so.
2973 *
2974 * We also must start over if we didn't pin the VM page, and
2975 * the page has become all visible.
2976 */
2977 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2980 xwait))
2981 goto l1;
2982 }
2983
2984 /*
2985 * You might think the multixact is necessarily done here, but not
2986 * so: it could have surviving members, namely our own xact or
2987 * other subxacts of this backend. It is legal for us to delete
2988 * the tuple in either case, however (the latter case is
2989 * essentially a situation of upgrading our former shared lock to
2990 * exclusive). We don't bother changing the on-disk hint bits
2991 * since we are about to overwrite the xmax altogether.
2992 */
2993 }
2995 {
2996 /*
2997 * Wait for regular transaction to end; but first, acquire tuple
2998 * lock.
2999 */
3003 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
3005
3006 /*
3007 * xwait is done, but if xwait had just locked the tuple then some
3008 * other xact could update this tuple before we get to this point.
3009 * Check for xmax change, and start over if so.
3010 *
3011 * We also must start over if we didn't pin the VM page, and the
3012 * page has become all visible.
3013 */
3014 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
3017 xwait))
3018 goto l1;
3019
3020 /* Otherwise check if it committed or aborted */
3021 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
3022 }
3023
3024 /*
3025 * We may overwrite if previous xmax aborted, or if it committed but
3026 * only locked the tuple without updating it.
3027 */
3028 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3031 result = TM_Ok;
3032 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
3033 result = TM_Updated;
3034 else
3035 result = TM_Deleted;
3036 }
3037
3038 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3039 if (result != TM_Ok)
3040 {
3041 Assert(result == TM_SelfModified ||
3042 result == TM_Updated ||
3043 result == TM_Deleted ||
3044 result == TM_BeingModified);
3046 Assert(result != TM_Updated ||
3048 }
3049
3050 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3051 {
3052 /* Perform additional check for transaction-snapshot mode RI updates */
3053 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
3054 result = TM_Updated;
3055 }
3056
3057 if (result != TM_Ok)
3058 {
3059 tmfd->ctid = tp.t_data->t_ctid;
3061 if (result == TM_SelfModified)
3063 else
3064 tmfd->cmax = InvalidCommandId;
3065 UnlockReleaseBuffer(buffer);
3066 if (have_tuple_lock)
3068 if (vmbuffer != InvalidBuffer)
3069 ReleaseBuffer(vmbuffer);
3070 return result;
3071 }
3072
3073 /*
3074 * We're about to do the actual delete -- check for conflict first, to
3075 * avoid possibly having to roll back work we've just done.
3076 *
3077 * This is safe without a recheck as long as there is no possibility of
3078 * another process scanning the page between this check and the delete
3079 * being visible to the scan (i.e., an exclusive buffer content lock is
3080 * continuously held from this point until the tuple delete is visible).
3081 */
3083
3084 /* replace cid with a combo CID if necessary */
3086
3087 /*
3088 * Compute replica identity tuple before entering the critical section so
3089 * we don't PANIC upon a memory allocation failure.
3090 */
3091 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
3092
3093 /*
3094 * If this is the first possibly-multixact-able operation in the current
3095 * transaction, set my per-backend OldestMemberMXactId setting. We can be
3096 * certain that the transaction will never become a member of any older
3097 * MultiXactIds than that. (We have to do this even if we end up just
3098 * using our own TransactionId below, since some other backend could
3099 * incorporate our XID into a MultiXact immediately afterwards.)
3100 */
3102
3105 xid, LockTupleExclusive, true,
3106 &new_xmax, &new_infomask, &new_infomask2);
3107
3109
3110 /*
3111 * If this transaction commits, the tuple will become DEAD sooner or
3112 * later. Set flag that this page is a candidate for pruning once our xid
3113 * falls below the OldestXmin horizon. If the transaction finally aborts,
3114 * the subsequent page pruning will be a no-op and the hint will be
3115 * cleared.
3116 */
3117 PageSetPrunable(page, xid);
3118
3119 if (PageIsAllVisible(page))
3120 {
3121 all_visible_cleared = true;
3122 PageClearAllVisible(page);
3123 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3124 vmbuffer, VISIBILITYMAP_VALID_BITS);
3125 }
3126
3127 /* store transaction information of xact deleting the tuple */
3133 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3135 /* Make sure there is no forward chain link in t_ctid */
3136 tp.t_data->t_ctid = tp.t_self;
3137
3138 /* Signal that this is actually a move into another partition */
3139 if (changingPart)
3141
3142 MarkBufferDirty(buffer);
3143
3144 /*
3145 * XLOG stuff
3146 *
3147 * NB: heap_abort_speculative() uses the same xlog record and replay
3148 * routines.
3149 */
3150 if (RelationNeedsWAL(relation))
3151 {
3155
3156 /*
3157 * For logical decode we need combo CIDs to properly decode the
3158 * catalog
3159 */
3161 log_heap_new_cid(relation, &tp);
3162
3163 xlrec.flags = 0;
3166 if (changingPart)
3168 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
3169 tp.t_data->t_infomask2);
3171 xlrec.xmax = new_xmax;
3172
3173 if (old_key_tuple != NULL)
3174 {
3175 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3177 else
3179 }
3180
3183
3185
3186 /*
3187 * Log replica identity of the deleted tuple if there is one
3188 */
3189 if (old_key_tuple != NULL)
3190 {
3191 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3192 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3193 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3194
3196 XLogRegisterData((char *) old_key_tuple->t_data
3198 old_key_tuple->t_len
3200 }
3201
3202 /* filtering by origin on a row level is much more efficient */
3204
3206
3207 PageSetLSN(page, recptr);
3208 }
3209
3211
3213
3214 if (vmbuffer != InvalidBuffer)
3215 ReleaseBuffer(vmbuffer);
3216
3217 /*
3218 * If the tuple has toasted out-of-line attributes, we need to delete
3219 * those items too. We have to do this before releasing the buffer
3220 * because we need to look at the contents of the tuple, but it's OK to
3221 * release the content lock on the buffer first.
3222 */
3223 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3224 relation->rd_rel->relkind != RELKIND_MATVIEW)
3225 {
3226 /* toast table entries should never be recursively toasted */
3228 }
3229 else if (HeapTupleHasExternal(&tp))
3230 heap_toast_delete(relation, &tp, false);
3231
3232 /*
3233 * Mark tuple for invalidation from system caches at next command
3234 * boundary. We have to do this before releasing the buffer because we
3235 * need to look at the contents of the tuple.
3236 */
3237 CacheInvalidateHeapTuple(relation, &tp, NULL);
3238
3239 /* Now we can release the buffer */
3240 ReleaseBuffer(buffer);
3241
3242 /*
3243 * Release the lmgr tuple lock, if we had it.
3244 */
3245 if (have_tuple_lock)
3247
3248 pgstat_count_heap_delete(relation);
3249
3252
3253 return TM_Ok;
3254}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4356
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5518
static void PageClearAllVisible(Page page)
Definition bufpage.h:438
#define InvalidCommandId
Definition c.h:683
TransactionId MultiXactId
Definition c.h:676
uint16_t uint16
Definition c.h:545
void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, CommandId *cmax, bool *iscombo)
Definition combocid.c:153
CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup)
Definition combocid.c:118
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition heapam.c:7675
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition heapam.c:9140
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition heapam.c:5394
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition heapam.c:5345
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition heapam.c:9221
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper, int *remaining)
Definition heapam.c:7853
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition heapam.c:2819
#define UnlockTupleTuplock(rel, tup, mode)
Definition heapam.c:168
static void AssertHasSnapshotForToast(Relation rel)
Definition heapam.c:224
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition heapam.c:2052
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
#define XLH_DELETE_ALL_VISIBLE_CLEARED
#define SizeOfHeapHeader
#define XLH_DELETE_IS_PARTITION_MOVE
#define XLH_DELETE_CONTAINS_OLD_TUPLE
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1435
#define SizeofHeapTupleHeader
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
static void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo)
static void HeapTupleHeaderClearHotUpdated(HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_IS_MULTI
#define HEAP_XMAX_INVALID
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetMovedPartitions(HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition inval.c:1571
bool ItemPointerEquals(const ItemPointerData *pointer1, const ItemPointerData *pointer2)
Definition itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper)
Definition lmgr.c:663
@ XLTW_Delete
Definition lmgr.h:28
@ LockWaitBlock
Definition lockoptions.h:39
@ LockTupleExclusive
Definition lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition multixact.c:537
@ MultiXactStatusUpdate
Definition multixact.h:45
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4334
#define InvalidSnapshot
Definition snapshot.h:119
TransactionId xmax
Definition tableam.h:150
CommandId cmax
Definition tableam.h:151
ItemPointerData ctid
Definition tableam.h:149
TM_Result
Definition tableam.h:73
@ TM_Ok
Definition tableam.h:78
@ TM_BeingModified
Definition tableam.h:100
@ TM_Deleted
Definition tableam.h:93
@ TM_Updated
Definition tableam.h:90
@ TM_SelfModified
Definition tableam.h:84
@ TM_Invisible
Definition tableam.h:81
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:942
bool IsInParallelMode(void)
Definition xact.c:1090
#define XLOG_INCLUDE_ORIGIN
Definition xlog.h:165
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:460

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
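
A minimal caller looks roughly like simple_heap_delete(): invoke heap_delete() with wait = true and treat anything other than TM_Ok as a concurrency failure. The sketch below is illustrative only; example_delete and its error message are not from the source:

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hedged sketch: delete one tuple by TID in the current command. */
static void
example_delete(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid, GetCurrentCommandId(true),
                         InvalidSnapshot, true /* wait */,
                         &tmfd, false /* changingPart */);
    if (result != TM_Ok)
        elog(ERROR, "tuple concurrently updated or deleted");
}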

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)
extern

Definition at line 1370 of file heapam.c.

1371{
1373
1374 /* Note: no locking manipulations needed */
1375
1376 /*
1377 * unpin scan buffers
1378 */
1379 if (BufferIsValid(scan->rs_cbuf))
1380 ReleaseBuffer(scan->rs_cbuf);
1381
1382 /*
1383 * Must free the read stream before freeing the BufferAccessStrategy.
1384 */
1385 if (scan->rs_read_stream)
1387
1388 /*
1389 * decrement relation reference count and free scan descriptor storage
1390 */
1392
1393 if (scan->rs_base.rs_key)
1394 pfree(scan->rs_base.rs_key);
1395
1396 if (scan->rs_strategy != NULL)
1398
1399 if (scan->rs_parallelworkerdata != NULL)
1401
1402 if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1404
1405 pfree(scan);
1406}
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
void read_stream_end(ReadStream *stream)
void RelationDecrementReferenceCount(Relation rel)
Definition relcache.c:2195
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
@ SO_TEMP_SNAPSHOT
Definition tableam.h:65

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze frz 
)
inlinestatic

Definition at line 492 of file heapam.h.

493{
494 HeapTupleHeaderSetXmax(tuple, frz->xmax);
495
496 if (frz->frzflags & XLH_FREEZE_XVAC)
498
499 if (frz->frzflags & XLH_INVALID_XVAC)
501
502 tuple->t_infomask = frz->t_infomask;
503 tuple->t_infomask2 = frz->t_infomask2;
504}
#define XLH_INVALID_XVAC
#define XLH_FREEZE_XVAC
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
#define FrozenTransactionId
Definition transam.h:33

References FrozenTransactionId, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)
extern

Definition at line 1658 of file heapam.c.

1663{
1664 ItemPointer tid = &(tuple->t_self);
1665 ItemId lp;
1666 Buffer buffer;
1667 Page page;
1668 OffsetNumber offnum;
1669 bool valid;
1670
1671 /*
1672 * Fetch and pin the appropriate page of the relation.
1673 */
1674 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1675
1676 /*
1677 * Need share lock on buffer to examine tuple commit status.
1678 */
1680 page = BufferGetPage(buffer);
1681
1682 /*
1683 * We'd better check for out-of-range offnum in case of VACUUM since the
1684 * TID was obtained.
1685 */
1686 offnum = ItemPointerGetOffsetNumber(tid);
1688 {
1690 ReleaseBuffer(buffer);
1692 tuple->t_data = NULL;
1693 return false;
1694 }
1695
1696 /*
1697 * get the item line pointer corresponding to the requested tid
1698 */
1699 lp = PageGetItemId(page, offnum);
1700
1701 /*
1702 * Must check for deleted tuple.
1703 */
1704 if (!ItemIdIsNormal(lp))
1705 {
1707 ReleaseBuffer(buffer);
1709 tuple->t_data = NULL;
1710 return false;
1711 }
1712
1713 /*
1714 * fill in *tuple fields
1715 */
1716 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1717 tuple->t_len = ItemIdGetLength(lp);
1718 tuple->t_tableOid = RelationGetRelid(relation);
1719
1720 /*
1721 * check tuple visibility, then release lock
1722 */
1723 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1724
1725 if (valid)
1726 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1728
1729 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1730
1732
1733 if (valid)
1734 {
1735 /*
1736 * All checks passed, so return the tuple as valid. Caller is now
1737 * responsible for releasing the buffer.
1738 */
1739 *userbuf = buffer;
1740
1741 return true;
1742 }
1743
1744 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1745 if (keep_buf)
1746 *userbuf = buffer;
1747 else
1748 {
1749 ReleaseBuffer(buffer);
1751 tuple->t_data = NULL;
1752 }
1753
1754 return false;
1755}
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:371
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition heapam.c:9325
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
uint16 OffsetNumber
Definition off.h:24
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition predicate.c:2619

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
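
The caller supplies the TID in tuple->t_self and, on success, owns a pin on the returned buffer. A minimal sketch modelled on heapam_fetch_row_version(); example_fetch is an illustrative name, not part of the source:

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Hedged sketch: fetch the tuple version at '*tid' if visible under 'snapshot'. */
static bool
example_fetch(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buf;

    tuple.t_self = *tid;
    if (heap_fetch(rel, snapshot, &tuple, &buf, false))
    {
        /* tuple.t_data points into the still-pinned buffer 'buf' */
        ReleaseBuffer(buf);
        return true;
    }
    return false;
}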

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6167 of file heapam.c.

6168{
6169 Buffer buffer;
6170 Page page;
6171 OffsetNumber offnum;
6172 ItemId lp;
6173 HeapTupleHeader htup;
6174
6175 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6177 page = BufferGetPage(buffer);
6178
6179 offnum = ItemPointerGetOffsetNumber(tid);
6181 elog(ERROR, "offnum out of range");
6182 lp = PageGetItemId(page, offnum);
6183 if (!ItemIdIsNormal(lp))
6184 elog(ERROR, "invalid lp");
6185
6186 htup = (HeapTupleHeader) PageGetItem(page, lp);
6187
6188 /* NO EREPORT(ERROR) from here till changes are logged */
6190
6192
6193 MarkBufferDirty(buffer);
6194
6195 /*
6196 * Replace the speculative insertion token with a real t_ctid, pointing to
6197 * itself like it does on regular tuples.
6198 */
6199 htup->t_ctid = *tid;
6200
6201 /* XLOG stuff */
6202 if (RelationNeedsWAL(relation))
6203 {
6206
6208
6210
6211 /* We want the same filtering on this as on a plain insert */
6213
6216
6218
6219 PageSetLSN(page, recptr);
6220 }
6221
6223
6224 UnlockReleaseBuffer(buffer);
6225}
#define SizeOfHeapConfirm
#define XLOG_HEAP_CONFIRM
Definition heapam_xlog.h:38
OffsetNumber offnum

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
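
Together with heap_insert(..., HEAP_INSERT_SPECULATIVE, ...) and heap_abort_speculative(), this implements the speculative-insertion protocol behind INSERT ... ON CONFLICT. A hedged sketch of the control flow; speculative-token setup on the tuple header and the conflict check itself are omitted, and example_speculative is an illustrative name:

#include "postgres.h"
#include "access/heapam.h"

/* Hedged sketch: insert speculatively, then either confirm or super-delete. */
static void
example_speculative(Relation rel, HeapTuple tup, CommandId cid, bool conflict)
{
    heap_insert(rel, tup, cid, HEAP_INSERT_SPECULATIVE, NULL);

    if (!conflict)
        heap_finish_speculative(rel, &tup->t_self); /* promote to a normal tuple */
    else
        heap_abort_speculative(rel, &tup->t_self);  /* kill the speculative tuple */
}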

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7460 of file heapam.c.

7461{
7462 Page page = BufferGetPage(buffer);
7463
7464 for (int i = 0; i < ntuples; i++)
7465 {
7466 HeapTupleFreeze *frz = tuples + i;
7467 ItemId itemid = PageGetItemId(page, frz->offset);
7468 HeapTupleHeader htup;
7469
7470 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7472 }
7473}
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition heapam.h:492
int i
Definition isn.c:77

References BufferGetPage(), heap_execute_freeze_tuple(), i, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)
extern

Definition at line 7482 of file heapam.c.

7485{
7487 bool do_freeze;
7488 bool totally_frozen;
7489 struct VacuumCutoffs cutoffs;
7490 HeapPageFreeze pagefrz;
7491
7492 cutoffs.relfrozenxid = relfrozenxid;
7493 cutoffs.relminmxid = relminmxid;
7494 cutoffs.OldestXmin = FreezeLimit;
7495 cutoffs.OldestMxact = MultiXactCutoff;
7496 cutoffs.FreezeLimit = FreezeLimit;
7497 cutoffs.MultiXactCutoff = MultiXactCutoff;
7498
7499 pagefrz.freeze_required = true;
7500 pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7501 pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7502 pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7503 pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7504
7505 do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7506 &pagefrz, &frz, &totally_frozen);
7507
7508 /*
7509 * Note that because this is not a WAL-logged operation, we don't need to
7510 * fill in the offset in the freeze record.
7511 */
7512
7513 if (do_freeze)
7515 return do_freeze;
7516}
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition heapam.c:7134
bool freeze_required
Definition heapam.h:182
TransactionId FreezeLimit
Definition vacuum.h:289
TransactionId relfrozenxid
Definition vacuum.h:263
MultiXactId relminmxid
Definition vacuum.h:264
MultiXactId MultiXactCutoff
Definition vacuum.h:290

References fb(), VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
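
A minimal usage sketch, assuming the caller already holds the cutoff values in local variables (freeze_limit and multixact_cutoff are illustrative names), as rewrite_heap_tuple() does during CLUSTER/VACUUM FULL. Unlike the page-level path above, this variant rewrites the tuple header in place and emits no WAL.

/* Illustrative only: freeze a privately owned tuple copy during a rewrite */
if (heap_freeze_tuple(tuple->t_data,
                      relfrozenxid, relminmxid,
                      freeze_limit, multixact_cutoff))
{
    /* header xmin/xmax and infomask bits were rewritten in place */
}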

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)
extern

Definition at line 1930 of file heapam.c.

1932{
1933 Relation relation = sscan->rs_rd;
1934 Snapshot snapshot = sscan->rs_snapshot;
1935 ItemPointerData ctid;
1937
1938 /*
1939 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1940 * Assume that t_ctid links are valid however - there shouldn't be invalid
1941 * ones in the table.
1942 */
1944
1945 /*
1946 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1947 * need to examine, and *tid is the TID we will return if ctid turns out
1948 * to be bogus.
1949 *
1950 * Note that we will loop until we reach the end of the t_ctid chain.
1951 * Depending on the snapshot passed, there might be at most one visible
1952 * version of the row, but we don't try to optimize for that.
1953 */
1954 ctid = *tid;
1955 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1956 for (;;)
1957 {
1958 Buffer buffer;
1959 Page page;
1960 OffsetNumber offnum;
1961 ItemId lp;
1962 HeapTupleData tp;
1963 bool valid;
1964
1965 /*
1966 * Read, pin, and lock the page.
1967 */
1968 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1970 page = BufferGetPage(buffer);
1971
1972 /*
1973 * Check for bogus item number. This is not treated as an error
1974 * condition because it can happen while following a t_ctid link. We
1975 * just assume that the prior tid is OK and return it unchanged.
1976 */
1977 offnum = ItemPointerGetOffsetNumber(&ctid);
1979 {
1980 UnlockReleaseBuffer(buffer);
1981 break;
1982 }
1983 lp = PageGetItemId(page, offnum);
1984 if (!ItemIdIsNormal(lp))
1985 {
1986 UnlockReleaseBuffer(buffer);
1987 break;
1988 }
1989
1990 /* OK to access the tuple */
1991 tp.t_self = ctid;
1992 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1993 tp.t_len = ItemIdGetLength(lp);
1994 tp.t_tableOid = RelationGetRelid(relation);
1995
1996 /*
1997 * After following a t_ctid link, we might arrive at an unrelated
1998 * tuple. Check for XMIN match.
1999 */
2002 {
2003 UnlockReleaseBuffer(buffer);
2004 break;
2005 }
2006
2007 /*
2008 * Check tuple visibility; if visible, set it as the new result
2009 * candidate.
2010 */
2011 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
2012 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
2013 if (valid)
2014 *tid = ctid;
2015
2016 /*
2017 * If there's a valid t_ctid link, follow it, else we're done.
2018 */
2019 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2023 {
2024 UnlockReleaseBuffer(buffer);
2025 break;
2026 }
2027
2028 ctid = tp.t_data->t_ctid;
2030 UnlockReleaseBuffer(buffer);
2031 } /* end of loop */
2032}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), fb(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
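
A hedged usage sketch: in core this is normally reached through the table_tuple_get_latest_tid() wrapper, which also validates the starting TID. rel, snapshot, and tid are assumed to be supplied by the caller.

TableScanDesc scan = table_beginscan_tid(rel, snapshot);

/* On return, tid names the newest row version visible to snapshot */
heap_get_latest_tid(scan, &tid);
table_endscan(scan);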

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber *  root_offsets 
)
extern

Definition at line 1895 of file pruneheap.c.

1896{
1897 OffsetNumber offnum,
1898 maxoff;
1899
1902
1903 maxoff = PageGetMaxOffsetNumber(page);
1904 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1905 {
1906 ItemId lp = PageGetItemId(page, offnum);
1907 HeapTupleHeader htup;
1910
1911 /* skip unused and dead items */
1912 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1913 continue;
1914
1915 if (ItemIdIsNormal(lp))
1916 {
1917 htup = (HeapTupleHeader) PageGetItem(page, lp);
1918
1919 /*
1920 * Check if this tuple is part of a HOT-chain rooted at some other
1921 * tuple. If so, skip it for now; we'll process it when we find
1922 * its root.
1923 */
1924 if (HeapTupleHeaderIsHeapOnly(htup))
1925 continue;
1926
1927 /*
1928 * This is either a plain tuple or the root of a HOT-chain.
1929 * Remember it in the mapping.
1930 */
1931 root_offsets[offnum - 1] = offnum;
1932
1933 /* If it's not the start of a HOT-chain, we're done with it */
1934 if (!HeapTupleHeaderIsHotUpdated(htup))
1935 continue;
1936
1937 /* Set up to scan the HOT-chain */
1940 }
1941 else
1942 {
1943 /* Must be a redirect item. We do not set its root_offsets entry */
1945 /* Set up to scan the HOT-chain */
1948 }
1949
1950 /*
1951 * Now follow the HOT-chain and collect other tuples in the chain.
1952 *
1953 * Note: Even though this is a nested loop, the complexity of the
1954 * function is O(N) because a tuple in the page should be visited not
1955 * more than twice, once in the outer loop and once in HOT-chain
1956 * chases.
1957 */
1958 for (;;)
1959 {
1960 /* Sanity check (pure paranoia) */
1961 if (offnum < FirstOffsetNumber)
1962 break;
1963
1964 /*
1965 * An offset past the end of page's line pointer array is possible
1966 * when the array was truncated
1967 */
1968 if (offnum > maxoff)
1969 break;
1970
1971 lp = PageGetItemId(page, nextoffnum);
1972
1973 /* Check for broken chains */
1974 if (!ItemIdIsNormal(lp))
1975 break;
1976
1977 htup = (HeapTupleHeader) PageGetItem(page, lp);
1978
1981 break;
1982
1983 /* Remember the root line pointer for this item */
1984 root_offsets[nextoffnum - 1] = offnum;
1985
1986 /* Advance to next chain member, if any */
1987 if (!HeapTupleHeaderIsHotUpdated(htup))
1988 break;
1989
1990 /* HOT implies it can't have moved to different partition */
1992
1995 }
1996 }
1997}
#define MemSet(start, val, len)
Definition c.h:1013
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
#define ItemIdGetRedirect(itemId)
Definition itemid.h:78
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
#define FirstOffsetNumber
Definition off.h:27

References Assert, fb(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
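
A short sketch of how the index-build callers use the mapping (illustrative only); page is assumed to be a heap page whose buffer is content-locked, and offnum the offset of a heap-only tuple fetched from that page.

OffsetNumber root_offsets[MaxHeapTuplesPerPage];

heap_get_root_tuples(page, root_offsets);

/* The array is indexed by offnum - 1; entries never reached stay invalid */
if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
    elog(ERROR, "failed to find parent tuple for heap-only tuple");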

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)
extern

Definition at line 1409 of file heapam.c.

1410{
1412
1413 /*
1414 * This is still widely used directly, without going through table AM, so
1415 * add a safety check. It's possible we should, at a later point,
1416 * downgrade this to an assert. The reason for checking the AM routine,
1417 * rather than the AM oid, is that this allows writing regression tests
1418 * that create another AM reusing the heap handler.
1419 */
1420 if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1421 ereport(ERROR,
1423 errmsg_internal("only heap AM is supported")));
1424
1425 /* Note: no locking manipulations needed */
1426
1428 heapgettup_pagemode(scan, direction,
1429 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1430 else
1431 heapgettup(scan, direction,
1432 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1433
1434 if (scan->rs_ctup.t_data == NULL)
1435 return NULL;
1436
1437 /*
1438 * if we get here it means we have a new current scan tuple, so point to
1439 * the proper return buffer and return the tuple.
1440 */
1441
1443
1444 return &scan->rs_ctup;
1445}
#define unlikely(x)
Definition c.h:412
int errmsg_internal(const char *fmt,...)
Definition elog.c:1170
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:959
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:1069
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition pgstat.h:695
@ SO_ALLOW_PAGEMODE
Definition tableam.h:62

References ereport, errcode(), errmsg_internal(), ERROR, fb(), GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
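
A minimal sketch of the classic scan loop used by the catalog-scanning callers above; rel is assumed to be an already opened heap (or catalog) relation.

TableScanDesc scan;
HeapTuple   tuple;

scan = table_beginscan_catalog(rel, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
    /* tuple points into a buffer pinned by the scan; do not free it */
}
table_endscan(scan);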

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot *  slot 
)
extern

Definition at line 1448 of file heapam.c.

1449{
1451
1452 /* Note: no locking manipulations needed */
1453
1454 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1455 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1456 else
1457 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1458
1459 if (scan->rs_ctup.t_data == NULL)
1460 {
1461 ExecClearTuple(slot);
1462 return false;
1463 }
1464
1465 /*
1466 * if we get here it means we have a new current scan tuple, so point to
1467 * the proper return buffer and return the tuple.
1468 */
1469
1471
1472 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1473 scan->rs_cbuf);
1474 return true;
1475}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:457

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
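
A minimal sketch of the slot-based variant, which is what the table AM wrapper table_scan_getnextslot() ends up calling for heap tables; rel and slot are assumed to be provided by the executor.

TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);

while (heap_getnextslot(scan, ForwardScanDirection, slot))
{
    /* slot now holds a buffer heap tuple for the current row */
}
table_endscan(scan);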

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot *  slot 
)
extern

Definition at line 1551 of file heapam.c.

1553{
1555 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1556 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1557
1558 /* Note: no locking manipulations needed */
1559 for (;;)
1560 {
1561 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1562 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1563 else
1564 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1565
1566 if (scan->rs_ctup.t_data == NULL)
1567 {
1568 ExecClearTuple(slot);
1569 return false;
1570 }
1571
1572 /*
1573 * heap_set_tidrange will have used heap_setscanlimits to limit the
1574 * range of pages we scan to only ones that can contain the TID range
1575 * we're scanning for. Here we must filter out any tuples from these
1576 * pages that are outside of that range.
1577 */
1578 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1579 {
1580 ExecClearTuple(slot);
1581
1582 /*
1583 * When scanning backwards, the TIDs will be in descending order.
1584 * Future tuples in this direction will be lower still, so we can
1585 * just return false to indicate there will be no more tuples.
1586 */
1587 if (ScanDirectionIsBackward(direction))
1588 return false;
1589
1590 continue;
1591 }
1592
1593 /*
1594 * Likewise for the final page, we must filter out TIDs greater than
1595 * maxtid.
1596 */
1597 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1598 {
1599 ExecClearTuple(slot);
1600
1601 /*
1602 * When scanning forward, the TIDs will be in ascending order.
1603 * Future tuples in this direction will be higher still, so we can
1604 * just return false to indicate there will be no more tuples.
1605 */
1606 if (ScanDirectionIsForward(direction))
1607 return false;
1608 continue;
1609 }
1610
1611 break;
1612 }
1613
1614 /*
1615 * if we get here it means we have a new current scan tuple, so point to
1616 * the proper return buffer and return the tuple.
1617 */
1619
1620 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1621 return true;
1622}
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition sdir.h:50

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
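
A hedged sketch of a bounded TID-range scan, mirroring what the TID Range Scan node does through the table AM wrappers; rel, slot, mintid, and maxtid are assumed to be valid caller-supplied values.

TableScanDesc scan = table_beginscan_tidrange(rel, GetActiveSnapshot(),
                                              &mintid, &maxtid);

while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
{
    /* only tuples with mintid <= ctid <= maxtid reach this point */
}
table_endscan(scan);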

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool *  all_dead,
bool  first_call 
)
extern

Definition at line 1778 of file heapam.c.

1781{
1782 Page page = BufferGetPage(buffer);
1784 BlockNumber blkno;
1785 OffsetNumber offnum;
1786 bool at_chain_start;
1787 bool valid;
1788 bool skip;
1789 GlobalVisState *vistest = NULL;
1790
1791 /* If this is not the first call, previous call returned a (live!) tuple */
1792 if (all_dead)
1794
1795 blkno = ItemPointerGetBlockNumber(tid);
1796 offnum = ItemPointerGetOffsetNumber(tid);
1798 skip = !first_call;
1799
1800 /* XXX: we should assert that a snapshot is pushed or registered */
1802 Assert(BufferGetBlockNumber(buffer) == blkno);
1803
1804 /* Scan through possible multiple members of HOT-chain */
1805 for (;;)
1806 {
1807 ItemId lp;
1808
1809 /* check for bogus TID */
1811 break;
1812
1813 lp = PageGetItemId(page, offnum);
1814
1815 /* check for unused, dead, or redirected items */
1816 if (!ItemIdIsNormal(lp))
1817 {
1818 /* We should only see a redirect at start of chain */
1820 {
1821 /* Follow the redirect */
1822 offnum = ItemIdGetRedirect(lp);
1823 at_chain_start = false;
1824 continue;
1825 }
1826 /* else must be end of chain */
1827 break;
1828 }
1829
1830 /*
1831 * Update heapTuple to point to the element of the HOT chain we're
1832 * currently investigating. Having t_self set correctly is important
1833 * because the SSI checks and the *Satisfies routine for historical
1834 * MVCC snapshots need the correct tid to decide about the visibility.
1835 */
1836 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1837 heapTuple->t_len = ItemIdGetLength(lp);
1838 heapTuple->t_tableOid = RelationGetRelid(relation);
1839 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1840
1841 /*
1842 * Shouldn't see a HEAP_ONLY tuple at chain start.
1843 */
1845 break;
1846
1847 /*
1848 * The xmin should match the previous xmax value, else chain is
1849 * broken.
1850 */
1854 break;
1855
1856 /*
1857 * When first_call is true (and thus, skip is initially false) we'll
1858 * return the first tuple we find. But on later passes, heapTuple
1859 * will initially be pointing to the tuple we returned last time.
1860 * Returning it again would be incorrect (and would loop forever), so
1861 * we skip it and return the next match we find.
1862 */
1863 if (!skip)
1864 {
1865 /* If it's visible per the snapshot, we must return it */
1866 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1868 buffer, snapshot);
1869
1870 if (valid)
1871 {
1872 ItemPointerSetOffsetNumber(tid, offnum);
1873 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1875 if (all_dead)
1876 *all_dead = false;
1877 return true;
1878 }
1879 }
1880 skip = false;
1881
1882 /*
1883 * If we can't see it, maybe no one else can either. At caller
1884 * request, check whether all chain members are dead to all
1885 * transactions.
1886 *
1887 * Note: if you change the criterion here for what is "dead", fix the
1888 * planner's get_actual_variable_range() function to match.
1889 */
1890 if (all_dead && *all_dead)
1891 {
1892 if (!vistest)
1893 vistest = GlobalVisTestFor(relation);
1894
1895 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1896 *all_dead = false;
1897 }
1898
1899 /*
1900 * Check to see if HOT chain continues past this tuple; if so fetch
1901 * the next offnum and loop around.
1902 */
1904 {
1905 Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1906 blkno);
1907 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1908 at_chain_start = false;
1910 }
1911 else
1912 break; /* end of chain */
1913 }
1914
1915 return false;
1916}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition itemptr.h:158
static const struct exclude_list_item skip[]
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4072
TransactionId RecentXmin
Definition snapmgr.c:160

References Assert, BufferGetBlockNumber(), BufferGetPage(), fb(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().
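
A condensed sketch of the index-fetch pattern, roughly what heapam_index_fetch_tuple() does: pin and share-lock the page named by an index TID, then let this routine walk the HOT chain. tid, rel, and snapshot are assumed inputs.

Buffer      buf = ReadBuffer(rel, ItemPointerGetBlockNumber(&tid));
HeapTupleData tup;
bool        all_dead = false;
bool        found;

LockBuffer(buf, BUFFER_LOCK_SHARE);
found = heap_hot_search_buffer(&tid, rel, buf, snapshot, &tup,
                               &all_dead, true);    /* first_call */
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
ReleaseBuffer(buf);

if (found)
{
    /* tup.t_self and tid now name the visible chain member */
}
else if (all_dead)
{
    /* the whole chain is dead to everyone; the index entry can be killed */
}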

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp *  delstate 
)
extern

Definition at line 8198 of file heapam.c.

8199{
8200 /* Initial assumption is that earlier pruning took care of conflict */
8201 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8204 Page page = NULL;
8207#ifdef USE_PREFETCH
8210#endif
8212 int finalndeltids = 0,
8213 nblocksaccessed = 0;
8214
8215 /* State that's only used in bottom-up index deletion case */
8216 int nblocksfavorable = 0;
8217 int curtargetfreespace = delstate->bottomupfreespace,
8218 lastfreespace = 0,
8219 actualfreespace = 0;
8220 bool bottomup_final_block = false;
8221
8223
8224 /* Sort caller's deltids array by TID for further processing */
8226
8227 /*
8228 * Bottom-up case: resort deltids array in an order attuned to where the
8229 * greatest number of promising TIDs are to be found, and determine how
8230 * many blocks from the start of sorted array should be considered
8231 * favorable. This will also shrink the deltids array in order to
8232 * eliminate completely unfavorable blocks up front.
8233 */
8234 if (delstate->bottomup)
8236
8237#ifdef USE_PREFETCH
8238 /* Initialize prefetch state. */
8240 prefetch_state.next_item = 0;
8241 prefetch_state.ndeltids = delstate->ndeltids;
8242 prefetch_state.deltids = delstate->deltids;
8243
8244 /*
8245 * Determine the prefetch distance that we will attempt to maintain.
8246 *
8247 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8248 * sure that isn't a catalog relation before we call code that does
8249 * syscache lookups, to avoid risk of deadlock.
8250 */
8251 if (IsCatalogRelation(rel))
8253 else
8256
8257 /* Cap initial prefetch distance for bottom-up deletion caller */
8258 if (delstate->bottomup)
8259 {
8263 }
8264
8265 /* Start prefetching. */
8267#endif
8268
8269 /* Iterate over deltids, determine which to delete, check their horizon */
8270 Assert(delstate->ndeltids > 0);
8271 for (int i = 0; i < delstate->ndeltids; i++)
8272 {
8273 TM_IndexDelete *ideltid = &delstate->deltids[i];
8274 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8275 ItemPointer htid = &ideltid->tid;
8276 OffsetNumber offnum;
8277
8278 /*
8279 * Read buffer, and perform required extra steps each time a new block
8280 * is encountered. Avoid refetching if it's the same block as the one
8281 * from the last htid.
8282 */
8283 if (blkno == InvalidBlockNumber ||
8285 {
8286 /*
8287 * Consider giving up early for bottom-up index deletion caller
8288 * first. (Only prefetch next-next block afterwards, when it
8289 * becomes clear that we're at least going to access the next
8290 * block in line.)
8291 *
8292 * Sometimes the first block frees so much space for bottom-up
8293 * caller that the deletion process can end without accessing any
8294 * more blocks. It is usually necessary to access 2 or 3 blocks
8295 * per bottom-up deletion operation, though.
8296 */
8297 if (delstate->bottomup)
8298 {
8299 /*
8300 * We often allow caller to delete a few additional items
8301 * whose entries we reached after the point that space target
8302 * from caller was satisfied. The cost of accessing the page
8303 * was already paid at that point, so it made sense to finish
8304 * it off. When that happened, we finalize everything here
8305 * (by finishing off the whole bottom-up deletion operation
8306 * without needlessly paying the cost of accessing any more
8307 * blocks).
8308 */
8310 break;
8311
8312 /*
8313 * Give up when we didn't enable our caller to free any
8314 * additional space as a result of processing the page that we
8315 * just finished up with. This rule is the main way in which
8316 * we keep the cost of bottom-up deletion under control.
8317 */
8319 break;
8320 lastfreespace = actualfreespace; /* for next time */
8321
8322 /*
8323 * Deletion operation (which is bottom-up) will definitely
8324 * access the next block in line. Prepare for that now.
8325 *
8326 * Decay target free space so that we don't hang on for too
8327 * long with a marginal case. (Space target is only truly
8328 * helpful when it allows us to recognize that we don't need
8329 * to access more than 1 or 2 blocks to satisfy caller due to
8330 * agreeable workload characteristics.)
8331 *
8332 * We are a bit more patient when we encounter contiguous
8333 * blocks, though: these are treated as favorable blocks. The
8334 * decay process is only applied when the next block in line
8335 * is not a favorable/contiguous block. This is not an
8336 * exception to the general rule; we still insist on finding
8337 * at least one deletable item per block accessed. See
8338 * bottomup_nblocksfavorable() for full details of the theory
8339 * behind favorable blocks and heap block locality in general.
8340 *
8341 * Note: The first block in line is always treated as a
8342 * favorable block, so the earliest possible point that the
8343 * decay can be applied is just before we access the second
8344 * block in line. The Assert() verifies this for us.
8345 */
8347 if (nblocksfavorable > 0)
8349 else
8350 curtargetfreespace /= 2;
8351 }
8352
8353 /* release old buffer */
8354 if (BufferIsValid(buf))
8356
8358 buf = ReadBuffer(rel, blkno);
8360 Assert(!delstate->bottomup ||
8362
8363#ifdef USE_PREFETCH
8364
8365 /*
8366 * To maintain the prefetch distance, prefetch one more page for
8367 * each page we read.
8368 */
8370#endif
8371
8373
8374 page = BufferGetPage(buf);
8375 maxoff = PageGetMaxOffsetNumber(page);
8376 }
8377
8378 /*
8379 * In passing, detect index corruption involving an index page with a
8380 * TID that points to a location in the heap that couldn't possibly be
8381 * correct. We only do this with actual TIDs from caller's index page
8382 * (not items reached by traversing through a HOT chain).
8383 */
8385
8386 if (istatus->knowndeletable)
8387 Assert(!delstate->bottomup && !istatus->promising);
8388 else
8389 {
8390 ItemPointerData tmp = *htid;
8392
8393 /* Are any tuples from this HOT chain non-vacuumable? */
8395 &heapTuple, NULL, true))
8396 continue; /* can't delete entry */
8397
8398 /* Caller will delete, since whole HOT chain is vacuumable */
8399 istatus->knowndeletable = true;
8400
8401 /* Maintain index free space info for bottom-up deletion case */
8402 if (delstate->bottomup)
8403 {
8404 Assert(istatus->freespace > 0);
8405 actualfreespace += istatus->freespace;
8407 bottomup_final_block = true;
8408 }
8409 }
8410
8411 /*
8412 * Maintain snapshotConflictHorizon value for deletion operation as a
8413 * whole by advancing current value using heap tuple headers. This is
8414 * loosely based on the logic for pruning a HOT chain.
8415 */
8417 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8418 for (;;)
8419 {
8420 ItemId lp;
8421 HeapTupleHeader htup;
8422
8423 /* Sanity check (pure paranoia) */
8424 if (offnum < FirstOffsetNumber)
8425 break;
8426
8427 /*
8428 * An offset past the end of page's line pointer array is possible
8429 * when the array was truncated
8430 */
8431 if (offnum > maxoff)
8432 break;
8433
8434 lp = PageGetItemId(page, offnum);
8436 {
8437 offnum = ItemIdGetRedirect(lp);
8438 continue;
8439 }
8440
8441 /*
8442 * We'll often encounter LP_DEAD line pointers (especially with an
8443 * entry marked knowndeletable by our caller up front). No heap
8444 * tuple headers get examined for an htid that leads us to an
8445 * LP_DEAD item. This is okay because the earlier pruning
8446 * operation that made the line pointer LP_DEAD in the first place
8447 * must have considered the original tuple header as part of
8448 * generating its own snapshotConflictHorizon value.
8449 *
8450 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8451 * the same strategy that index vacuuming uses in all cases. Index
8452 * VACUUM WAL records don't even have a snapshotConflictHorizon
8453 * field of their own for this reason.
8454 */
8455 if (!ItemIdIsNormal(lp))
8456 break;
8457
8458 htup = (HeapTupleHeader) PageGetItem(page, lp);
8459
8460 /*
8461 * Check the tuple XMIN against prior XMAX, if any
8462 */
8465 break;
8466
8468 &snapshotConflictHorizon);
8469
8470 /*
8471 * If the tuple is not HOT-updated, then we are at the end of this
8472 * HOT-chain. No need to visit later tuples from the same update
8473 * chain (they get their own index entries) -- just move on to
8474 * next htid from index AM caller.
8475 */
8476 if (!HeapTupleHeaderIsHotUpdated(htup))
8477 break;
8478
8479 /* Advance to next HOT chain member */
8480 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8481 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8483 }
8484
8485 /* Enable further/final shrinking of deltids for caller */
8486 finalndeltids = i + 1;
8487 }
8488
8490
8491 /*
8492 * Shrink deltids array to exclude non-deletable entries at the end. This
8493 * is not just a minor optimization. Final deltids array size might be
8494 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8495 * ndeltids being zero in all cases with zero total deletable entries.
8496 */
8497 Assert(finalndeltids > 0 || delstate->bottomup);
8498 delstate->ndeltids = finalndeltids;
8499
8500 return snapshotConflictHorizon;
8501}
int maintenance_io_concurrency
Definition bufmgr.c:191
#define Min(x, y)
Definition c.h:997
bool IsCatalogRelation(Relation relation)
Definition catalog.c:104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition heapam.c:8755
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition heapam.c:8053
#define BOTTOMUP_MAX_NBLOCKS
Definition heapam.c:188
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, const ItemPointerData *htid, TM_IndexStatus *istatus)
Definition heapam.c:8138
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition heapam.c:1778
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition heapam.c:8543
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition spccache.c:229

References Assert, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), fb(), FirstOffsetNumber, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), maintenance_io_concurrency, Min, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationData::rd_rel, ReadBuffer(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
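
A hedged sketch of how an index AM hands a batch of possibly deletable TIDs to the heap; in core this normally goes through the table_index_delete_tuples() wrapper rather than a direct call. The deltids/status arrays, irel, iblknum, and heaprel are assumed to have been prepared by the index AM, and the TM_IndexDeleteOp field names are taken from tableam.h.

TM_IndexDeleteOp delstate;
TransactionId conflict_horizon;

delstate.irel = irel;           /* index the TIDs came from */
delstate.iblknum = iblknum;     /* index block holding those TIDs */
delstate.bottomup = false;      /* simple deletion of known-dead entries */
delstate.bottomupfreespace = 0;
delstate.ndeltids = ndeltids;   /* deltids/status assumed filled by caller */
delstate.deltids = deltids;
delstate.status = status;

conflict_horizon = table_index_delete_tuples(heaprel, &delstate);

/*
 * delstate.ndeltids may have shrunk; remaining entries marked
 * knowndeletable can be removed from the index page, and the returned
 * horizon goes into the index AM's deletion WAL record.
 */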

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void *  arg 
)
extern

Definition at line 6436 of file heapam.c.

6439{
6440 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6441 TM_Result result;
6442 bool ret;
6443
6444#ifdef USE_ASSERT_CHECKING
6445 if (RelationGetRelid(relation) == RelationRelationId)
6447#endif
6448
6449 Assert(BufferIsValid(buffer));
6450
6451 /*
6452 * Register shared cache invals if necessary. Other sessions may finish
6453 * inplace updates of this tuple between this step and LockTuple(). Since
6454 * inplace updates don't change cache keys, that's harmless.
6455 *
6456 * While it's tempting to register invals only after confirming we can
6457 * return true, the following obstacle precludes reordering steps that
6458 * way. Registering invals might reach a CatalogCacheInitializeCache()
6459 * that locks "buffer". That would hang indefinitely if running after our
6460 * own LockBuffer(). Hence, we must register invals before LockBuffer().
6461 */
6463
6464 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6466
6467 /*----------
6468 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6469 *
6470 * - wait unconditionally
6471 * - already locked tuple above, since inplace needs that unconditionally
6472 * - don't recheck header after wait: simpler to defer to next iteration
6473 * - don't try to continue even if the updater aborts: likewise
6474 * - no crosscheck
6475 */
6477 buffer);
6478
6479 if (result == TM_Invisible)
6480 {
6481 /* no known way this can happen */
6482 ereport(ERROR,
6484 errmsg_internal("attempted to overwrite invisible tuple")));
6485 }
6486 else if (result == TM_SelfModified)
6487 {
6488 /*
6489 * CREATE INDEX might reach this if an expression is silly enough to
6490 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6491 * statements might get here after a heap_update() of the same row, in
6492 * the absence of an intervening CommandCounterIncrement().
6493 */
6494 ereport(ERROR,
6496 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6497 }
6498 else if (result == TM_BeingModified)
6499 {
6502
6504 infomask = oldtup.t_data->t_infomask;
6505
6507 {
6510 int remain;
6511
6513 lockmode, NULL))
6514 {
6517 ret = false;
6519 relation, &oldtup.t_self, XLTW_Update,
6520 &remain);
6521 }
6522 else
6523 ret = true;
6524 }
6526 ret = true;
6528 ret = true;
6529 else
6530 {
6533 ret = false;
6534 XactLockTableWait(xwait, relation, &oldtup.t_self,
6535 XLTW_Update);
6536 }
6537 }
6538 else
6539 {
6540 ret = (result == TM_Ok);
6541 if (!ret)
6542 {
6545 }
6546 }
6547
6548 /*
6549 * GetCatalogSnapshot() relies on invalidation messages to know when to
6550 * take a new snapshot. COMMIT of xwait is responsible for sending the
6551 * invalidation. We're not acquiring heavyweight locks sufficient to
6552 * block if not yet sent, so we must take a new snapshot to ensure a later
6553 * attempt has a fair chance. While we don't need this if xwait aborted,
6554 * don't bother optimizing that.
6555 */
6556 if (!ret)
6557 {
6558 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6561 }
6562 return ret;
6563}
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(uint16 infomask)
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple key_equivalent_tuple)
Definition inval.c:1593
void ForgetInplace_Inval(void)
Definition inval.c:1286
void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:601
void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:562
@ XLTW_Update
Definition lmgr.h:27
#define InplaceUpdateTupleLock
Definition lockdefs.h:48
LockTupleMode
Definition lockoptions.h:50
@ LockTupleNoKeyExclusive
Definition lockoptions.h:56
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
void * arg
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
CommandId GetCurrentCommandId(bool used)
Definition xact.c:830

References arg, Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, fb(), ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)
extern

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)
extern

Definition at line 6574 of file heapam.c.

6577{
6578 HeapTupleHeader htup = oldtup->t_data;
6579 uint32 oldlen;
6580 uint32 newlen;
6581 char *dst;
6582 char *src;
6583 int nmsgs = 0;
6585 bool RelcacheInitFileInval = false;
6586
6587 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6588 oldlen = oldtup->t_len - htup->t_hoff;
6589 newlen = tuple->t_len - tuple->t_data->t_hoff;
6590 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6591 elog(ERROR, "wrong tuple length");
6592
6593 dst = (char *) htup + htup->t_hoff;
6594 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6595
6596 /* Like RecordTransactionCommit(), log only if needed */
6599 &RelcacheInitFileInval);
6600
6601 /*
6602 * Unlink relcache init files as needed. If unlinking, acquire
6603 * RelCacheInitLock until after associated invalidations. By doing this
6604 * in advance, if we checkpoint and then crash between inplace
6605 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6606 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6607 * neglect to PANIC on EIO.
6608 */
6610
6611 /*----------
6612 * NO EREPORT(ERROR) from here till changes are complete
6613 *
6614 * Our buffer lock won't stop a reader having already pinned and checked
6615 * visibility for this tuple. Hence, we write WAL first, then mutate the
6616 * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6617 * checkpoint delay makes that acceptable. With the usual order of
6618 * changes, a crash after memcpy() and before XLogInsert() could allow
6619 * datfrozenxid to overtake relfrozenxid:
6620 *
6621 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6622 * ["R" is a VACUUM tbl]
6623 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6624 * D: systable_getnext() returns pg_class tuple of tbl
6625 * R: memcpy() into pg_class tuple of tbl
6626 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6627 * [crash]
6628 * [recovery restores datfrozenxid w/o relfrozenxid]
6629 *
6630 * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint().
6631 * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack.
6632 * The stack copy facilitates a FPI of the post-mutation block before we
6633 * accept other sessions seeing it. DELAY_CHKPT_START allows us to
6634 * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint()
6635 * can operate under BUFFER_LOCK_SHARED, it can't avoid DELAY_CHKPT_START.
6636 * This function, however, likely could avoid it with the following order
6637 * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use
6638 * DELAY_CHKPT_START here, too, as a way to have fewer distinct code
6639 * patterns to analyze. Inplace update isn't so frequent that it should
6640 * pursue the small optimization of skipping DELAY_CHKPT_START.
6641 */
6645
6646 /* XLOG stuff */
6647 if (RelationNeedsWAL(relation))
6648 {
6651 char *origdata = (char *) BufferGetBlock(buffer);
6652 Page page = BufferGetPage(buffer);
6653 uint16 lower = ((PageHeader) page)->pd_lower;
6654 uint16 upper = ((PageHeader) page)->pd_upper;
6656 RelFileLocator rlocator;
6657 ForkNumber forkno;
6658 BlockNumber blkno;
6660
6661 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6662 xlrec.dbId = MyDatabaseId;
6664 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6665 xlrec.nmsgs = nmsgs;
6666
6669 if (nmsgs != 0)
6671 nmsgs * sizeof(SharedInvalidationMessage));
6672
6673 /* register block matching what buffer will look like after changes */
6678 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6679 Assert(forkno == MAIN_FORKNUM);
6680 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6682 XLogRegisterBufData(0, src, newlen);
6683
6684 /* inplace updates aren't decoded atm, don't log the origin */
6685
6687
6688 PageSetLSN(page, recptr);
6689 }
6690
6691 memcpy(dst, src, newlen);
6692
6693 MarkBufferDirty(buffer);
6694
6696
6697 /*
6698 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6699 * do this before UnlockTuple().
6700 */
6702
6705 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6706
6707 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6708
6709 /*
6710 * Queue a transactional inval, for logical decoding and for third-party
6711 * code that might have been relying on it since long before inplace
6712 * update adopted immediate invalidation. See README.tuplock section
6713 * "Reading inplace-updated columns" for logical decoding details.
6714 */
6716 CacheInvalidateHeapTuple(relation, tuple, NULL);
6717}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4377
static Block BufferGetBlock(Buffer buffer)
Definition bufmgr.h:433
PageHeaderData * PageHeader
Definition bufpage.h:173
uint32_t uint32
Definition c.h:546
Oid MyDatabaseTableSpace
Definition globals.c:96
Oid MyDatabaseId
Definition globals.c:94
#define MinSizeOfHeapInplace
#define XLOG_HEAP_INPLACE
Definition heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition inval.c:930
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition inval.c:1088
void PreInplace_Inval(void)
Definition inval.c:1250
void AtInplace_Inval(void)
Definition inval.c:1263
#define IsBootstrapProcessingMode()
Definition miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define DELAY_CHKPT_START
Definition proc.h:136
ForkNumber
Definition relpath.h:56
PGPROC * MyProc
Definition proc.c:67
int delayChkptFlags
Definition proc.h:257
#define XLogStandbyInfoActive()
Definition xlog.h:125
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:409
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition xloginsert.c:313

References AcceptInvalidationMessages(), Assert, AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, fb(), inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().
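
A hedged sketch of the lock / modify / update-and-unlock sequence that systable_inplace_update_begin() and systable_inplace_update_finish() wrap; oldtup and buffer are assumed to come from a catalog fetch, and release_callback/arg stand for the caller-supplied cleanup hook described above.

if (heap_inplace_lock(relation, &oldtup, buffer, release_callback, arg))
{
    HeapTuple   newtup = heap_copytuple(&oldtup);

    /* mutate only fixed-width fields of newtup; the length must not change */

    heap_inplace_update_and_unlock(relation, &oldtup, newtup, buffer);
}
else
{
    /* concurrent update: refetch the tuple and retry, as the systable code does */
}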

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2141 of file heapam.c.

2143{
2146 Buffer buffer;
2147 Buffer vmbuffer = InvalidBuffer;
2148 bool all_visible_cleared = false;
2149
2150 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2153
2154 AssertHasSnapshotForToast(relation);
2155
2156 /*
2157 * Fill in tuple header fields and toast the tuple if necessary.
2158 *
2159 * Note: below this point, heaptup is the data we actually intend to store
2160 * into the relation; tup is the caller's original untoasted data.
2161 */
2162 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2163
2164 /*
2165 * Find buffer to insert this tuple into. If the page is all visible,
2166 * this will also pin the requisite visibility map page.
2167 */
2168 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2169 InvalidBuffer, options, bistate,
2170 &vmbuffer, NULL,
2171 0);
2172
2173 /*
2174 * We're about to do the actual insert -- but check for conflict first, to
2175 * avoid possibly having to roll back work we've just done.
2176 *
2177 * This is safe without a recheck as long as there is no possibility of
2178 * another process scanning the page between this check and the insert
2179 * being visible to the scan (i.e., an exclusive buffer content lock is
2180 * continuously held from this point until the tuple insert is visible).
2181 *
2182 * For a heap insert, we only need to check for table-level SSI locks. Our
2183 * new tuple can't possibly conflict with existing tuple locks, and heap
2184 * page locks are only consolidated versions of tuple locks; they do not
2185 * lock "gaps" as index page locks do. So we don't need to specify a
2186 * buffer when making the call, which makes for a faster check.
2187 */
2189
2190 /* NO EREPORT(ERROR) from here till changes are logged */
2192
2193 RelationPutHeapTuple(relation, buffer, heaptup,
2195
2196 if (PageIsAllVisible(BufferGetPage(buffer)))
2197 {
2198 all_visible_cleared = true;
2200 visibilitymap_clear(relation,
2202 vmbuffer, VISIBILITYMAP_VALID_BITS);
2203 }
2204
2205 /*
2206 * XXX Should we set PageSetPrunable on this page ?
2207 *
2208 * The inserting transaction may eventually abort thus making this tuple
2209 * DEAD and hence available for pruning. Though we don't want to optimize
2210 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2211 * aborted tuple will never be pruned until next vacuum is triggered.
2212 *
2213 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2214 */
2215
2216 MarkBufferDirty(buffer);
2217
2218 /* XLOG stuff */
2219 if (RelationNeedsWAL(relation))
2220 {
2224 Page page = BufferGetPage(buffer);
2225 uint8 info = XLOG_HEAP_INSERT;
2226 int bufflags = 0;
2227
2228 /*
2229 * If this is a catalog, we need to transmit combo CIDs to properly
2230 * decode, so log that as well.
2231 */
2233 log_heap_new_cid(relation, heaptup);
2234
2235 /*
2236 * If this is the single and first tuple on page, we can reinit the
2237 * page instead of restoring the whole thing. Set flag, and hide
2238 * buffer references from XLogInsert.
2239 */
2242 {
2243 info |= XLOG_HEAP_INIT_PAGE;
2245 }
2246
2247 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2248 xlrec.flags = 0;
2254
2255 /*
2256 * For logical decoding, we need the tuple even if we're doing a full
2257 * page write, so make sure it's included even if we take a full-page
2258 * image. (XXX We could alternatively store a pointer into the FPW).
2259 */
2260 if (RelationIsLogicallyLogged(relation) &&
2262 {
2265
2266 if (IsToastRelation(relation))
2268 }
2269
2272
2273 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2274 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2275 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2276
2277 /*
2278 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2279 * write the whole page to the xlog, we don't need to store
2280 * xl_heap_header in the xlog.
2281 */
2284 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2286 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2288
2289 /* filtering by origin on a row level is much more efficient */
2291
2292 recptr = XLogInsert(RM_HEAP_ID, info);
2293
2294 PageSetLSN(page, recptr);
2295 }
2296
2298
2299 UnlockReleaseBuffer(buffer);
2300 if (vmbuffer != InvalidBuffer)
2301 ReleaseBuffer(vmbuffer);
2302
2303 /*
2304 * If tuple is cacheable, mark it for invalidation from the caches in case
2305 * we abort. Note it is OK to do this after releasing the buffer, because
2306 * the heaptup data structure is all in local memory, not in the shared
2307 * buffer.
2308 */
2310
2311 /* Note: speculative insertions are counted too, even if aborted later */
2312 pgstat_count_heap_insert(relation, 1);
2313
2314 /*
2315 * If heaptup is a private copy, release it. Don't forget to copy t_self
2316 * back to the caller's image, too.
2317 */
2318 if (heaptup != tup)
2319 {
2320 tup->t_self = heaptup->t_self;
2322 }
2323}
uint8_t uint8
Definition c.h:544
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition heapam.c:2332
#define HEAP_INSERT_SPECULATIVE
Definition heapam.h:40
#define HEAP_INSERT_NO_LOGICAL
Definition heapam.h:39
#define XLH_INSERT_ON_TOAST_RELATION
Definition heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition heapam_xlog.h:33
#define SizeOfHeapInsert
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition hio.c:500
#define HeapTupleHeaderGetNatts(tup)
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetNumberOfAttributes(relation)
Definition rel.h:520
#define REGBUF_KEEP_DATA
Definition xloginsert.h:36
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

References Assert, AssertHasSnapshotForToast(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, fb(), FirstOffsetNumber, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
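
A minimal sketch of inserting a pre-formed tuple directly, the pattern behind simple_heap_insert() and the catalog insert helpers; rel and tup are assumed to be an open relation and a formed heap tuple.

heap_insert(rel, tup, GetCurrentCommandId(true), 0 /* options */, NULL);

/*
 * tup->t_self now holds the TID chosen for the new tuple; catalog callers
 * would go on to insert the matching index entries.
 */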

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer *  buffer,
TM_FailureData *  tmfd 
)
extern

Definition at line 4643 of file heapam.c.

4647{
4648 TM_Result result;
4649 ItemPointer tid = &(tuple->t_self);
4650 ItemId lp;
4651 Page page;
4652 Buffer vmbuffer = InvalidBuffer;
4653 BlockNumber block;
4654 TransactionId xid,
4655 xmax;
4659 bool first_time = true;
4660 bool skip_tuple_lock = false;
4661 bool have_tuple_lock = false;
4662 bool cleared_all_frozen = false;
4663
4664 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4665 block = ItemPointerGetBlockNumber(tid);
4666
4667 /*
4668 * Before locking the buffer, pin the visibility map page if it appears to
4669 * be necessary. Since we haven't got the lock yet, someone else might be
4670 * in the middle of changing this, so we'll need to recheck after we have
4671 * the lock.
4672 */
4673 if (PageIsAllVisible(BufferGetPage(*buffer)))
4674 visibilitymap_pin(relation, block, &vmbuffer);
4675
4677
4678 page = BufferGetPage(*buffer);
4681
4682 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4683 tuple->t_len = ItemIdGetLength(lp);
4684 tuple->t_tableOid = RelationGetRelid(relation);
4685
4686l3:
4687 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4688
4689 if (result == TM_Invisible)
4690 {
4691 /*
4692 * This is possible, but only when locking a tuple for ON CONFLICT
4693 * UPDATE. We return this value here rather than throwing an error in
4694 * order to give that case the opportunity to throw a more specific
4695 * error.
4696 */
4697 result = TM_Invisible;
4698 goto out_locked;
4699 }
4700 else if (result == TM_BeingModified ||
4701 result == TM_Updated ||
4702 result == TM_Deleted)
4703 {
4707 bool require_sleep;
4708 ItemPointerData t_ctid;
4709
4710 /* must copy state data before unlocking buffer */
4712 infomask = tuple->t_data->t_infomask;
4713 infomask2 = tuple->t_data->t_infomask2;
4714 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4715
4717
4718 /*
4719 * If any subtransaction of the current top transaction already holds
4720 * a lock as strong as or stronger than what we're requesting, we
4721 * effectively hold the desired lock already. We *must* succeed
4722 * without trying to take the tuple lock, else we will deadlock
4723 * against anyone wanting to acquire a stronger lock.
4724 *
4725 * Note we only do this the first time we loop on the HTSU result;
4726 * there is no point in testing in subsequent passes, because
4727 * evidently our own transaction cannot have acquired a new lock after
4728 * the first time we checked.
4729 */
4730 if (first_time)
4731 {
4732 first_time = false;
4733
4735 {
4736 int i;
4737 int nmembers;
4738 MultiXactMember *members;
4739
4740 /*
4741 * We don't need to allow old multixacts here; if that had
4742 * been the case, HeapTupleSatisfiesUpdate would have returned
4743 * MayBeUpdated and we wouldn't be here.
4744 */
4745 nmembers =
4746 GetMultiXactIdMembers(xwait, &members, false,
4748
4749 for (i = 0; i < nmembers; i++)
4750 {
4751 /* only consider members of our own transaction */
4752 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4753 continue;
4754
4755 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4756 {
4757 pfree(members);
4758 result = TM_Ok;
4759 goto out_unlocked;
4760 }
4761 else
4762 {
4763 /*
4764 * Disable acquisition of the heavyweight tuple lock.
4765 * Otherwise, when promoting a weaker lock, we might
4766 * deadlock with another locker that has acquired the
4767 * heavyweight tuple lock and is waiting for our
4768 * transaction to finish.
4769 *
4770 * Note that in this case we still need to wait for
4771 * the multixact if required, to avoid acquiring
4772 * conflicting locks.
4773 */
4774 skip_tuple_lock = true;
4775 }
4776 }
4777
4778 if (members)
4779 pfree(members);
4780 }
4782 {
4783 switch (mode)
4784 {
4785 case LockTupleKeyShare:
4789 result = TM_Ok;
4790 goto out_unlocked;
4791 case LockTupleShare:
4794 {
4795 result = TM_Ok;
4796 goto out_unlocked;
4797 }
4798 break;
4801 {
4802 result = TM_Ok;
4803 goto out_unlocked;
4804 }
4805 break;
4806 case LockTupleExclusive:
4809 {
4810 result = TM_Ok;
4811 goto out_unlocked;
4812 }
4813 break;
4814 }
4815 }
4816 }
4817
4818 /*
4819 * Initially assume that we will have to wait for the locking
4820 * transaction(s) to finish. We check various cases below in which
4821 * this can be turned off.
4822 */
4823 require_sleep = true;
4824 if (mode == LockTupleKeyShare)
4825 {
4826 /*
4827 * If we're requesting KeyShare, and there's no update present, we
4828 * don't need to wait. Even if there is an update, we can still
4829 * continue if the key hasn't been modified.
4830 *
4831 * However, if there are updates, we need to walk the update chain
4832 * to mark future versions of the row as locked, too. That way,
4833 * if somebody deletes that future version, we're protected
4834 * against the key going away. This locking of future versions
4835 * could block momentarily, if a concurrent transaction is
4836 * deleting a key; or it could return a value to the effect that
4837 * the transaction deleting the key has already committed. So we
4838 * do this before re-locking the buffer; otherwise this would be
4839 * prone to deadlocks.
4840 *
4841 * Note that the TID we're locking was grabbed before we unlocked
4842 * the buffer. For it to change while we're not looking, the
4843 * other properties we're testing for below after re-locking the
4844 * buffer would also change, in which case we would restart this
4845 * loop above.
4846 */
4848 {
4849 bool updated;
4850
4852
4853 /*
4854 * If there are updates, follow the update chain; bail out if
4855 * that cannot be done.
4856 */
4857 if (follow_updates && updated &&
4858 !ItemPointerEquals(&tuple->t_self, &t_ctid))
4859 {
4860 TM_Result res;
4861
4862 res = heap_lock_updated_tuple(relation,
4863 infomask, xwait, &t_ctid,
4865 mode);
4866 if (res != TM_Ok)
4867 {
4868 result = res;
4869 /* recovery code expects to have buffer lock held */
4871 goto failed;
4872 }
4873 }
4874
4876
4877 /*
4878 * Make sure it's still an appropriate lock, else start over.
4879 * Also, if it wasn't updated before we released the lock, but
4880 * is updated now, we start over too; the reason is that we
4881 * now need to follow the update chain to lock the new
4882 * versions.
4883 */
4884 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4885 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4886 !updated))
4887 goto l3;
4888
4889 /* Things look okay, so we can skip sleeping */
4890 require_sleep = false;
4891
4892 /*
4893 * Note we allow Xmax to change here; other updaters/lockers
4894 * could have modified it before we grabbed the buffer lock.
4895 * However, this is not a problem, because with the recheck we
4896 * just did we ensure that they still don't conflict with the
4897 * lock we want.
4898 */
4899 }
4900 }
4901 else if (mode == LockTupleShare)
4902 {
4903 /*
4904 * If we're requesting Share, we can similarly avoid sleeping if
4905 * there's no update and no exclusive lock present.
4906 */
4909 {
4911
4912 /*
4913 * Make sure it's still an appropriate lock, else start over.
4914 * See above about allowing xmax to change.
4915 */
4918 goto l3;
4919 require_sleep = false;
4920 }
4921 }
4922 else if (mode == LockTupleNoKeyExclusive)
4923 {
4924 /*
4925 * If we're requesting NoKeyExclusive, we might also be able to
4926 * avoid sleeping; just ensure that there is no conflicting lock
4927 * already acquired.
4928 */
4930 {
4932 mode, NULL))
4933 {
4934 /*
4935 * No conflict, but if the xmax changed under us in the
4936 * meantime, start over.
4937 */
4941 xwait))
4942 goto l3;
4943
4944 /* otherwise, we're good */
4945 require_sleep = false;
4946 }
4947 }
4949 {
4951
4952 /* if the xmax changed in the meantime, start over */
4955 xwait))
4956 goto l3;
4957 /* otherwise, we're good */
4958 require_sleep = false;
4959 }
4960 }
4961
4962 /*
4963 * As a check independent from those above, we can also avoid sleeping
4964 * if the current transaction is the sole locker of the tuple. Note
4965 * that the strength of the lock already held is irrelevant; this is
4966 * not about recording the lock in Xmax (which will be done regardless
4967 * of this optimization, below). Also, note that the cases where we
4968 * hold a lock stronger than we are requesting are already handled
4969 * above by not doing anything.
4970 *
4971 * Note we only deal with the non-multixact case here; MultiXactIdWait
4972 * is well equipped to deal with this situation on its own.
4973 */
4976 {
4977 /* ... but if the xmax changed in the meantime, start over */
4981 xwait))
4982 goto l3;
4984 require_sleep = false;
4985 }
4986
4987 /*
4988 * Time to sleep on the other transaction/multixact, if necessary.
4989 *
4990 * If the other transaction is an update/delete that's already
4991 * committed, then sleeping cannot possibly do any good: if we're
4992 * required to sleep, get out to raise an error instead.
4993 *
4994 * By here, we either have already acquired the buffer exclusive lock,
4995 * or we must wait for the locking transaction or multixact; so below
4996 * we ensure that we grab buffer lock after the sleep.
4997 */
4998 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4999 {
5001 goto failed;
5002 }
5003 else if (require_sleep)
5004 {
5005 /*
5006 * Acquire tuple lock to establish our priority for the tuple, or
5007 * die trying. LockTuple will release us when we are next-in-line
5008 * for the tuple. We must do this even if we are share-locking,
5009 * but not if we already have a weaker lock on the tuple.
5010 *
5011 * If we are forced to "start over" below, we keep the tuple lock;
5012 * this arranges that we stay at the head of the line while
5013 * rechecking tuple state.
5014 */
5015 if (!skip_tuple_lock &&
5016 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
5018 {
5019 /*
5020 * This can only happen if wait_policy is Skip and the lock
5021 * couldn't be obtained.
5022 */
5023 result = TM_WouldBlock;
5024 /* recovery code expects to have buffer lock held */
5026 goto failed;
5027 }
5028
5030 {
5032
5033 /* We only ever lock tuples, never update them */
5034 if (status >= MultiXactStatusNoKeyUpdate)
5035 elog(ERROR, "invalid lock mode in heap_lock_tuple");
5036
5037 /* wait for multixact to end, or die trying */
5038 switch (wait_policy)
5039 {
5040 case LockWaitBlock:
5042 relation, &tuple->t_self, XLTW_Lock, NULL);
5043 break;
5044 case LockWaitSkip:
5046 status, infomask, relation,
5047 NULL, false))
5048 {
5049 result = TM_WouldBlock;
5050 /* recovery code expects to have buffer lock held */
5052 goto failed;
5053 }
5054 break;
5055 case LockWaitError:
5057 status, infomask, relation,
5059 ereport(ERROR,
5061 errmsg("could not obtain lock on row in relation \"%s\"",
5062 RelationGetRelationName(relation))));
5063
5064 break;
5065 }
5066
5067 /*
5068 * Of course, the multixact might not be done here: if we're
5069 * requesting a light lock mode, other transactions with light
5070 * locks could still be alive, as well as locks owned by our
5071 * own xact or other subxacts of this backend. We need to
5072 * preserve the surviving MultiXact members. Note that it
5073 * isn't absolutely necessary in the latter case, but doing so
5074 * is simpler.
5075 */
5076 }
5077 else
5078 {
5079 /* wait for regular transaction to end, or die trying */
5080 switch (wait_policy)
5081 {
5082 case LockWaitBlock:
5083 XactLockTableWait(xwait, relation, &tuple->t_self,
5084 XLTW_Lock);
5085 break;
5086 case LockWaitSkip:
5088 {
5089 result = TM_WouldBlock;
5090 /* recovery code expects to have buffer lock held */
5092 goto failed;
5093 }
5094 break;
5095 case LockWaitError:
5097 ereport(ERROR,
5099 errmsg("could not obtain lock on row in relation \"%s\"",
5100 RelationGetRelationName(relation))));
5101 break;
5102 }
5103 }
5104
5105 /* if there are updates, follow the update chain */
5107 !ItemPointerEquals(&tuple->t_self, &t_ctid))
5108 {
5109 TM_Result res;
5110
5111 res = heap_lock_updated_tuple(relation,
5112 infomask, xwait, &t_ctid,
5114 mode);
5115 if (res != TM_Ok)
5116 {
5117 result = res;
5118 /* recovery code expects to have buffer lock held */
5120 goto failed;
5121 }
5122 }
5123
5125
5126 /*
5127 * xwait is done, but if xwait had just locked the tuple then some
5128 * other xact could update this tuple before we get to this point.
5129 * Check for xmax change, and start over if so.
5130 */
5133 xwait))
5134 goto l3;
5135
5137 {
5138 /*
5139 * Otherwise check if it committed or aborted. Note we cannot
5140 * be here if the tuple was only locked by somebody who didn't
5141 * conflict with us; that would have been handled above. So
5142 * that transaction must necessarily be gone by now. But
5143 * don't check for this in the multixact case, because some
5144 * locker transactions might still be running.
5145 */
5146 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5147 }
5148 }
5149
5150 /* By here, we're certain that we hold buffer exclusive lock again */
5151
5152 /*
5153 * We may lock if previous xmax aborted, or if it committed but only
5154 * locked the tuple without updating it; or if we didn't have to wait
5155 * at all for whatever reason.
5156 */
5157 if (!require_sleep ||
5158 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5161 result = TM_Ok;
5162 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5163 result = TM_Updated;
5164 else
5165 result = TM_Deleted;
5166 }
5167
5168failed:
5169 if (result != TM_Ok)
5170 {
5171 Assert(result == TM_SelfModified || result == TM_Updated ||
5172 result == TM_Deleted || result == TM_WouldBlock);
5173
5174 /*
5175 * When locking a tuple under LockWaitSkip semantics and we fail with
5176 * TM_WouldBlock above, it's possible for concurrent transactions to
5177 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5178 * this assert is slightly different from the equivalent one in
5179 * heap_delete and heap_update.
5180 */
5181 Assert((result == TM_WouldBlock) ||
5182 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5183 Assert(result != TM_Updated ||
5184 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5185 tmfd->ctid = tuple->t_data->t_ctid;
5186 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5187 if (result == TM_SelfModified)
5188 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5189 else
5190 tmfd->cmax = InvalidCommandId;
5191 goto out_locked;
5192 }
5193
5194 /*
5195 * If we didn't pin the visibility map page and the page has become all
5196 * visible while we were busy locking the buffer, or during some
5197 * subsequent window during which we had it unlocked, we'll have to unlock
5198 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5199 * unfortunate, especially since we'll now have to recheck whether the
5200 * tuple has been locked or updated under us, but hopefully it won't
5201 * happen very often.
5202 */
5203 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5204 {
5206 visibilitymap_pin(relation, block, &vmbuffer);
5208 goto l3;
5209 }
5210
5211 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5212 old_infomask = tuple->t_data->t_infomask;
5213
5214 /*
5215 * If this is the first possibly-multixact-able operation in the current
5216 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5217 * certain that the transaction will never become a member of any older
5218 * MultiXactIds than that. (We have to do this even if we end up just
5219 * using our own TransactionId below, since some other backend could
5220 * incorporate our XID into a MultiXact immediately afterwards.)
5221 */
5223
5224 /*
5225 * Compute the new xmax and infomask to store into the tuple. Note we do
5226 * not modify the tuple just yet, because that would leave it in the wrong
5227 * state if multixact.c elogs.
5228 */
5230 GetCurrentTransactionId(), mode, false,
5231 &xid, &new_infomask, &new_infomask2);
5232
5234
5235 /*
5236 * Store transaction information of xact locking the tuple.
5237 *
5238 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5239 * possibly generating a useless combo CID. Moreover, if we're locking a
5240 * previously updated tuple, it's important to preserve the Cmax.
5241 *
5242 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5243 * we would break the HOT chain.
5244 */
5247 tuple->t_data->t_infomask |= new_infomask;
5248 tuple->t_data->t_infomask2 |= new_infomask2;
5251 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5252
5253 /*
5254 * Make sure there is no forward chain link in t_ctid. Note that in the
5255 * cases where the tuple has been updated, we must not overwrite t_ctid,
5256 * because it was set by the updater. Moreover, if the tuple has been
5257 * updated, we need to follow the update chain to lock the new versions of
5258 * the tuple as well.
5259 */
5261 tuple->t_data->t_ctid = *tid;
5262
5263 /* Clear only the all-frozen bit on visibility map if needed */
5264 if (PageIsAllVisible(page) &&
5265 visibilitymap_clear(relation, block, vmbuffer,
5267 cleared_all_frozen = true;
5268
5269
5270 MarkBufferDirty(*buffer);
5271
5272 /*
5273 * XLOG stuff. You might think that we don't need an XLOG record because
5274 * there is no state change worth restoring after a crash. You would be
5275 * wrong however: we have just written either a TransactionId or a
5276 * MultiXactId that may never have been seen on disk before, and we need
5277 * to make sure that there are XLOG entries covering those ID numbers.
5278 * Else the same IDs might be re-used after a crash, which would be
5279 * disastrous if this page made it to disk before the crash. Essentially
5280 * we have to enforce the WAL log-before-data rule even in this case.
5281 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5282 * entries for everything anyway.)
5283 */
5284 if (RelationNeedsWAL(relation))
5285 {
5288
5291
5292 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5293 xlrec.xmax = xid;
5294 xlrec.infobits_set = compute_infobits(new_infomask,
5295 tuple->t_data->t_infomask2);
5298
5299 /* we don't decode row locks atm, so no need to log the origin */
5300
5302
5303 PageSetLSN(page, recptr);
5304 }
5305
5307
5308 result = TM_Ok;
5309
5312
5314 if (BufferIsValid(vmbuffer))
5315 ReleaseBuffer(vmbuffer);
5316
5317 /*
5318 * Don't update the visibility map here. Locking a tuple doesn't change
5319 * visibility info.
5320 */
5321
5322 /*
5323 * Now that we have successfully marked the tuple as locked, we can
5324 * release the lmgr tuple lock, if we had it.
5325 */
5326 if (have_tuple_lock)
5327 UnlockTupleTuplock(relation, tid, mode);
5328
5329 return result;
5330}
#define TUPLOCK_from_mxstatus(status)
Definition heapam.c:217
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition heapam.c:7875
static TM_Result heap_lock_updated_tuple(Relation rel, uint16 prior_infomask, TransactionId prior_raw_xmax, const ItemPointerData *prior_ctid, TransactionId xid, LockTupleMode mode)
Definition heapam.c:6114
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition heapam.c:4596
#define XLH_LOCK_ALL_FROZEN_CLEARED
#define XLOG_HEAP_LOCK
Definition heapam_xlog.h:39
#define SizeOfHeapLock
#define HEAP_KEYS_UPDATED
static bool HEAP_XMAX_IS_SHR_LOCKED(uint16 infomask)
static bool HEAP_XMAX_IS_EXCL_LOCKED(uint16 infomask)
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition lmgr.c:739
@ XLTW_Lock
Definition lmgr.h:29
bool log_lock_failures
Definition lock.c:54
@ LockWaitSkip
Definition lockoptions.h:41
@ LockWaitError
Definition lockoptions.h:43
@ LockTupleShare
Definition lockoptions.h:54
@ LockTupleKeyShare
Definition lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1113
static PgChecksumMode mode
@ TM_WouldBlock
Definition tableam.h:103
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, fb(), get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failures, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
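
A minimal caller-side sketch (not code from the PostgreSQL tree; lock_one_row is an illustrative name, and the heap_lock_tuple() argument order assumed here follows its heapam.h prototype):

 static TM_Result
 lock_one_row(Relation rel, ItemPointer tid, CommandId cid, LockTupleMode mode)
 {
     HeapTupleData  tuple;
     Buffer         buffer = InvalidBuffer;
     TM_FailureData tmfd;
     TM_Result      result;

     /* heap_lock_tuple() locates the row version via t_self */
     tuple.t_self = *tid;
     result = heap_lock_tuple(rel, &tuple, cid, mode,
                              LockWaitBlock,   /* sleep until the lock is granted */
                              false,           /* don't lock later row versions */
                              &buffer, &tmfd);

     /* the function returns with the buffer pinned but not locked */
     if (BufferIsValid(buffer))
         ReleaseBuffer(buffer);

     return result;
 }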

◆ heap_multi_insert()

void heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                       CommandId cid, int options, BulkInsertState bistate)
extern

Definition at line 2412 of file heapam.c.

2414{
2417 int i;
2418 int ndone;
2420 Page page;
2421 Buffer vmbuffer = InvalidBuffer;
2422 bool needwal;
2426 bool starting_with_empty_page = false;
2427 int npages = 0;
2428 int npages_used = 0;
2429
2430 /* currently not needed (thus unsupported) for heap_multi_insert() */
2432
2433 AssertHasSnapshotForToast(relation);
2434
2435 needwal = RelationNeedsWAL(relation);
2438
2439 /* Toast and set header data in all the slots */
2440 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2441 for (i = 0; i < ntuples; i++)
2442 {
2443 HeapTuple tuple;
2444
2445 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2446 slots[i]->tts_tableOid = RelationGetRelid(relation);
2447 tuple->t_tableOid = slots[i]->tts_tableOid;
2448 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2449 options);
2450 }
2451
2452 /*
2453 * We're about to do the actual inserts -- but check for conflict first,
2454 * to minimize the possibility of having to roll back work we've just
2455 * done.
2456 *
2457 * A check here does not definitively prevent a serialization anomaly;
2458 * that check MUST be done at least past the point of acquiring an
2459 * exclusive buffer content lock on every buffer that will be affected,
2460 * and MAY be done after all inserts are reflected in the buffers and
2461 * those locks are released; otherwise there is a race condition. Since
2462 * multiple buffers can be locked and unlocked in the loop below, and it
2463 * would not be feasible to identify and lock all of those buffers before
2464 * the loop, we must do a final check at the end.
2465 *
2466 * The check here could be omitted with no loss of correctness; it is
2467 * present strictly as an optimization.
2468 *
2469 * For heap inserts, we only need to check for table-level SSI locks. Our
2470 * new tuples can't possibly conflict with existing tuple locks, and heap
2471 * page locks are only consolidated versions of tuple locks; they do not
2472 * lock "gaps" as index page locks do. So we don't need to specify a
2473 * buffer when making the call, which makes for a faster check.
2474 */
2476
2477 ndone = 0;
2478 while (ndone < ntuples)
2479 {
2480 Buffer buffer;
2481 bool all_visible_cleared = false;
2482 bool all_frozen_set = false;
2483 int nthispage;
2484
2486
2487 /*
2488 * Compute number of pages needed to fit the to-be-inserted tuples in
2489 * the worst case. This will be used to determine how much to extend
2490 * the relation by in RelationGetBufferForTuple(), if needed. If we
2491 * filled a prior page from scratch, we can just update our last
2492 * computation, but if we started with a partially filled page,
2493 * recompute from scratch, the number of potentially required pages
2494 * can vary due to tuples needing to fit onto the page, page headers
2495 * etc.
2496 */
2497 if (ndone == 0 || !starting_with_empty_page)
2498 {
2499 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2501 npages_used = 0;
2502 }
2503 else
2504 npages_used++;
2505
2506 /*
2507 * Find buffer where at least the next tuple will fit. If the page is
2508 * all-visible, this will also pin the requisite visibility map page.
2509 *
2510 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2511 * empty page. See all_frozen_set below.
2512 */
2513 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2514 InvalidBuffer, options, bistate,
2515 &vmbuffer, NULL,
2516 npages - npages_used);
2517 page = BufferGetPage(buffer);
2518
2520
2522 {
2523 all_frozen_set = true;
2524 /* Lock the vmbuffer before entering the critical section */
2526 }
2527
2528 /* NO EREPORT(ERROR) from here till changes are logged */
2530
2531 /*
2532 * RelationGetBufferForTuple has ensured that the first tuple fits.
2533 * Put that on the page, and then as many other tuples as fit.
2534 */
2535 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2536
2537 /*
2538 * For logical decoding we need combo CIDs to properly decode the
2539 * catalog.
2540 */
2541 if (needwal && need_cids)
2542 log_heap_new_cid(relation, heaptuples[ndone]);
2543
2544 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2545 {
2547
2548 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2549 break;
2550
2551 RelationPutHeapTuple(relation, buffer, heaptup, false);
2552
2553 /*
2554 * For logical decoding we need combo CIDs to properly decode the
2555 * catalog.
2556 */
2557 if (needwal && need_cids)
2558 log_heap_new_cid(relation, heaptup);
2559 }
2560
2561 /*
2562 * If the page is all visible, need to clear that, unless we're only
2563 * going to add further frozen rows to it.
2564 *
2565 * If we're only adding already frozen rows to a previously empty
2566 * page, mark it as all-frozen and update the visibility map. We're
2567 * already holding a pin on the vmbuffer.
2568 */
2570 {
2571 all_visible_cleared = true;
2572 PageClearAllVisible(page);
2573 visibilitymap_clear(relation,
2574 BufferGetBlockNumber(buffer),
2575 vmbuffer, VISIBILITYMAP_VALID_BITS);
2576 }
2577 else if (all_frozen_set)
2578 {
2579 PageSetAllVisible(page);
2581 vmbuffer,
2584 relation->rd_locator);
2585 }
2586
2587 /*
2588 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2589 */
2590
2591 MarkBufferDirty(buffer);
2592
2593 /* XLOG stuff */
2594 if (needwal)
2595 {
2599 char *tupledata;
2600 int totaldatalen;
2601 char *scratchptr = scratch.data;
2602 bool init;
2603 int bufflags = 0;
2604
2605 /*
2606 * If the page was previously empty, we can reinit the page
2607 * instead of restoring the whole thing.
2608 */
2610
2611 /* allocate xl_heap_multi_insert struct from the scratch area */
2614
2615 /*
2616 * Allocate offsets array. Unless we're reinitializing the page,
2617 * in that case the tuples are stored in order starting at
2618 * FirstOffsetNumber and we don't need to store the offsets
2619 * explicitly.
2620 */
2621 if (!init)
2622 scratchptr += nthispage * sizeof(OffsetNumber);
2623
2624 /* the rest of the scratch space is used for tuple data */
2625 tupledata = scratchptr;
2626
2627 /* check that the mutually exclusive flags are not both set */
2629
2630 xlrec->flags = 0;
2633
2634 /*
2635 * We don't have to worry about including a conflict xid in the
2636 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
2637 * visibility rules.
2638 */
2639 if (all_frozen_set)
2641
2642 xlrec->ntuples = nthispage;
2643
2644 /*
2645 * Write out an xl_multi_insert_tuple and the tuple data itself
2646 * for each tuple.
2647 */
2648 for (i = 0; i < nthispage; i++)
2649 {
2651 xl_multi_insert_tuple *tuphdr;
2652 int datalen;
2653
2654 if (!init)
2655 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2656 /* xl_multi_insert_tuple needs two-byte alignment. */
2658 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2659
2660 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2661 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2662 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2663
2664 /* write bitmap [+ padding] [+ oid] + data */
2665 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2667 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2668 datalen);
2669 tuphdr->datalen = datalen;
2670 scratchptr += datalen;
2671 }
2672 totaldatalen = scratchptr - tupledata;
2673 Assert((scratchptr - scratch.data) < BLCKSZ);
2674
2675 if (need_tuple_data)
2677
2678 /*
2679 * Signal that this is the last xl_heap_multi_insert record
2680 * emitted by this call to heap_multi_insert(). Needed for logical
2681 * decoding so it knows when to cleanup temporary data.
2682 */
2683 if (ndone + nthispage == ntuples)
2685
2686 if (init)
2687 {
2688 info |= XLOG_HEAP_INIT_PAGE;
2690 }
2691
2692 /*
2693 * If we're doing logical decoding, include the new tuple data
2694 * even if we take a full-page image of the page.
2695 */
2696 if (need_tuple_data)
2698
2700 XLogRegisterData(xlrec, tupledata - scratch.data);
2702 if (all_frozen_set)
2703 XLogRegisterBuffer(1, vmbuffer, 0);
2704
2705 XLogRegisterBufData(0, tupledata, totaldatalen);
2706
2707 /* filtering by origin on a row level is much more efficient */
2709
2710 recptr = XLogInsert(RM_HEAP2_ID, info);
2711
2712 PageSetLSN(page, recptr);
2713 if (all_frozen_set)
2714 {
2715 Assert(BufferIsDirty(vmbuffer));
2716 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2717 }
2718 }
2719
2721
2722 if (all_frozen_set)
2723 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2724
2725 UnlockReleaseBuffer(buffer);
2726 ndone += nthispage;
2727
2728 /*
2729 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2730 * likely that we'll insert into subsequent heap pages that are likely
2731 * to use the same vm page.
2732 */
2733 }
2734
2735 /* We're done with inserting all tuples, so release the last vmbuffer. */
2736 if (vmbuffer != InvalidBuffer)
2737 ReleaseBuffer(vmbuffer);
2738
2739 /*
2740 * We're done with the actual inserts. Check for conflicts again, to
2741 * ensure that all rw-conflicts in to these inserts are detected. Without
2742 * this final check, a sequential scan of the heap may have locked the
2743 * table after the "before" check, missing one opportunity to detect the
2744 * conflict, and then scanned the table before the new tuples were there,
2745 * missing the other chance to detect the conflict.
2746 *
2747 * For heap inserts, we only need to check for table-level SSI locks. Our
2748 * new tuples can't possibly conflict with existing tuple locks, and heap
2749 * page locks are only consolidated versions of tuple locks; they do not
2750 * lock "gaps" as index page locks do. So we don't need to specify a
2751 * buffer when making the call.
2752 */
2754
2755 /*
2756 * If tuples are cacheable, mark them for invalidation from the caches in
2757 * case we abort. Note it is OK to do this after releasing the buffer,
2758 * because the heaptuples data structure is all in local memory, not in
2759 * the shared buffer.
2760 */
2761 if (IsCatalogRelation(relation))
2762 {
2763 for (i = 0; i < ntuples; i++)
2765 }
2766
2767 /* copy t_self fields back to the caller's slots */
2768 for (i = 0; i < ntuples; i++)
2769 slots[i]->tts_tid = heaptuples[i]->t_self;
2770
2771 pgstat_count_heap_insert(relation, ntuples);
2772}
bool BufferIsDirty(Buffer buffer)
Definition bufmgr.c:3024
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
static void PageSetAllVisible(Page page)
Definition bufpage.h:433
#define MAXALIGN(LEN)
Definition c.h:826
#define SHORTALIGN(LEN)
Definition c.h:822
size_t Size
Definition c.h:619
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition heapam.c:2380
#define HEAP_INSERT_FROZEN
Definition heapam.h:38
#define SizeOfHeapMultiInsert
#define XLOG_HEAP2_MULTI_INSERT
Definition heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition heapam_xlog.h:79
#define SizeOfMultiInsertTuple
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition rel.h:389
#define HEAP_DEFAULT_FILLFACTOR
Definition rel.h:360
#define init()
RelFileLocator rd_locator
Definition rel.h:57
void visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_ALL_VISIBLE

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsDirty(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), fb(), GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, IsCatalogRelation(), ItemPointerGetOffsetNumber(), LockBuffer(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), RelationData::rd_locator, REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, xl_multi_insert_tuple::t_hoff, xl_multi_insert_tuple::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_set_vmbits(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
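
A minimal usage sketch along the lines of the catalog multi-insert caller (not code from the PostgreSQL tree; insert_batch is an illustrative name and the slots are assumed to be already filled by the caller):

 static void
 insert_batch(Relation rel, TupleTableSlot **slots, int nslots)
 {
     BulkInsertState bistate = GetBulkInsertState();

     heap_multi_insert(rel, slots, nslots,
                       GetCurrentCommandId(true),   /* cid */
                       0,                           /* no HEAP_INSERT_* options */
                       bistate);

     FreeBulkInsertState(bistate);
 }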

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult,
                                OffsetNumber *off_loc, TransactionId *new_relfrozen_xid,
                                MultiXactId *new_relmin_mxid)
extern

Definition at line 819 of file pruneheap.c.

824{
825 Buffer buffer = params->buffer;
826 Page page = BufferGetPage(buffer);
828 bool do_freeze;
829 bool do_prune;
830 bool do_hint_prune;
833
834 /* Initialize prstate */
835 prune_freeze_setup(params,
837 presult, &prstate);
838
839 /*
840 * Examine all line pointers and tuple visibility information to determine
841 * which line pointers should change state and which tuples may be frozen.
842 * Prepare queue of state changes to later be executed in a critical
843 * section.
844 */
846 buffer, &prstate, off_loc);
847
848 /*
849 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
850 * checking tuple visibility information in prune_freeze_plan() may have
851 * caused an FPI to be emitted.
852 */
854
855 do_prune = prstate.nredirected > 0 ||
856 prstate.ndead > 0 ||
857 prstate.nunused > 0;
858
859 /*
860 * Even if we don't prune anything, if we found a new value for the
861 * pd_prune_xid field or the page was marked full, we will update the hint
862 * bit.
863 */
864 do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
865 PageIsFull(page);
866
867 /*
868 * Decide if we want to go ahead with freezing according to the freeze
869 * plans we prepared, or not.
870 */
871 do_freeze = heap_page_will_freeze(params->relation, buffer,
873 do_prune,
875 &prstate);
876
877 /*
878 * While scanning the line pointers, we did not clear
879 * all_visible/all_frozen when encountering LP_DEAD items because we
880 * wanted the decision whether or not to freeze the page to be unaffected
881 * by the short-term presence of LP_DEAD items. These LP_DEAD items are
882 * effectively assumed to be LP_UNUSED items in the making. It doesn't
883 * matter which vacuum heap pass (initial pass or final pass) ends up
884 * setting the page all-frozen, as long as the ongoing VACUUM does it.
885 *
886 * Now that we finished determining whether or not to freeze the page,
887 * update all_visible and all_frozen so that they reflect the true state
888 * of the page for setting PD_ALL_VISIBLE and VM bits.
889 */
890 if (prstate.lpdead_items > 0)
891 prstate.all_visible = prstate.all_frozen = false;
892
893 Assert(!prstate.all_frozen || prstate.all_visible);
894
895 /* Any error while applying the changes is critical */
897
898 if (do_hint_prune)
899 {
900 /*
901 * Update the page's pd_prune_xid field to either zero, or the lowest
902 * XID of any soon-prunable tuple.
903 */
904 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
905
906 /*
907 * Also clear the "page is full" flag, since there's no point in
908 * repeating the prune/defrag process until something else happens to
909 * the page.
910 */
911 PageClearFull(page);
912
913 /*
914 * If that's all we had to do to the page, this is a non-WAL-logged
915 * hint. If we are going to freeze or prune the page, we will mark
916 * the buffer dirty below.
917 */
918 if (!do_freeze && !do_prune)
919 MarkBufferDirtyHint(buffer, true);
920 }
921
922 if (do_prune || do_freeze)
923 {
924 /* Apply the planned item changes and repair page fragmentation. */
925 if (do_prune)
926 {
927 heap_page_prune_execute(buffer, false,
928 prstate.redirected, prstate.nredirected,
929 prstate.nowdead, prstate.ndead,
930 prstate.nowunused, prstate.nunused);
931 }
932
933 if (do_freeze)
934 heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
935
936 MarkBufferDirty(buffer);
937
938 /*
939 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
940 */
941 if (RelationNeedsWAL(params->relation))
942 {
943 /*
944 * The snapshotConflictHorizon for the whole record should be the
945 * most conservative of all the horizons calculated for any of the
946 * possible modifications. If this record will prune tuples, any
947 * transactions on the standby older than the youngest xmax of the
948 * most recently removed tuple this record will prune will
949 * conflict. If this record will freeze tuples, any transactions
950 * on the standby with xids older than the youngest tuple this
951 * record will freeze will conflict.
952 */
954
955 if (TransactionIdFollows(prstate.frz_conflict_horizon,
956 prstate.latest_xid_removed))
957 conflict_xid = prstate.frz_conflict_horizon;
958 else
959 conflict_xid = prstate.latest_xid_removed;
960
961 log_heap_prune_and_freeze(params->relation, buffer,
962 InvalidBuffer, /* vmbuffer */
963 0, /* vmflags */
965 true, params->reason,
966 prstate.frozen, prstate.nfrozen,
967 prstate.redirected, prstate.nredirected,
968 prstate.nowdead, prstate.ndead,
969 prstate.nowunused, prstate.nunused);
970 }
971 }
972
974
975 /* Copy information back for caller */
976 presult->ndeleted = prstate.ndeleted;
977 presult->nnewlpdead = prstate.ndead;
978 presult->nfrozen = prstate.nfrozen;
979 presult->live_tuples = prstate.live_tuples;
980 presult->recently_dead_tuples = prstate.recently_dead_tuples;
981 presult->all_visible = prstate.all_visible;
982 presult->all_frozen = prstate.all_frozen;
983 presult->hastup = prstate.hastup;
984
985 /*
986 * For callers planning to update the visibility map, the conflict horizon
987 * for that record must be the newest xmin on the page. However, if the
988 * page is completely frozen, there can be no conflict and the
989 * vm_conflict_horizon should remain InvalidTransactionId. This includes
990 * the case that we just froze all the tuples; the prune-freeze record
991 * included the conflict XID already so the caller doesn't need it.
992 */
993 if (presult->all_frozen)
994 presult->vm_conflict_horizon = InvalidTransactionId;
995 else
996 presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
997
998 presult->lpdead_items = prstate.lpdead_items;
999 /* the presult->deadoffsets array was already filled in */
1000
1001 if (prstate.attempt_freeze)
1002 {
1003 if (presult->nfrozen > 0)
1004 {
1005 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
1006 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
1007 }
1008 else
1009 {
1010 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1011 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1012 }
1013 }
1014}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5565
static void PageClearFull(Page page)
Definition bufpage.h:422
static bool PageIsFull(const PageData *page)
Definition bufpage.h:412
int64_t int64
Definition c.h:543
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition heapam.c:7460
WalUsage pgWalUsage
Definition instrument.c:22
static bool heap_page_will_freeze(Relation relation, Buffer buffer, bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition pruneheap.c:663
static void prune_freeze_plan(Oid reloid, Buffer buffer, PruneState *prstate, OffsetNumber *off_loc)
Definition pruneheap.c:458
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2167
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, PruneFreezeResult *presult, PruneState *prstate)
Definition pruneheap.c:330
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition pruneheap.c:1671
PruneReason reason
Definition heapam.h:245
Relation relation
Definition heapam.h:238
int64 wal_fpi
Definition instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297

References Assert, PruneFreezeParams::buffer, BufferGetPage(), END_CRIT_SECTION, fb(), heap_freeze_prepared_tuples(), heap_page_prune_execute(), heap_page_will_freeze(), InvalidBuffer, InvalidTransactionId, log_heap_prune_and_freeze(), MarkBufferDirty(), MarkBufferDirtyHint(), PageClearFull(), PageIsFull(), pgWalUsage, prune_freeze_plan(), prune_freeze_setup(), PruneFreezeParams::reason, PruneFreezeParams::relation, RelationGetRelid, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdFollows(), and WalUsage::wal_fpi.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
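
A hedged sketch of an on-access-style call, mirroring the PruneFreezeParams setup used by heap_page_prune_opt() below; it assumes the caller already holds a cleanup lock on buffer and does not attempt freezing:

 PruneFreezeParams params = {
     .relation = relation,
     .buffer = buffer,
     .reason = PRUNE_ON_ACCESS,
     .options = 0,                            /* no MARK_UNUSED_NOW, no FREEZE */
     .vistest = GlobalVisTestFor(relation),
     .cutoffs = NULL,                         /* only needed when freezing */
 };
 PruneFreezeResult presult;

 heap_page_prune_and_freeze(&params, &presult,
                            NULL,             /* off_loc: no error-context offset */
                            NULL, NULL);      /* relfrozenxid/relminmxid trackers are
                                               * not needed because freezing is not
                                               * requested */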

◆ heap_page_prune_execute()

void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
                             OffsetNumber *redirected, int nredirected,
                             OffsetNumber *nowdead, int ndead,
                             OffsetNumber *nowunused, int nunused)
extern

Definition at line 1671 of file pruneheap.c.

1675{
1676 Page page = BufferGetPage(buffer);
1677 OffsetNumber *offnum;
1679
1680 /* Shouldn't be called unless there's something to do */
1681 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1682
1683 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1684 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1685
1686 /* Update all redirected line pointers */
1687 offnum = redirected;
1688 for (int i = 0; i < nredirected; i++)
1689 {
1690 OffsetNumber fromoff = *offnum++;
1691 OffsetNumber tooff = *offnum++;
1694
1695#ifdef USE_ASSERT_CHECKING
1696
1697 /*
1698 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1699 * must be the first item from a HOT chain. If the item has tuple
1700 * storage then it can't be a heap-only tuple. Otherwise we are just
1701 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1702 * has been pruned at least once before now.
1703 */
1705 {
1707
1708 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1710 }
1711 else
1712 {
1713 /* We shouldn't need to redundantly set the redirect */
1715 }
1716
1717 /*
1718 * The item that we're about to set as an LP_REDIRECT (the 'from'
1719 * item) will point to an existing item (the 'to' item) that is
1720 * already a heap-only tuple. There can be at most one LP_REDIRECT
1721 * item per HOT chain.
1722 *
1723 * We need to keep around an LP_REDIRECT item (after original
1724 * non-heap-only root tuple gets pruned away) so that it's always
1725 * possible for VACUUM to easily figure out what TID to delete from
1726 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1727 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1728 * tuple can.
1729 *
1730 * This check may miss problems, e.g. the target of a redirect could
1731 * be marked as unused subsequently. The page_verify_redirects() check
1732 * below will catch such problems.
1733 */
1734 tolp = PageGetItemId(page, tooff);
1736 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1738#endif
1739
1741 }
1742
1743 /* Update all now-dead line pointers */
1744 offnum = nowdead;
1745 for (int i = 0; i < ndead; i++)
1746 {
1747 OffsetNumber off = *offnum++;
1748 ItemId lp = PageGetItemId(page, off);
1749
1750#ifdef USE_ASSERT_CHECKING
1751
1752 /*
1753 * An LP_DEAD line pointer must be left behind when the original item
1754 * (which is dead to everybody) could still be referenced by a TID in
1755 * an index. This should never be necessary with any individual
1756 * heap-only tuple item, though. (It's not clear how much of a problem
1757 * that would be, but there is no reason to allow it.)
1758 */
1759 if (ItemIdHasStorage(lp))
1760 {
1762 htup = (HeapTupleHeader) PageGetItem(page, lp);
1764 }
1765 else
1766 {
1767 /* Whole HOT chain becomes dead */
1769 }
1770#endif
1771
1773 }
1774
1775 /* Update all now-unused line pointers */
1776 offnum = nowunused;
1777 for (int i = 0; i < nunused; i++)
1778 {
1779 OffsetNumber off = *offnum++;
1780 ItemId lp = PageGetItemId(page, off);
1781
1782#ifdef USE_ASSERT_CHECKING
1783
1784 if (lp_truncate_only)
1785 {
1786 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1788 }
1789 else
1790 {
1791 /*
1792 * When heap_page_prune_and_freeze() was called, mark_unused_now
1793 * may have been passed as true, which allows would-be LP_DEAD
1794 * items to be made LP_UNUSED instead. This is only possible if
1795 * the relation has no indexes. If there are any dead items, then
1796 * mark_unused_now was not true and every item being marked
1797 * LP_UNUSED must refer to a heap-only tuple.
1798 */
1799 if (ndead > 0)
1800 {
1802 htup = (HeapTupleHeader) PageGetItem(page, lp);
1804 }
1805 else
1807 }
1808
1809#endif
1810
1812 }
1813
1814 if (lp_truncate_only)
1816 else
1817 {
1818 /*
1819 * Finally, repair any fragmentation, and update the page's hint bit
1820 * about whether it has free pointers.
1821 */
1823
1824 /*
1825 * Now that the page has been modified, assert that redirect items
1826 * still point to valid targets.
1827 */
1829 }
1830}
void PageRepairFragmentation(Page page)
Definition bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:223
#define ItemIdSetRedirect(itemId, link)
Definition itemid.h:152
#define ItemIdSetDead(itemId)
Definition itemid.h:164
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void page_verify_redirects(Page page)
Definition pruneheap.c:1847

References Assert, BufferGetPage(), fb(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
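
A hedged sketch of how the offset arrays are packed (the offsets shown are purely illustrative; in practice they come from a planning pass such as heap_page_prune_and_freeze(), and the caller must hold a buffer cleanup lock):

 OffsetNumber redirected[] = {1, 2};   /* pairs: item 1 now redirects to item 2 */
 OffsetNumber nowdead[]    = {3};      /* item 3 becomes LP_DEAD */
 OffsetNumber nowunused[]  = {4};      /* item 4 becomes LP_UNUSED */

 START_CRIT_SECTION();
 heap_page_prune_execute(buffer, false,       /* not lp_truncate_only */
                         redirected, 1,       /* nredirected counts pairs */
                         nowdead, 1,
                         nowunused, 1);
 MarkBufferDirty(buffer);
 END_CRIT_SECTION();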

◆ heap_page_prune_opt()

void heap_page_prune_opt(Relation relation, Buffer buffer)
extern

Definition at line 209 of file pruneheap.c.

210{
211 Page page = BufferGetPage(buffer);
213 GlobalVisState *vistest;
215
216 /*
217 * We can't write WAL in recovery mode, so there's no point trying to
218 * clean the page. The primary will likely issue a cleaning WAL record
219 * soon anyway, so this is no particular loss.
220 */
221 if (RecoveryInProgress())
222 return;
223
224 /*
225 * First check whether there's any chance there's something to prune;
226 * determining the appropriate horizon is a waste if there's no prune_xid
227 * (i.e. no updates/deletes left potentially dead tuples around).
228 */
229 prune_xid = ((PageHeader) page)->pd_prune_xid;
231 return;
232
233 /*
234 * Check whether prune_xid indicates that there may be dead rows that can
235 * be cleaned up.
236 */
237 vistest = GlobalVisTestFor(relation);
238
240 return;
241
242 /*
243 * We prune when a previous UPDATE failed to find enough space on the page
244 * for a new tuple version, or when free space falls below the relation's
245 * fill-factor target (but not less than 10%).
246 *
247 * Checking free space here is questionable since we aren't holding any
248 * lock on the buffer; in the worst case we could get a bogus answer. It's
249 * unlikely to be *seriously* wrong, though, since reading either pd_lower
250 * or pd_upper is probably atomic. Avoiding taking a lock seems more
251 * important than sometimes getting a wrong answer in what is after all
252 * just a heuristic estimate.
253 */
256 minfree = Max(minfree, BLCKSZ / 10);
257
258 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
259 {
260 /* OK, try to get exclusive buffer lock */
262 return;
263
264 /*
265 * Now that we have buffer lock, get accurate information about the
266 * page's free space, and recheck the heuristic about whether to
267 * prune.
268 */
269 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
270 {
273
274 /*
275 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
276 * regardless of whether or not the relation has indexes, since we
277 * cannot safely determine that during on-access pruning with the
278 * current implementation.
279 */
280 PruneFreezeParams params = {
281 .relation = relation,
282 .buffer = buffer,
283 .reason = PRUNE_ON_ACCESS,
284 .options = 0,
285 .vistest = vistest,
286 .cutoffs = NULL,
287 };
288
290 NULL, NULL);
291
292 /*
293 * Report the number of tuples reclaimed to pgstats. This is
294 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
295 *
296 * We derive the number of dead tuples like this to avoid totally
297 * forgetting about items that were set to LP_DEAD, since they
298 * still need to be cleaned up by VACUUM. We only want to count
299 * heap-only tuples that just became LP_UNUSED in our report,
300 * which don't.
301 *
302 * VACUUM doesn't have to compensate in the same way when it
303 * tracks ndeleted, since it will set the same LP_DEAD items to
304 * LP_UNUSED separately.
305 */
306 if (presult.ndeleted > presult.nnewlpdead)
308 presult.ndeleted - presult.nnewlpdead);
309 }
310
311 /* And release buffer lock */
313
314 /*
315 * We avoid reuse of any free space created on the page by unrelated
316 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
317 * free space should be reused by UPDATEs to *this* page.
318 */
319 }
320}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6700
#define Max(x, y)
Definition c.h:991
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition procarray.c:4229
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:819
bool RecoveryInProgress(void)
Definition xlog.c:6460

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), fb(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), PruneFreezeParams::relation, RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().
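
A minimal sketch of opportunistic pruning during a read, in the style of the index-fetch and page-scan callers (blkno is an assumed block number; the buffer must be pinned but not locked when heap_page_prune_opt() is called):

 Buffer buffer = ReadBuffer(relation, blkno);

 heap_page_prune_opt(relation, buffer);       /* prunes only if it looks worthwhile */
 LockBuffer(buffer, BUFFER_LOCK_SHARE);
 /* ... examine tuples on BufferGetPage(buffer) ... */
 UnlockReleaseBuffer(buffer);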

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
extern

Definition at line 7407 of file heapam.c.

7409{
7410 Page page = BufferGetPage(buffer);
7411
7412 for (int i = 0; i < ntuples; i++)
7413 {
7414 HeapTupleFreeze *frz = tuples + i;
7415 ItemId itemid = PageGetItemId(page, frz->offset);
7416 HeapTupleHeader htup;
7417
7418 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7419
7420 /* Deliberately avoid relying on tuple hint bits here */
7421 if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
7422 {
7424
7426 if (unlikely(!TransactionIdDidCommit(xmin)))
7427 ereport(ERROR,
7429 errmsg_internal("uncommitted xmin %u needs to be frozen",
7430 xmin)));
7431 }
7432
7433 /*
7434 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7435 * left behind by transactions that were in progress during a crash,
7436 * so we can only check that xmax didn't commit
7437 */
7438 if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
7439 {
7441
7444 ereport(ERROR,
7446 errmsg_internal("cannot freeze committed xmax %u",
7447 xmax)));
7448 }
7449 }
7450}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition heapam.h:138
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition heapam.h:137
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
#define ERRCODE_DATA_CORRUPTED
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdIsNormal(xid)
Definition transam.h:42

References Assert, BufferGetPage(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_will_freeze().
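
A hedged sketch of the expected call order (frozen and nfrozen are assumed to have been filled in by heap_prepare_freeze_tuple(); the checks run before the critical section that applies the freeze plans):

 heap_pre_freeze_checks(buffer, frozen, nfrozen);

 START_CRIT_SECTION();
 heap_freeze_prepared_tuples(buffer, frozen, nfrozen);
 MarkBufferDirty(buffer);
 END_CRIT_SECTION();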

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs,
                               HeapPageFreeze *pagefrz, HeapTupleFreeze *frz,
                               bool *totally_frozen)
extern

Definition at line 7134 of file heapam.c.

7138{
7139 bool xmin_already_frozen = false,
7140 xmax_already_frozen = false;
7141 bool freeze_xmin = false,
7142 replace_xvac = false,
7143 replace_xmax = false,
7144 freeze_xmax = false;
7145 TransactionId xid;
7146
7147 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7148 frz->t_infomask2 = tuple->t_infomask2;
7149 frz->t_infomask = tuple->t_infomask;
7150 frz->frzflags = 0;
7151 frz->checkflags = 0;
7152
7153 /*
7154 * Process xmin, while keeping track of whether it's already frozen, or
7155 * will become frozen iff our freeze plan is executed by caller (could be
7156 * neither).
7157 */
7158 xid = HeapTupleHeaderGetXmin(tuple);
7159 if (!TransactionIdIsNormal(xid))
7160 xmin_already_frozen = true;
7161 else
7162 {
7163 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7164 ereport(ERROR,
7166 errmsg_internal("found xmin %u from before relfrozenxid %u",
7167 xid, cutoffs->relfrozenxid)));
7168
7169 /* Will set freeze_xmin flags in freeze plan below */
7171
7172 /* Verify that xmin committed if and when freeze plan is executed */
7173 if (freeze_xmin)
7175 }
7176
7177 /*
7178 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7179 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7180 */
7181 xid = HeapTupleHeaderGetXvac(tuple);
7182 if (TransactionIdIsNormal(xid))
7183 {
7185 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7186
7187 /*
7188 * For Xvac, we always freeze proactively. This allows totally_frozen
7189 * tracking to ignore xvac.
7190 */
7191 replace_xvac = pagefrz->freeze_required = true;
7192
7193 /* Will set replace_xvac flags in freeze plan below */
7194 }
7195
7196 /* Now process xmax */
7197 xid = frz->xmax;
7198 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7199 {
7200 /* Raw xmax is a MultiXactId */
7202 uint16 flags;
7203
7204 /*
7205 * We will either remove xmax completely (in the "freeze_xmax" path),
7206 * process xmax by replacing it (in the "replace_xmax" path), or
7207 * perform no-op xmax processing. The only constraint is that the
7208 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7209 */
7210 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7211 &flags, pagefrz);
7212
7213 if (flags & FRM_NOOP)
7214 {
7215 /*
7216 * xmax is a MultiXactId, and nothing about it changes for now.
7217 * This is the only case where 'freeze_required' won't have been
7218 * set for us by FreezeMultiXactId, as well as the only case where
7219 * neither freeze_xmax nor replace_xmax are set (given a multi).
7220 *
7221 * This is a no-op, but the call to FreezeMultiXactId might have
7222 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7223 * for us (the "freeze page" variants, specifically). That'll
7224 * make it safe for our caller to freeze the page later on, while
7225 * leaving this particular xmax undisturbed.
7226 *
7227 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7228 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7229 * job. A call to heap_tuple_should_freeze for this same tuple
7230 * will take place below if 'freeze_required' isn't set already.
7231 * (This repeats work from FreezeMultiXactId, but allows "no
7232 * freeze" tracker maintenance to happen in only one place.)
7233 */
7236 }
7237 else if (flags & FRM_RETURN_IS_XID)
7238 {
7239 /*
7240 * xmax will become an updater Xid (original MultiXact's updater
7241 * member Xid will be carried forward as a simple Xid in Xmax).
7242 */
7244
7245 /*
7246 * NB -- some of these transformations are only valid because we
7247 * know the return Xid is a tuple updater (i.e. not merely a
7248 * locker.) Also note that the only reason we don't explicitly
7249 * worry about HEAP_KEYS_UPDATED is because it lives in
7250 * t_infomask2 rather than t_infomask.
7251 */
7252 frz->t_infomask &= ~HEAP_XMAX_BITS;
7253 frz->xmax = newxmax;
7254 if (flags & FRM_MARK_COMMITTED)
7255 frz->t_infomask |= HEAP_XMAX_COMMITTED;
7256 replace_xmax = true;
7257 }
7258 else if (flags & FRM_RETURN_IS_MULTI)
7259 {
7262
7263 /*
7264 * xmax is an old MultiXactId that we have to replace with a new
7265 * MultiXactId, to carry forward two or more original member XIDs.
7266 */
7268
7269 /*
7270 * We can't use GetMultiXactIdHintBits directly on the new multi
7271 * here; that routine initializes the masks to all zeroes, which
7272 * would lose other bits we need. Doing it this way ensures all
7273 * unrelated bits remain untouched.
7274 */
7275 frz->t_infomask &= ~HEAP_XMAX_BITS;
7276 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7278 frz->t_infomask |= newbits;
7279 frz->t_infomask2 |= newbits2;
7280 frz->xmax = newxmax;
7281 replace_xmax = true;
7282 }
7283 else
7284 {
7285 /*
7286 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7287 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7288 */
7289 Assert(flags & FRM_INVALIDATE_XMAX);
7291
7292 /* Will set freeze_xmax flags in freeze plan below */
7293 freeze_xmax = true;
7294 }
7295
7296 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7297 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7298 }
7299 else if (TransactionIdIsNormal(xid))
7300 {
7301 /* Raw xmax is normal XID */
7302 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7303 ereport(ERROR,
7305 errmsg_internal("found xmax %u from before relfrozenxid %u",
7306 xid, cutoffs->relfrozenxid)));
7307
7308 /* Will set freeze_xmax flags in freeze plan below */
7310
7311 /*
7312 * Verify that xmax aborted if and when freeze plan is executed,
7313 * provided it's from an update. (A lock-only xmax can be removed
7314 * independent of this, since the lock is released at xact end.)
7315 */
7317 frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7318 }
7319 else if (!TransactionIdIsValid(xid))
7320 {
7321 /* Raw xmax is InvalidTransactionId XID */
7322 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7323 xmax_already_frozen = true;
7324 }
7325 else
7326 ereport(ERROR,
7328 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7329 xid, tuple->t_infomask)));
7330
7331 if (freeze_xmin)
7332 {
7334
7335 frz->t_infomask |= HEAP_XMIN_FROZEN;
7336 }
7337 if (replace_xvac)
7338 {
7339 /*
7340 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7341 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7342 * transaction succeeded.
7343 */
7344 Assert(pagefrz->freeze_required);
7345 if (tuple->t_infomask & HEAP_MOVED_OFF)
7346 frz->frzflags |= XLH_INVALID_XVAC;
7347 else
7348 frz->frzflags |= XLH_FREEZE_XVAC;
7349 }
7350 if (replace_xmax)
7351 {
7353 Assert(pagefrz->freeze_required);
7354
7355 /* Already set replace_xmax flags in freeze plan earlier */
7356 }
7357 if (freeze_xmax)
7358 {
7360
7361 frz->xmax = InvalidTransactionId;
7362
7363 /*
7364 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7365 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7366 * Also get rid of the HEAP_KEYS_UPDATED bit.
7367 */
7368 frz->t_infomask &= ~HEAP_XMAX_BITS;
7369 frz->t_infomask |= HEAP_XMAX_INVALID;
7370 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7371 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7372 }
7373
7374 /*
7375 * Determine if this tuple is already totally frozen, or will become
7376 * totally frozen (provided the caller executes freeze plans for the page).
7377 */
7380
7381 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7382 xmax_already_frozen))
7383 {
7384 /*
7385 * So far no previous tuple from the page made freezing mandatory.
7386 * Does this tuple force caller to freeze the entire page?
7387 */
7388 pagefrz->freeze_required =
7389 heap_tuple_should_freeze(tuple, cutoffs,
7390 &pagefrz->NoFreezePageRelfrozenXid,
7391 &pagefrz->NoFreezePageRelminMxid);
7392 }
7393
7394 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7396}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition heapam.c:7526
#define FRM_RETURN_IS_XID
Definition heapam.c:6733
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition heapam.c:6784
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7945
#define FRM_MARK_COMMITTED
Definition heapam.c:6735
#define FRM_NOOP
Definition heapam.c:6731
#define FRM_RETURN_IS_MULTI
Definition heapam.c:6734
#define FRM_INVALIDATE_XMAX
Definition heapam.c:6732
#define HEAP_MOVED_OFF
#define HEAP_XMIN_FROZEN
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_COMMITTED
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2765
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
MultiXactId NoFreezePageRelminMxid
Definition heapam.h:220
TransactionId NoFreezePageRelfrozenXid
Definition heapam.h:219
TransactionId OldestXmin
Definition vacuum.h:279
MultiXactId OldestMxact
Definition vacuum.h:280
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
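
As a hedged illustration of the caller-side contract sketched in the comments above, the fragment below shows how a page-level caller might collect per-tuple freeze plans and leave it to later code to execute them only when the page is actually frozen. It is a minimal sketch, not the code VACUUM uses; the heap_prepare_freeze_tuple() prototype and the HeapTupleFreeze.offset field are assumed from recent sources, and collect_freeze_plans() is a hypothetical helper.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufpage.h"
#include "storage/itemid.h"
#include "storage/off.h"

/*
 * Hypothetical helper: build freeze plans for every normal tuple on "page".
 * Assumed prototype (recent sources):
 *   bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 *                                  const struct VacuumCutoffs *cutoffs,
 *                                  HeapPageFreeze *pagefrz,
 *                                  HeapTupleFreeze *frz, bool *totally_frozen);
 */
static int
collect_freeze_plans(Page page, const struct VacuumCutoffs *cutoffs,
                     HeapPageFreeze *pagefrz, HeapTupleFreeze *plans)
{
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
    int         nplans = 0;

    for (OffsetNumber off = FirstOffsetNumber; off <= maxoff; off++)
    {
        ItemId      lp = PageGetItemId(page, off);
        HeapTupleHeader tuple;
        bool        totally_frozen;

        if (!ItemIdIsNormal(lp))
            continue;           /* ignore unused/dead/redirect line pointers */

        tuple = (HeapTupleHeader) PageGetItem(page, lp);

        /*
         * Returns true when plans[nplans] now holds a usable freeze plan;
         * also maintains pagefrz->freeze_required and the "no freeze"
         * NewRelfrozenXid/NewRelminMxid trackers as a side effect.
         */
        if (heap_prepare_freeze_tuple(tuple, cutoffs, pagefrz,
                                      &plans[nplans], &totally_frozen))
            plans[nplans++].offset = off;   /* assumed field: records the tuple */
    }

    /*
     * The caller executes the collected plans only if it chooses to freeze
     * the page, or is forced to by pagefrz->freeze_required.
     */
    return nplans;
}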

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)
extern

Definition at line 615 of file heapam.c.

616{
618 Buffer buffer = scan->rs_cbuf;
619 BlockNumber block = scan->rs_cblock;
620 Snapshot snapshot;
621 Page page;
622 int lines;
623 bool all_visible;
625
626 Assert(BufferGetBlockNumber(buffer) == block);
627
628 /* ensure we're not accidentally being used when not in pagemode */
630 snapshot = scan->rs_base.rs_snapshot;
631
632 /*
633 * Prune and repair fragmentation for the whole page, if possible.
634 */
635 heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
636
637 /*
638 * We must hold share lock on the buffer content while examining tuple
639 * visibility. Afterwards, however, the tuples we have found to be
640 * visible are guaranteed good as long as we hold the buffer pin.
641 */
643
644 page = BufferGetPage(buffer);
645 lines = PageGetMaxOffsetNumber(page);
646
647 /*
648 * If the all-visible flag indicates that all tuples on the page are
649 * visible to everyone, we can skip the per-tuple visibility tests.
650 *
651 * Note: In hot standby, a tuple that's already visible to all
652 * transactions on the primary might still be invisible to a read-only
653 * transaction in the standby. We partly handle this problem by tracking
654 * the minimum xmin of visible tuples as the cut-off XID while marking a
655 * page all-visible on the primary and WAL log that along with the
656 * visibility map SET operation. In hot standby, we wait for (or abort)
657 * all transactions that might not see one or more tuples on
658 * the page. That's how index-only scans work fine in hot standby. A
659 * crucial difference between index-only scans and heap scans is that the
660 * index-only scan completely relies on the visibility map, whereas a heap
661 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
662 * the page-level flag can be trusted in the same way, because it might
663 * get propagated somehow without being explicitly WAL-logged, e.g. via a
664 * full page write. Until we can prove that beyond doubt, let's check each
665 * tuple for visibility the hard way.
666 */
667 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
670
671 /*
672 * We call page_collect_tuples() with constant arguments, to get the
673 * compiler to constant fold the constant arguments. Separate calls with
674 * constant arguments, rather than variables, are needed on several
675 * compilers to actually perform constant folding.
676 */
677 if (likely(all_visible))
678 {
680 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
681 block, lines, true, false);
682 else
683 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
684 block, lines, true, true);
685 }
686 else
687 {
689 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
690 block, lines, false, false);
691 else
692 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
693 block, lines, false, true);
694 }
695
697}
#define likely(x)
Definition c.h:411
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition heapam.c:521
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition predicate.c:3989
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition pruneheap.c:209
uint32 rs_ntuples
Definition heapam.h:99
BlockNumber rs_cblock
Definition heapam.h:69
bool takenDuringRecovery
Definition snapshot.h:180

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), fb(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().
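
The constant-folding trick described in the comment above is not specific to heapam: an always-inline helper that takes boolean parameters is called separately with literal true/false values so the compiler can discard the dead branches and emit specialized loops. The standalone sketch below (hypothetical names, not PostgreSQL code) shows the shape of the pattern; PostgreSQL spells the inlining hint pg_attribute_always_inline.

#include <stdbool.h>

/*
 * Inline helper; when "check_extra" is a compile-time constant at the call
 * site, the compiler can drop the untaken branch entirely, producing two
 * specialized copies of the loop instead of one with a per-iteration test.
 * (PostgreSQL forces the inlining with its pg_attribute_always_inline macro.)
 */
static inline int
count_matching(const int *vals, int n, bool check_extra)
{
    int         hits = 0;

    for (int i = 0; i < n; i++)
    {
        if (vals[i] > 0)
        {
            if (check_extra && (vals[i] % 2) != 0)
                continue;       /* folded away when check_extra is false */
            hits++;
        }
    }
    return hits;
}

int
count_dispatch(const int *vals, int n, bool check_extra)
{
    /*
     * Separate calls with literal true/false, mirroring how
     * heap_prepare_pagescan() invokes page_collect_tuples().
     */
    if (check_extra)
        return count_matching(vals, n, true);
    else
        return count_matching(vals, n, false);
}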

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)
extern

Definition at line 1317 of file heapam.c.

1319{
1321
1322 if (set_params)
1323 {
1324 if (allow_strat)
1326 else
1328
1329 if (allow_sync)
1331 else
1333
1334 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1337 else
1339 }
1340
1341 /*
1342 * unpin scan buffers
1343 */
1344 if (BufferIsValid(scan->rs_cbuf))
1345 {
1346 ReleaseBuffer(scan->rs_cbuf);
1347 scan->rs_cbuf = InvalidBuffer;
1348 }
1349
1350 /*
1351 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1352 * additional data vs a normal HeapScan
1353 */
1354
1355 /*
1356 * The read stream is reset on rescan. This must be done before
1357 * initscan(), as some state referred to by read_stream_reset() is reset
1358 * in initscan().
1359 */
1360 if (scan->rs_read_stream)
1362
1363 /*
1364 * reinitialize scan descriptor
1365 */
1366 initscan(scan, key, true);
1367}
void read_stream_reset(ReadStream *stream)
@ SO_ALLOW_STRAT
Definition tableam.h:58
@ SO_ALLOW_SYNC
Definition tableam.h:60

References BufferIsValid(), fb(), initscan(), InvalidBuffer, IsMVCCSnapshot, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.
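
A hedged sketch of the calling pattern: open a scan, consume it, reset it with heap_rescan(), and consume it again, much as the executor does for the inner side of a rescannable plan node. scan_twice() is hypothetical; real code normally goes through table_beginscan()/table_rescan(), and the flag combination shown is an assumption.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

/* Hypothetical helper: scan "rel" twice using one scan descriptor. */
static void
scan_twice(Relation rel, Snapshot snapshot)
{
    uint32      flags = SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                        SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
    TableScanDesc scan;
    HeapTuple   tuple;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL, flags);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* ... first pass over the relation ... */
    }

    /* Restart from the beginning; set_params=false keeps the existing flags. */
    heap_rescan(scan, NULL, false, false, false, false);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* ... second pass ... */
    }

    heap_endscan(scan);
}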

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)
extern

Definition at line 1478 of file heapam.c.

1480{
1486
1487 /*
1488 * For relations without any pages, we can simply leave the TID range
1489 * unset. There will be no tuples to scan, therefore no tuples outside
1490 * the given TID range.
1491 */
1492 if (scan->rs_nblocks == 0)
1493 return;
1494
1495 /*
1496 * Set up some ItemPointers which point to the first and last possible
1497 * tuples in the heap.
1498 */
1501
1502 /*
1503 * If the given maximum TID is below the highest possible TID in the
1504 * relation, then restrict the range to that, otherwise we scan to the end
1505 * of the relation.
1506 */
1509
1510 /*
1511 * If the given minimum TID is above the lowest possible TID in the
1512 * relation, then restrict the range to only scan for TIDs above that.
1513 */
1516
1517 /*
1518 * Check for an empty range and protect against would-be negative results
1519 * from the numBlks calculation below.
1520 */
1522 {
1523 /* Set an empty range of blocks to scan */
1525 return;
1526 }
1527
1528 /*
1529 * Calculate the first block and the number of blocks we must scan. We
1530 * could be more aggressive here and perform some more validation to try
1531 * and further narrow the scope of blocks to scan by checking if the
1532 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1533 * advance startBlk by one. Likewise, if highestItem has an offset of 0
1534 * we could scan one fewer block. However, such an optimization does not
1535 * seem worth troubling over, currently.
1536 */
1538
1541
1542 /* Set the start block and number of blocks to scan */
1544
1545 /* Finally, set the TID range in sscan */
1546 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1547 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1548}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition heapam.c:499
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition itemptr.h:93
#define MaxOffsetNumber
Definition off.h:28
BlockNumber rs_nblocks
Definition heapam.h:61

References fb(), FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, and HeapScanDescData::rs_nblocks.
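
A hedged sketch of how the TID-range machinery above is typically driven: pick a minimum and maximum TID, install them with heap_set_tidrange(), and fetch qualifying tuples with heap_getnextslot_tidrange(). scan_block_range_by_tid() is hypothetical, real callers normally use table_beginscan_tidrange(), and the scan flags shown are an assumption.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "storage/itemptr.h"
#include "storage/off.h"

/* Hypothetical helper: fetch every tuple whose TID lies in blocks 10..19. */
static void
scan_block_range_by_tid(Relation rel, Snapshot snapshot)
{
    ItemPointerData mintid;
    ItemPointerData maxtid;
    TableScanDesc scan;
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    ItemPointerSet(&mintid, 10, FirstOffsetNumber);
    ItemPointerSet(&maxtid, 19, MaxOffsetNumber);

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);
    heap_set_tidrange(scan, &mintid, &maxtid);

    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* ... process the tuple currently stored in "slot" ... */
    }

    heap_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
}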

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)
extern

Definition at line 499 of file heapam.c.

500{
502
503 Assert(!scan->rs_inited); /* else too late to change */
504 /* else rs_startblock is significant */
506
507 /* Check startBlk is valid (but allow case of zero blocks...) */
508 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
509
510 scan->rs_startblock = startBlk;
511 scan->rs_numblocks = numBlks;
512}
BlockNumber rs_startblock
Definition heapam.h:62
BlockNumber rs_numblocks
Definition heapam.h:63

References Assert, fb(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
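
A hedged sketch of the pattern used by heapam_index_build_range_scan(): set the block limits immediately after opening the scan (before any tuple is fetched, while rs_inited is still false) and then scan as usual. scan_block_subset() is hypothetical; note that SO_ALLOW_SYNC is deliberately omitted, since a synchronized scan would not honor the requested start block.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

/* Hypothetical helper: scan only blocks startBlk .. startBlk + numBlks - 1. */
static void
scan_block_subset(Relation rel, Snapshot snapshot,
                  BlockNumber startBlk, BlockNumber numBlks)
{
    TableScanDesc scan;
    HeapTuple   tuple;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_STRAT | SO_ALLOW_PAGEMODE);
    heap_setscanlimits(scan, startBlk, numBlks);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* ... only tuples from the requested block range arrive here ... */
    }

    heap_endscan(scan);
}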

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)
extern

Definition at line 7890 of file heapam.c.

7891{
7892 TransactionId xid;
7893
7894 /*
7895 * If xmin is a normal transaction ID, this tuple is definitely not
7896 * frozen.
7897 */
7898 xid = HeapTupleHeaderGetXmin(tuple);
7899 if (TransactionIdIsNormal(xid))
7900 return true;
7901
7902 /*
7903 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7904 */
7905 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7906 {
7907 MultiXactId multi;
7908
7909 multi = HeapTupleHeaderGetRawXmax(tuple);
7910 if (MultiXactIdIsValid(multi))
7911 return true;
7912 }
7913 else
7914 {
7915 xid = HeapTupleHeaderGetRawXmax(tuple);
7916 if (TransactionIdIsNormal(xid))
7917 return true;
7918 }
7919
7920 if (tuple->t_infomask & HEAP_MOVED)
7921 {
7922 xid = HeapTupleHeaderGetXvac(tuple);
7923 if (TransactionIdIsNormal(xid))
7924 return true;
7925 }
7926
7927 return false;
7928}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_would_be_all_visible().
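
A hedged sketch of how a caller in the spirit of heap_page_would_be_all_visible() can use this check: if any normal tuple on the page still needs eventual freezing, the page cannot be marked all-frozen. page_is_fully_frozen() is a hypothetical helper.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufpage.h"
#include "storage/itemid.h"
#include "storage/off.h"

/* Hypothetical helper: true only if no tuple on "page" needs freezing. */
static bool
page_is_fully_frozen(Page page)
{
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

    for (OffsetNumber off = FirstOffsetNumber; off <= maxoff; off++)
    {
        ItemId      lp = PageGetItemId(page, off);
        HeapTupleHeader tuple;

        if (!ItemIdIsNormal(lp))
            continue;           /* nothing to freeze in non-normal line pointers */

        tuple = (HeapTupleHeader) PageGetItem(page, lp);
        if (heap_tuple_needs_eventual_freeze(tuple))
            return false;       /* some XID or MultiXactId must be frozen eventually */
    }
    return true;
}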

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs *  cutoffs,
TransactionId *  NoFreezePageRelfrozenXid,
MultiXactId *  NoFreezePageRelminMxid 
)
extern

Definition at line 7945 of file heapam.c.

7949{
7950 TransactionId xid;
7951 MultiXactId multi;
7952 bool freeze = false;
7953
7954 /* First deal with xmin */
7955 xid = HeapTupleHeaderGetXmin(tuple);
7956 if (TransactionIdIsNormal(xid))
7957 {
7959 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7960 *NoFreezePageRelfrozenXid = xid;
7961 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7962 freeze = true;
7963 }
7964
7965 /* Now deal with xmax */
7967 multi = InvalidMultiXactId;
7968 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7969 multi = HeapTupleHeaderGetRawXmax(tuple);
7970 else
7971 xid = HeapTupleHeaderGetRawXmax(tuple);
7972
7973 if (TransactionIdIsNormal(xid))
7974 {
7976 /* xmax is a non-permanent XID */
7977 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7978 *NoFreezePageRelfrozenXid = xid;
7979 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7980 freeze = true;
7981 }
7982 else if (!MultiXactIdIsValid(multi))
7983 {
7984 /* xmax is a permanent XID or invalid MultiXactId/XID */
7985 }
7986 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7987 {
7988 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7989 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7990 *NoFreezePageRelminMxid = multi;
7991 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7992 freeze = true;
7993 }
7994 else
7995 {
7996 /* xmax is a MultiXactId that may have an updater XID */
7997 MultiXactMember *members;
7998 int nmembers;
7999
8001 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
8002 *NoFreezePageRelminMxid = multi;
8003 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
8004 freeze = true;
8005
8006 /* need to check whether any member of the mxact is old */
8007 nmembers = GetMultiXactIdMembers(multi, &members, false,
8009
8010 for (int i = 0; i < nmembers; i++)
8011 {
8012 xid = members[i].xid;
8014 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8015 *NoFreezePageRelfrozenXid = xid;
8016 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
8017 freeze = true;
8018 }
8019 if (nmembers > 0)
8020 pfree(members);
8021 }
8022
8023 if (tuple->t_infomask & HEAP_MOVED)
8024 {
8025 xid = HeapTupleHeaderGetXvac(tuple);
8026 if (TransactionIdIsNormal(xid))
8027 {
8029 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8030 *NoFreezePageRelfrozenXid = xid;
8031 /* heap_prepare_freeze_tuple forces xvac freezing */
8032 freeze = true;
8033 }
8034 }
8035
8036 return freeze;
8037}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2779
#define InvalidMultiXactId
Definition multixact.h:25
TransactionId xid
Definition multixact.h:57

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
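
A hedged sketch of the lazy_scan_noprune()-style usage: when a page is left unfrozen, every remaining tuple is passed to heap_tuple_should_freeze() so the "no freeze" trackers stay correct, and the return values tell the caller whether the page would have forced freezing. page_would_force_freeze() and its tuple array are hypothetical.

#include "postgres.h"
#include "access/heapam.h"
#include "commands/vacuum.h"

/* Hypothetical helper: probe each tuple that stays behind on an unfrozen page. */
static bool
page_would_force_freeze(HeapTupleHeader *tuples, int ntuples,
                        const struct VacuumCutoffs *cutoffs,
                        TransactionId *NoFreezePageRelfrozenXid,
                        MultiXactId *NoFreezePageRelminMxid)
{
    bool        should_freeze = false;

    /* The trackers are typically seeded with OldestXmin/OldestMxact by the caller. */
    for (int i = 0; i < ntuples; i++)
    {
        if (heap_tuple_should_freeze(tuples[i], cutoffs,
                                     NoFreezePageRelfrozenXid,
                                     NoFreezePageRelminMxid))
            should_freeze = true;   /* some XID/MXID precedes FreezeLimit/MultiXactCutoff */
    }

    return should_freeze;
}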

◆ heap_update()

TM_Result heap_update ( Relation  relation,
const ItemPointerData *  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData *  tmfd,
LockTupleMode *  lockmode,
TU_UpdateIndexes *  update_indexes 
)
extern

Definition at line 3311 of file heapam.c.

3315{
3316 TM_Result result;
3324 ItemId lp;
3328 bool old_key_copied = false;
3329 Page page;
3330 BlockNumber block;
3332 Buffer buffer,
3333 newbuf,
3334 vmbuffer = InvalidBuffer,
3336 bool need_toast;
3338 pagefree;
3339 bool have_tuple_lock = false;
3340 bool iscombo;
3341 bool use_hot_update = false;
3342 bool summarized_update = false;
3343 bool key_intact;
3344 bool all_visible_cleared = false;
3345 bool all_visible_cleared_new = false;
3346 bool checked_lockers;
3347 bool locker_remains;
3348 bool id_has_external = false;
3355
3357
3358 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3361
3362 AssertHasSnapshotForToast(relation);
3363
3364 /*
3365 * Forbid this during a parallel operation, lest it allocate a combo CID.
3366 * Other workers might need that combo CID for visibility checks, and we
3367 * have no provision for broadcasting it to them.
3368 */
3369 if (IsInParallelMode())
3370 ereport(ERROR,
3372 errmsg("cannot update tuples during a parallel operation")));
3373
3374#ifdef USE_ASSERT_CHECKING
3376#endif
3377
3378 /*
3379 * Fetch the list of attributes to be checked for various operations.
3380 *
3381 * For HOT considerations, this is wasted effort if we fail to update or
3382 * have to put the new tuple on a different page. But we must compute the
3383 * list before obtaining buffer lock --- in the worst case, if we are
3384 * doing an update on one of the relevant system catalogs, we could
3385 * deadlock if we try to fetch the list later. In any case, the relcache
3386 * caches the data so this is usually pretty cheap.
3387 *
3388 * We also need columns used by the replica identity and columns that are
3389 * considered the "key" of rows in the table.
3390 *
3391 * Note that we get copies of each bitmap, so we need not worry about
3392 * relcache flush happening midway through.
3393 */
3406
3408 INJECTION_POINT("heap_update-before-pin", NULL);
3409 buffer = ReadBuffer(relation, block);
3410 page = BufferGetPage(buffer);
3411
3412 /*
3413 * Before locking the buffer, pin the visibility map page if it appears to
3414 * be necessary. Since we haven't got the lock yet, someone else might be
3415 * in the middle of changing this, so we'll need to recheck after we have
3416 * the lock.
3417 */
3418 if (PageIsAllVisible(page))
3419 visibilitymap_pin(relation, block, &vmbuffer);
3420
3422
3424
3425 /*
3426 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3427 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3428 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3429 * of which indicates concurrent pruning.
3430 *
3431 * Failing with TM_Updated would be most accurate. However, unlike other
3432 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3433 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3434 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3435 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3436 * TM_Updated and TM_Deleted affects only the wording of error messages.
3437 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3438 * the specification of when tmfd->ctid is valid. Second, it creates
3439 * error log evidence that we took this branch.
3440 *
3441 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3442 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3443 * unrelated row, we'll fail with "duplicate key value violates unique".
3444 * XXX if otid is the live, newer version of the newtup row, we'll discard
3445 * changes originating in versions of this catalog row after the version
3446 * the caller got from syscache. See syscache-update-pruned.spec.
3447 */
3448 if (!ItemIdIsNormal(lp))
3449 {
3451
3452 UnlockReleaseBuffer(buffer);
3454 if (vmbuffer != InvalidBuffer)
3455 ReleaseBuffer(vmbuffer);
3456 tmfd->ctid = *otid;
3457 tmfd->xmax = InvalidTransactionId;
3458 tmfd->cmax = InvalidCommandId;
3460
3465 /* modified_attrs not yet initialized */
3467 return TM_Deleted;
3468 }
3469
3470 /*
3471 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3472 * properly.
3473 */
3474 oldtup.t_tableOid = RelationGetRelid(relation);
3475 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3476 oldtup.t_len = ItemIdGetLength(lp);
3477 oldtup.t_self = *otid;
3478
3479 /* the new tuple is ready, except for this: */
3480 newtup->t_tableOid = RelationGetRelid(relation);
3481
3482 /*
3483 * Determine columns modified by the update. Additionally, identify
3484 * whether any of the unmodified replica identity key attributes in the
3485 * old tuple is externally stored or not. This is required because for
3486 * such attributes the flattened value won't be WAL logged as part of the
3487 * new tuple so we must include it as part of the old_key_tuple. See
3488 * ExtractReplicaIdentity.
3489 */
3491 id_attrs, &oldtup,
3493
3494 /*
3495 * If we're not updating any "key" column, we can grab a weaker lock type.
3496 * This allows for more concurrency when we are running simultaneously
3497 * with foreign key checks.
3498 *
3499 * Note that if a column gets detoasted while executing the update, but
3500 * the value ends up being the same, this test will fail and we will use
3501 * the stronger lock. This is acceptable; the important case to optimize
3502 * is updates that don't manipulate key columns, not those that
3503 * serendipitously arrive at the same key values.
3504 */
3506 {
3507 *lockmode = LockTupleNoKeyExclusive;
3509 key_intact = true;
3510
3511 /*
3512 * If this is the first possibly-multixact-able operation in the
3513 * current transaction, set my per-backend OldestMemberMXactId
3514 * setting. We can be certain that the transaction will never become a
3515 * member of any older MultiXactIds than that. (We have to do this
3516 * even if we end up just using our own TransactionId below, since
3517 * some other backend could incorporate our XID into a MultiXact
3518 * immediately afterwards.)
3519 */
3521 }
3522 else
3523 {
3524 *lockmode = LockTupleExclusive;
3526 key_intact = false;
3527 }
3528
3529 /*
3530 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3531 * otid may very well point at newtup->t_self, which we will overwrite
3532 * with the new tuple's location, so there's great risk of confusion if we
3533 * use otid anymore.
3534 */
3535
3536l2:
3537 checked_lockers = false;
3538 locker_remains = false;
3539 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3540
3541 /* see below about the "no wait" case */
3542 Assert(result != TM_BeingModified || wait);
3543
3544 if (result == TM_Invisible)
3545 {
3546 UnlockReleaseBuffer(buffer);
3547 ereport(ERROR,
3549 errmsg("attempted to update invisible tuple")));
3550 }
3551 else if (result == TM_BeingModified && wait)
3552 {
3555 bool can_continue = false;
3556
3557 /*
3558 * XXX note that we don't consider the "no wait" case here. This
3559 * isn't a problem currently because no caller uses that case, but it
3560 * should be fixed if such a caller is introduced. It wasn't a
3561 * problem previously because this code would always wait, but now
3562 * that some tuple locks do not conflict with one of the lock modes we
3563 * use, it is possible that this case is interesting to handle
3564 * specially.
3565 *
3566 * This may cause failures with third-party code that calls
3567 * heap_update directly.
3568 */
3569
3570 /* must copy state data before unlocking buffer */
3572 infomask = oldtup.t_data->t_infomask;
3573
3574 /*
3575 * Now we have to do something about the existing locker. If it's a
3576 * multi, sleep on it; we might be awakened before it is completely
3577 * gone (or even not sleep at all in some cases); we need to preserve
3578 * it as locker, unless it is gone completely.
3579 *
3580 * If it's not a multi, we need to check for sleeping conditions
3581 * before actually going to sleep. If the update doesn't conflict
3582 * with the locks, we just continue without sleeping (but making sure
3583 * it is preserved).
3584 *
3585 * Before sleeping, we need to acquire tuple lock to establish our
3586 * priority for the tuple (see heap_lock_tuple). LockTuple will
3587 * release us when we are next-in-line for the tuple. Note we must
3588 * not acquire the tuple lock until we're sure we're going to sleep;
3589 * otherwise we're open for race conditions with other transactions
3590 * holding the tuple lock which sleep on us.
3591 *
3592 * If we are forced to "start over" below, we keep the tuple lock;
3593 * this arranges that we stay at the head of the line while rechecking
3594 * tuple state.
3595 */
3597 {
3599 int remain;
3600 bool current_is_member = false;
3601
3603 *lockmode, &current_is_member))
3604 {
3606
3607 /*
3608 * Acquire the lock, if necessary (but skip it when we're
3609 * requesting a lock and already have one; avoids deadlock).
3610 */
3611 if (!current_is_member)
3612 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3614
3615 /* wait for multixact */
3617 relation, &oldtup.t_self, XLTW_Update,
3618 &remain);
3619 checked_lockers = true;
3620 locker_remains = remain != 0;
3622
3623 /*
3624 * If xwait had just locked the tuple then some other xact
3625 * could update this tuple before we get to this point. Check
3626 * for xmax change, and start over if so.
3627 */
3628 if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3629 infomask) ||
3631 xwait))
3632 goto l2;
3633 }
3634
3635 /*
3636 * Note that the multixact may not be done by now. It could have
3637 * surviving members; our own xact or other subxacts of this
3638 * backend, and also any other concurrent transaction that locked
3639 * the tuple with LockTupleKeyShare if we only got
3640 * LockTupleNoKeyExclusive. If this is the case, we have to be
3641 * careful to mark the updated tuple with the surviving members in
3642 * Xmax.
3643 *
3644 * Note that there could have been another update in the
3645 * MultiXact. In that case, we need to check whether it committed
3646 * or aborted. If it aborted we are safe to update it again;
3647 * otherwise there is an update conflict, and we have to return
3648 * TableTuple{Deleted, Updated} below.
3649 *
3650 * In the LockTupleExclusive case, we still need to preserve the
3651 * surviving members: those would include the tuple locks we had
3652 * before this one, which are important to keep in case this
3653 * subxact aborts.
3654 */
3655 if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3657 else
3659
3660 /*
3661 * There was no UPDATE in the MultiXact; or it aborted. No
3662 * TransactionIdIsInProgress() call needed here, since we called
3663 * MultiXactIdWait() above.
3664 */
3667 can_continue = true;
3668 }
3670 {
3671 /*
3672 * The only locker is ourselves; we can avoid grabbing the tuple
3673 * lock here, but must preserve our locking information.
3674 */
3675 checked_lockers = true;
3676 locker_remains = true;
3677 can_continue = true;
3678 }
3680 {
3681 /*
3682 * If it's just a key-share locker, and we're not changing the key
3683 * columns, we don't need to wait for it to end; but we need to
3684 * preserve it as locker.
3685 */
3686 checked_lockers = true;
3687 locker_remains = true;
3688 can_continue = true;
3689 }
3690 else
3691 {
3692 /*
3693 * Wait for regular transaction to end; but first, acquire tuple
3694 * lock.
3695 */
3697 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3699 XactLockTableWait(xwait, relation, &oldtup.t_self,
3700 XLTW_Update);
3701 checked_lockers = true;
3703
3704 /*
3705 * xwait is done, but if xwait had just locked the tuple then some
3706 * other xact could update this tuple before we get to this point.
3707 * Check for xmax change, and start over if so.
3708 */
3709 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3712 goto l2;
3713
3714 /* Otherwise check if it committed or aborted */
3715 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3716 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3717 can_continue = true;
3718 }
3719
3720 if (can_continue)
3721 result = TM_Ok;
3722 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3723 result = TM_Updated;
3724 else
3725 result = TM_Deleted;
3726 }
3727
3728 /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3729 if (result != TM_Ok)
3730 {
3731 Assert(result == TM_SelfModified ||
3732 result == TM_Updated ||
3733 result == TM_Deleted ||
3734 result == TM_BeingModified);
3735 Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3736 Assert(result != TM_Updated ||
3737 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3738 }
3739
3740 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3741 {
3742 /* Perform additional check for transaction-snapshot mode RI updates */
3744 result = TM_Updated;
3745 }
3746
3747 if (result != TM_Ok)
3748 {
3749 tmfd->ctid = oldtup.t_data->t_ctid;
3750 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3751 if (result == TM_SelfModified)
3752 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3753 else
3754 tmfd->cmax = InvalidCommandId;
3755 UnlockReleaseBuffer(buffer);
3756 if (have_tuple_lock)
3757 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3758 if (vmbuffer != InvalidBuffer)
3759 ReleaseBuffer(vmbuffer);
3761
3768 return result;
3769 }
3770
3771 /*
3772 * If we didn't pin the visibility map page and the page has become all
3773 * visible while we were busy locking the buffer, or during some
3774 * subsequent window during which we had it unlocked, we'll have to unlock
3775 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3776 * bit unfortunate, especially since we'll now have to recheck whether the
3777 * tuple has been locked or updated under us, but hopefully it won't
3778 * happen very often.
3779 */
3780 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3781 {
3783 visibilitymap_pin(relation, block, &vmbuffer);
3785 goto l2;
3786 }
3787
3788 /* Fill in transaction status data */
3789
3790 /*
3791 * If the tuple we're updating is locked, we need to preserve the locking
3792 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3793 */
3795 oldtup.t_data->t_infomask,
3796 oldtup.t_data->t_infomask2,
3797 xid, *lockmode, true,
3800
3801 /*
3802 * And also prepare an Xmax value for the new copy of the tuple. If there
3803 * was no xmax previously, or there was one but all lockers are now gone,
3804 * then use InvalidTransactionId; otherwise, get the xmax from the old
3805 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3806 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3807 */
3808 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3809 HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3812 else
3814
3816 {
3819 }
3820 else
3821 {
3822 /*
3823 * If we found a valid Xmax for the new tuple, then the infomask bits
3824 * to use on the new tuple depend on what was there on the old one.
3825 * Note that since we're doing an update, the only possibility is that
3826 * the lockers had FOR KEY SHARE lock.
3827 */
3828 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3829 {
3832 }
3833 else
3834 {
3837 }
3838 }
3839
3840 /*
3841 * Prepare the new tuple with the appropriate initial values of Xmin and
3842 * Xmax, as well as initial infomask bits as computed above.
3843 */
3844 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3845 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3846 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3848 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3849 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3851
3852 /*
3853 * Replace cid with a combo CID if necessary. Note that we already put
3854 * the plain cid into the new tuple.
3855 */
3857
3858 /*
3859 * If the toaster needs to be activated, OR if the new tuple will not fit
3860 * on the same page as the old, then we need to release the content lock
3861 * (but not the pin!) on the old tuple's buffer while we are off doing
3862 * TOAST and/or table-file-extension work. We must mark the old tuple to
3863 * show that it's locked, else other processes may try to update it
3864 * themselves.
3865 *
3866 * We need to invoke the toaster if there are already any out-of-line
3867 * toasted values present, or if the new tuple is over-threshold.
3868 */
3869 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3870 relation->rd_rel->relkind != RELKIND_MATVIEW)
3871 {
3872 /* toast table entries should never be recursively toasted */
3875 need_toast = false;
3876 }
3877 else
3880 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3881
3883
3884 newtupsize = MAXALIGN(newtup->t_len);
3885
3887 {
3891 bool cleared_all_frozen = false;
3892
3893 /*
3894 * To prevent concurrent sessions from updating the tuple, we have to
3895 * temporarily mark it locked, while we release the page-level lock.
3896 *
3897 * To satisfy the rule that any xid potentially appearing in a buffer
3898 * written out to disk must first be WAL-logged, we unfortunately have
3899 * to WAL-log this temporary modification. We can reuse xl_heap_lock for this
3900 * purpose. If we crash/error before following through with the
3901 * actual update, xmax will be of an aborted transaction, allowing
3902 * other sessions to proceed.
3903 */
3904
3905 /*
3906 * Compute xmax / infomask appropriate for locking the tuple. This has
3907 * to be done separately from the combo that's going to be used for
3908 * updating, because the potentially created multixact would otherwise
3909 * be wrong.
3910 */
3912 oldtup.t_data->t_infomask,
3913 oldtup.t_data->t_infomask2,
3914 xid, *lockmode, false,
3917
3919
3921
3922 /* Clear obsolete visibility flags ... */
3923 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3924 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3926 /* ... and store info about transaction updating this tuple */
3929 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3930 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3932
3933 /* temporarily make it look not-updated, but locked */
3934 oldtup.t_data->t_ctid = oldtup.t_self;
3935
3936 /*
3937 * Clear all-frozen bit on visibility map if needed. We could
3938 * immediately reset ALL_VISIBLE, but given that the WAL logging
3939 * overhead would be unchanged, that doesn't seem
3940 * worthwhile.
3941 */
3942 if (PageIsAllVisible(page) &&
3943 visibilitymap_clear(relation, block, vmbuffer,
3945 cleared_all_frozen = true;
3946
3947 MarkBufferDirty(buffer);
3948
3949 if (RelationNeedsWAL(relation))
3950 {
3953
3956
3957 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3959 xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3960 oldtup.t_data->t_infomask2);
3961 xlrec.flags =
3965 PageSetLSN(page, recptr);
3966 }
3967
3969
3971
3972 /*
3973 * Let the toaster do its thing, if needed.
3974 *
3975 * Note: below this point, heaptup is the data we actually intend to
3976 * store into the relation; newtup is the caller's original untoasted
3977 * data.
3978 */
3979 if (need_toast)
3980 {
3981 /* Note we always use WAL and FSM during updates */
3983 newtupsize = MAXALIGN(heaptup->t_len);
3984 }
3985 else
3986 heaptup = newtup;
3987
3988 /*
3989 * Now, do we need a new page for the tuple, or not? This is a bit
3990 * tricky since someone else could have added tuples to the page while
3991 * we weren't looking. We have to recheck the available space after
3992 * reacquiring the buffer lock. But don't bother to do that if the
3993 * former amount of free space is still not enough; it's unlikely
3994 * there's more free now than before.
3995 *
3996 * What's more, if we need to get a new page, we will need to acquire
3997 * buffer locks on both old and new pages. To avoid deadlock against
3998 * some other backend trying to get the same two locks in the other
3999 * order, we must be consistent about the order we get the locks in.
4000 * We use the rule "lock the lower-numbered page of the relation
4001 * first". To implement this, we must do RelationGetBufferForTuple
4002 * while not holding the lock on the old page, and we must rely on it
4003 * to get the locks on both pages in the correct order.
4004 *
4005 * Another consideration is that we need visibility map page pin(s) if
4006 * we will have to clear the all-visible flag on either page. If we
4007 * call RelationGetBufferForTuple, we rely on it to acquire any such
4008 * pins; but if we don't, we have to handle that here. Hence we need
4009 * a loop.
4010 */
4011 for (;;)
4012 {
4013 if (newtupsize > pagefree)
4014 {
4015 /* It doesn't fit, must use RelationGetBufferForTuple. */
4016 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
4017 buffer, 0, NULL,
4018 &vmbuffer_new, &vmbuffer,
4019 0);
4020 /* We're all done. */
4021 break;
4022 }
4023 /* Acquire VM page pin if needed and we don't have it. */
4024 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4025 visibilitymap_pin(relation, block, &vmbuffer);
4026 /* Re-acquire the lock on the old tuple's page. */
4028 /* Re-check using the up-to-date free space */
4030 if (newtupsize > pagefree ||
4031 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
4032 {
4033 /*
4034 * Rats, it doesn't fit anymore, or somebody just now set the
4035 * all-visible flag. We must now unlock and loop to avoid
4036 * deadlock. Fortunately, this path should seldom be taken.
4037 */
4039 }
4040 else
4041 {
4042 /* We're all done. */
4043 newbuf = buffer;
4044 break;
4045 }
4046 }
4047 }
4048 else
4049 {
4050 /* No TOAST work needed, and it'll fit on same page */
4051 newbuf = buffer;
4052 heaptup = newtup;
4053 }
4054
4055 /*
4056 * We're about to do the actual update -- check for conflict first, to
4057 * avoid possibly having to roll back work we've just done.
4058 *
4059 * This is safe without a recheck as long as there is no possibility of
4060 * another process scanning the pages between this check and the update
4061 * being visible to the scan (i.e., exclusive buffer content lock(s) are
4062 * continuously held from this point until the tuple update is visible).
4063 *
4064 * For the new tuple the only check needed is at the relation level, but
4065 * since both tuples are in the same relation and the check for oldtup
4066 * will include checking the relation level, there is no benefit to a
4067 * separate check for the new tuple.
4068 */
4069 CheckForSerializableConflictIn(relation, &oldtup.t_self,
4070 BufferGetBlockNumber(buffer));
4071
4072 /*
4073 * At this point newbuf and buffer are both pinned and locked, and newbuf
4074 * has enough space for the new tuple. If they are the same buffer, only
4075 * one pin is held.
4076 */
4077
4078 if (newbuf == buffer)
4079 {
4080 /*
4081 * Since the new tuple is going into the same page, we might be able
4082 * to do a HOT update. Check if any of the index columns have been
4083 * changed.
4084 */
4086 {
4087 use_hot_update = true;
4088
4089 /*
4090 * If none of the columns that are used in hot-blocking indexes
4091 * were updated, we can apply HOT, but we do still need to check
4092 * if we need to update the summarizing indexes, and update those
4093 * indexes if the columns were updated, or we may fail to detect
4094 * e.g. value bound changes in BRIN minmax indexes.
4095 */
4097 summarized_update = true;
4098 }
4099 }
4100 else
4101 {
4102 /* Set a hint that the old page could use prune/defrag */
4103 PageSetFull(page);
4104 }
4105
4106 /*
4107 * Compute replica identity tuple before entering the critical section so
4108 * we don't PANIC upon a memory allocation failure.
4109 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4110 * logged. Pass old key required as true only if the replica identity key
4111 * columns are modified or it has external data.
4112 */
4117
4118 /* NO EREPORT(ERROR) from here till changes are logged */
4120
4121 /*
4122 * If this transaction commits, the old tuple will become DEAD sooner or
4123 * later. Set flag that this page is a candidate for pruning once our xid
4124 * falls below the OldestXmin horizon. If the transaction finally aborts,
4125 * the subsequent page pruning will be a no-op and the hint will be
4126 * cleared.
4127 *
4128 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4129 * there would be a prunable tuple in the newbuf; but for now we choose
4130 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4131 * sync if this decision changes.
4132 */
4133 PageSetPrunable(page, xid);
4134
4135 if (use_hot_update)
4136 {
4137 /* Mark the old tuple as HOT-updated */
4139 /* And mark the new tuple as heap-only */
4141 /* Mark the caller's copy too, in case different from heaptup */
4143 }
4144 else
4145 {
4146 /* Make sure tuples are correctly marked as not-HOT */
4150 }
4151
4152 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4153
4154
4155 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4156 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
4157 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4158 /* ... and store info about transaction updating this tuple */
4161 oldtup.t_data->t_infomask |= infomask_old_tuple;
4162 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4164
4165 /* record address of new tuple in t_ctid of old one */
4166 oldtup.t_data->t_ctid = heaptup->t_self;
4167
4168 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4169 if (PageIsAllVisible(BufferGetPage(buffer)))
4170 {
4171 all_visible_cleared = true;
4173 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4174 vmbuffer, VISIBILITYMAP_VALID_BITS);
4175 }
4176 if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
4177 {
4182 }
4183
4184 if (newbuf != buffer)
4186 MarkBufferDirty(buffer);
4187
4188 /* XLOG stuff */
4189 if (RelationNeedsWAL(relation))
4190 {
4192
4193 /*
4194 * For logical decoding we need combo CIDs to properly decode the
4195 * catalog.
4196 */
4198 {
4199 log_heap_new_cid(relation, &oldtup);
4200 log_heap_new_cid(relation, heaptup);
4201 }
4202
4203 recptr = log_heap_update(relation, buffer,
4208 if (newbuf != buffer)
4209 {
4211 }
4213 }
4214
4216
4217 if (newbuf != buffer)
4220
4221 /*
4222 * Mark old tuple for invalidation from system caches at next command
4223 * boundary, and mark the new tuple for invalidation in case we abort. We
4224 * have to do this before releasing the buffer because oldtup is in the
4225 * buffer. (heaptup is all in local memory, but it's necessary to process
4226 * both tuple versions in one call to inval.c so we can avoid redundant
4227 * sinval messages.)
4228 */
4230
4231 /* Now we can release the buffer(s) */
4232 if (newbuf != buffer)
4234 ReleaseBuffer(buffer);
4237 if (BufferIsValid(vmbuffer))
4238 ReleaseBuffer(vmbuffer);
4239
4240 /*
4241 * Release the lmgr tuple lock, if we had it.
4242 */
4243 if (have_tuple_lock)
4244 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4245
4246 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4247
4248 /*
4249 * If heaptup is a private copy, release it. Don't forget to copy t_self
4250 * back to the caller's image, too.
4251 */
4252 if (heaptup != newtup)
4253 {
4254 newtup->t_self = heaptup->t_self;
4256 }
4257
4258 /*
4259 * If it is a HOT update, the update may still need to update summarized
4260 * indexes, lest we fail to update those summaries and get incorrect
4261 * results (for example, minmax bounds of the block may change with this
4262 * update).
4263 */
4264 if (use_hot_update)
4265 {
4268 else
4270 }
4271 else
4273
4276
4283
4284 return TM_Ok;
4285}
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:916
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:581
static void PageSetFull(Page page)
Definition bufpage.h:417
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition heapam.c:4465
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition heapam.c:8918
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition heapam.c:7659
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
#define HEAP2_XACT_MASK
#define HEAP_XMAX_LOCK_ONLY
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
#define HEAP_XACT_MASK
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
#define HEAP_UPDATED
#define HEAP_XMAX_KEYSHR_LOCK
#define INJECTION_POINT(name, arg)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition relcache.c:5298
@ INDEX_ATTR_BITMAP_KEY
Definition relcache.h:69
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition relcache.h:72
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition relcache.h:73
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition relcache.h:71
bool RelationSupportsSysCache(Oid relid)
Definition syscache.c:762
@ TU_Summarizing
Definition tableam.h:119
@ TU_All
Definition tableam.h:116
@ TU_None
Definition tableam.h:113
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188

References Assert, AssertHasSnapshotForToast(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
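
The real callers are the table AM callback heapam_tuple_update() and simple_heap_update(); the fragment below is only a hedged sketch of what a direct call looks like, using the prototype documented above. update_one_tuple() is hypothetical, and the retry handling a real caller needs for TM_Updated/TM_Deleted is omitted.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"

/* Hypothetical helper: update the tuple at "otid" with "newtup". */
static TM_Result
update_one_tuple(Relation rel, ItemPointer otid, HeapTuple newtup)
{
    TM_FailureData tmfd;
    LockTupleMode lockmode;
    TU_UpdateIndexes update_indexes;
    TM_Result   result;

    result = heap_update(rel, otid, newtup,
                         GetCurrentCommandId(true),  /* cid */
                         InvalidSnapshot,            /* no crosscheck snapshot */
                         true,                       /* wait for concurrent updaters */
                         &tmfd, &lockmode, &update_indexes);

    if (result == TM_Ok && update_indexes != TU_None)
    {
        /*
         * New index entries are needed: for all indexes (TU_All), or only
         * for summarizing indexes (TU_Summarizing) after a HOT update.
         */
    }

    return result;
}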

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)
extern

Definition at line 624 of file vacuumlazy.c.

626{
628 bool verbose,
629 instrument,
630 skipwithvm,
638 TimestampTz starttime = 0;
640 startwritetime = 0;
643 ErrorContextCallback errcallback;
644 char **indnames = NULL;
646
647 verbose = (params.options & VACOPT_VERBOSE) != 0;
648 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
649 params.log_vacuum_min_duration >= 0));
650 if (instrument)
651 {
653 if (track_io_timing)
654 {
657 }
658 }
659
660 /* Used for instrumentation and stats report */
661 starttime = GetCurrentTimestamp();
662
664 RelationGetRelid(rel));
667 params.is_wraparound
670 else
673
674 /*
675 * Setup error traceback support for ereport() first. The idea is to set
676 * up an error context callback to display additional information on any
677 * error during a vacuum. During different phases of vacuum, we update
678 * the state so that the error context callback always displays current
679 * information.
680 *
681 * Copy the names of heap rel into local memory for error reporting
682 * purposes, too. It isn't always safe to assume that we can get the name
683 * of each rel. It's convenient for code in lazy_scan_heap to always use
684 * these temp copies.
685 */
688 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
689 vacrel->relname = pstrdup(RelationGetRelationName(rel));
690 vacrel->indname = NULL;
692 vacrel->verbose = verbose;
693 errcallback.callback = vacuum_error_callback;
694 errcallback.arg = vacrel;
695 errcallback.previous = error_context_stack;
696 error_context_stack = &errcallback;
697
698 /* Set up high level stuff about rel and its indexes */
699 vacrel->rel = rel;
701 &vacrel->indrels);
702 vacrel->bstrategy = bstrategy;
703 if (instrument && vacrel->nindexes > 0)
704 {
705 /* Copy index names used by instrumentation (not error reporting) */
706 indnames = palloc_array(char *, vacrel->nindexes);
707 for (int i = 0; i < vacrel->nindexes; i++)
709 }
710
711 /*
712 * The index_cleanup param either disables index vacuuming and cleanup or
713 * forces it to go ahead when we would otherwise apply the index bypass
714 * optimization. The default is 'auto', which leaves the final decision
715 * up to lazy_vacuum().
716 *
717 * The truncate param allows the user to avoid attempting relation truncation,
718 * though it can't force truncation to happen.
719 */
722 params.truncate != VACOPTVALUE_AUTO);
723
724 /*
725 * While VacuumFailsafeActive is reset to false before calling this, we
726 * still need to reset it here due to recursive calls.
727 */
728 VacuumFailsafeActive = false;
729 vacrel->consider_bypass_optimization = true;
730 vacrel->do_index_vacuuming = true;
731 vacrel->do_index_cleanup = true;
732 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
734 {
735 /* Force disable index vacuuming up-front */
736 vacrel->do_index_vacuuming = false;
737 vacrel->do_index_cleanup = false;
738 }
739 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
740 {
741 /* Force index vacuuming. Note that failsafe can still bypass. */
742 vacrel->consider_bypass_optimization = false;
743 }
744 else
745 {
746 /* Default/auto, make all decisions dynamically */
748 }
749
750 /* Initialize page counters explicitly (be tidy) */
751 vacrel->scanned_pages = 0;
752 vacrel->eager_scanned_pages = 0;
753 vacrel->removed_pages = 0;
754 vacrel->new_frozen_tuple_pages = 0;
755 vacrel->lpdead_item_pages = 0;
756 vacrel->missed_dead_pages = 0;
757 vacrel->nonempty_pages = 0;
758 /* dead_items_alloc allocates vacrel->dead_items later on */
759
760 /* Allocate/initialize output statistics state */
761 vacrel->new_rel_tuples = 0;
762 vacrel->new_live_tuples = 0;
763 vacrel->indstats = (IndexBulkDeleteResult **)
764 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
765
766 /* Initialize remaining counters (be tidy) */
767 vacrel->num_index_scans = 0;
768 vacrel->num_dead_items_resets = 0;
769 vacrel->total_dead_items_bytes = 0;
770 vacrel->tuples_deleted = 0;
771 vacrel->tuples_frozen = 0;
772 vacrel->lpdead_items = 0;
773 vacrel->live_tuples = 0;
774 vacrel->recently_dead_tuples = 0;
775 vacrel->missed_dead_tuples = 0;
776
777 vacrel->vm_new_visible_pages = 0;
778 vacrel->vm_new_visible_frozen_pages = 0;
779 vacrel->vm_new_frozen_pages = 0;
780
781 /*
782 * Get cutoffs that determine which deleted tuples are considered DEAD,
783 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
784 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
785 * happen in this order to ensure that the OldestXmin cutoff field works
786 * as an upper bound on the XIDs stored in the pages we'll actually scan
787 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
788 *
789 * Next acquire vistest, a related cutoff that's used in pruning. We use
790 * vistest in combination with OldestXmin to ensure that
791 * heap_page_prune_and_freeze() always removes any deleted tuple whose
792 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
793 * whether a tuple should be frozen or removed. (In the future we might
794 * want to teach lazy_scan_prune to recompute vistest from time to time,
795 * to increase the number of dead tuples it can prune away.)
796 */
797 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
799 vacrel->vistest = GlobalVisTestFor(rel);
800
801 /* Initialize state used to track oldest extant XID/MXID */
802 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
803 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
804
805 /*
806 * Initialize state related to tracking all-visible page skipping, which
807 * is important for determining whether it is safe to advance
808 * relfrozenxid/relminmxid.
809 */
810 vacrel->skippedallvis = false;
811 skipwithvm = true;
813 {
814 /*
815 * Force aggressive mode, and disable skipping blocks using the
816 * visibility map (even those set all-frozen)
817 */
818 vacrel->aggressive = true;
819 skipwithvm = false;
820 }
821
822 vacrel->skipwithvm = skipwithvm;
823
824 /*
825 * Set up eager scan tracking state. This must happen after determining
826 * whether or not the vacuum must be aggressive, because only normal
827 * vacuums use the eager scan algorithm.
828 */
830
831 /* Report the vacuum mode: 'normal' or 'aggressive' */
833 vacrel->aggressive
836
837 if (verbose)
838 {
839 if (vacrel->aggressive)
841 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
842 vacrel->dbname, vacrel->relnamespace,
843 vacrel->relname)));
844 else
846 (errmsg("vacuuming \"%s.%s.%s\"",
847 vacrel->dbname, vacrel->relnamespace,
848 vacrel->relname)));
849 }
850
851 /*
852 * Allocate dead_items memory using dead_items_alloc. This handles
853 * parallel VACUUM initialization as part of allocating shared memory
854 * space used for dead_items. (But do a failsafe precheck first, to
855 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
856 * is already dangerously old.)
857 */
860
861 /*
862 * Call lazy_scan_heap to perform all required heap pruning, index
863 * vacuuming, and heap vacuuming (plus related processing)
864 */
865 lazy_scan_heap(vacrel);
866
867 /*
868 * Save dead items max_bytes and update the memory usage statistics before
869 * cleanup, as they are freed in parallel vacuum cases during
870 * dead_items_cleanup().
871 */
872 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
873 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
874
875 /*
876 * Free resources managed by dead_items_alloc. This ends parallel mode in
877 * passing when necessary.
878 */
881
882 /*
883 * Update pg_class entries for each of rel's indexes where appropriate.
884 *
885 * Unlike the later update to rel's pg_class entry, this is not critical.
886 * It only maintains the relpages/reltuples statistics used by the planner.
887 */
888 if (vacrel->do_index_cleanup)
890
891 /* Done with rel's indexes */
892 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
893
894 /* Optionally truncate rel */
897
898 /* Pop the error context stack */
899 error_context_stack = errcallback.previous;
900
901 /* Report that we are now doing final cleanup */
904
905 /*
906 * Prepare to update rel's pg_class entry.
907 *
908 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
909 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
910 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
911 */
912 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
913 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
914 vacrel->cutoffs.relfrozenxid,
915 vacrel->NewRelfrozenXid));
916 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
917 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
918 vacrel->cutoffs.relminmxid,
919 vacrel->NewRelminMxid));
920 if (vacrel->skippedallvis)
921 {
922 /*
923 * Must keep original relfrozenxid in a non-aggressive VACUUM that
924 * chose to skip an all-visible page range. The state that tracks new
925 * values will have missed unfrozen XIDs from the pages we skipped.
926 */
927 Assert(!vacrel->aggressive);
928 vacrel->NewRelfrozenXid = InvalidTransactionId;
929 vacrel->NewRelminMxid = InvalidMultiXactId;
930 }
931
932 /*
933 * For safety, clamp relallvisible to be not more than what we're setting
934 * pg_class.relpages to
935 */
936 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
940
941 /*
942 * An all-frozen block _must_ be all-visible. As such, clamp the count of
943 * all-frozen blocks to the count of all-visible blocks. This matches the
944 * clamping of relallvisible above.
945 */
948
949 /*
950 * Now actually update rel's pg_class entry.
951 *
952 * In principle new_live_tuples could be -1 indicating that we (still)
953 * don't know the tuple count. In practice that can't happen, since we
954 * scan every page that isn't skipped using the visibility map.
955 */
956 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
958 vacrel->nindexes > 0,
959 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
961
962 /*
963 * Report results to the cumulative stats system, too.
964 *
965 * Deliberately avoid telling the stats system about LP_DEAD items that
966 * remain in the table due to VACUUM bypassing index and heap vacuuming.
967 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
968 * It seems like a good idea to err on the side of not vacuuming again too
969 * soon in cases where the failsafe prevented significant amounts of heap
970 * vacuuming.
971 */
973 Max(vacrel->new_live_tuples, 0),
974 vacrel->recently_dead_tuples +
975 vacrel->missed_dead_tuples,
976 starttime);
978
979 if (instrument)
980 {
982
983 if (verbose || params.log_vacuum_min_duration == 0 ||
986 {
987 long secs_dur;
988 int usecs_dur;
989 WalUsage walusage;
990 BufferUsage bufferusage;
992 char *msgfmt;
993 int32 diff;
994 double read_rate = 0,
995 write_rate = 0;
999
1001 memset(&walusage, 0, sizeof(WalUsage));
1003 memset(&bufferusage, 0, sizeof(BufferUsage));
1005
1006 total_blks_hit = bufferusage.shared_blks_hit +
1007 bufferusage.local_blks_hit;
1008 total_blks_read = bufferusage.shared_blks_read +
1009 bufferusage.local_blks_read;
1011 bufferusage.local_blks_dirtied;
1012
1014 if (verbose)
1015 {
1016 /*
1017 * Aggressiveness already reported earlier, in dedicated
1018 * VACUUM VERBOSE ereport
1019 */
1020 Assert(!params.is_wraparound);
1021 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1022 }
1023 else if (params.is_wraparound)
1024 {
1025 /*
1026 * While it's possible for a VACUUM to be both is_wraparound
1027 * and !aggressive, that's just a corner-case -- is_wraparound
1028 * implies aggressive. Produce distinct output for the corner
1029 * case all the same, just in case.
1030 */
1031 if (vacrel->aggressive)
1032 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1033 else
1034 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 }
1036 else
1037 {
1038 if (vacrel->aggressive)
1039 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1040 else
1041 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 }
1044 vacrel->dbname,
1045 vacrel->relnamespace,
1046 vacrel->relname,
1047 vacrel->num_index_scans);
1048 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1049 vacrel->removed_pages,
1051 vacrel->scanned_pages,
1052 orig_rel_pages == 0 ? 100.0 :
1053 100.0 * vacrel->scanned_pages /
1055 vacrel->eager_scanned_pages);
1057 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1058 vacrel->tuples_deleted,
1059 (int64) vacrel->new_rel_tuples,
1060 vacrel->recently_dead_tuples);
1061 if (vacrel->missed_dead_tuples > 0)
1063 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1064 vacrel->missed_dead_tuples,
1065 vacrel->missed_dead_pages);
1067 vacrel->cutoffs.OldestXmin);
1069 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1070 vacrel->cutoffs.OldestXmin, diff);
1072 {
1073 diff = (int32) (vacrel->NewRelfrozenXid -
1074 vacrel->cutoffs.relfrozenxid);
1076 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1077 vacrel->NewRelfrozenXid, diff);
1078 }
1079 if (minmulti_updated)
1080 {
1081 diff = (int32) (vacrel->NewRelminMxid -
1082 vacrel->cutoffs.relminmxid);
1084 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1085 vacrel->NewRelminMxid, diff);
1086 }
1087 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1088 vacrel->new_frozen_tuple_pages,
1089 orig_rel_pages == 0 ? 100.0 :
1090 100.0 * vacrel->new_frozen_tuple_pages /
1092 vacrel->tuples_frozen);
1093
1095 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1096 vacrel->vm_new_visible_pages,
1097 vacrel->vm_new_visible_frozen_pages +
1098 vacrel->vm_new_frozen_pages,
1099 vacrel->vm_new_frozen_pages);
1100 if (vacrel->do_index_vacuuming)
1101 {
1102 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1103 appendStringInfoString(&buf, _("index scan not needed: "));
1104 else
1105 appendStringInfoString(&buf, _("index scan needed: "));
1106
1107 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1108 }
1109 else
1110 {
1112 appendStringInfoString(&buf, _("index scan bypassed: "));
1113 else
1114 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1115
1116 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1117 }
1119 vacrel->lpdead_item_pages,
1120 orig_rel_pages == 0 ? 100.0 :
1121 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1122 vacrel->lpdead_items);
1123 for (int i = 0; i < vacrel->nindexes; i++)
1124 {
1125 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1126
1127 if (!istat)
1128 continue;
1129
1131 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1132 indnames[i],
1133 istat->num_pages,
1134 istat->pages_newly_deleted,
1135 istat->pages_deleted,
1136 istat->pages_free);
1137 }
1139 {
1140 /*
1141 * We bypass the changecount mechanism because this value is
1142 * only updated by the calling process. We also rely on the
1143 * above call to pgstat_progress_end_command() to not clear
1144 * the st_progress_param array.
1145 */
1146 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1148 }
1149 if (track_io_timing)
1150 {
1151 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1152 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1153
1154 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1155 read_ms, write_ms);
1156 }
1157 if (secs_dur > 0 || usecs_dur > 0)
1158 {
1160 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1162 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1163 }
1164 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1167 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1172 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1173 walusage.wal_records,
1174 walusage.wal_fpi,
1175 walusage.wal_bytes,
1176 walusage.wal_fpi_bytes,
1177 walusage.wal_buffers_full);
1178
1179 /*
1180 * Report the dead items memory usage.
1181 *
1182 * The num_dead_items_resets counter increases when we reset the
1183 * collected dead items, so the counter is non-zero if at least
1184 * one dead item has been collected, even if index vacuuming is
1185 * disabled.
1186 */
1188 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1189 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1190 vacrel->num_dead_items_resets),
1191 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1192 vacrel->num_dead_items_resets,
1193 (double) dead_items_max_bytes / (1024 * 1024));
1194 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1195
1196 ereport(verbose ? INFO : LOG,
1197 (errmsg_internal("%s", buf.data)));
1198 pfree(buf.data);
1199 }
1200 }
1201
1202 /* Cleanup index statistics and index names */
1203 for (int i = 0; i < vacrel->nindexes; i++)
1204 {
1205 if (vacrel->indstats[i])
1206 pfree(vacrel->indstats[i]);
1207
1208 if (instrument)
1209 pfree(indnames[i]);
1210 }
1211}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1721
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition bufmgr.c:176
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
#define ngettext(s, p, n)
Definition c.h:1176
int32_t int32
Definition c.h:542
int64 TimestampTz
Definition timestamp.h:39
ErrorContextCallback * error_context_stack
Definition elog.c:95
#define _(x)
Definition elog.c:91
#define LOG
Definition elog.h:31
#define INFO
Definition elog.h:34
#define palloc0_object(type)
Definition fe_memutils.h:75
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:285
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:245
#define NoLock
Definition lockdefs.h:34
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1242
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3516
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:383
static int verbose
const void * data
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
int64 PgStat_Counter
Definition pgstat.h:67
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define RelationGetNamespace(relation)
Definition rel.h:555
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
BlockNumber pages_deleted
Definition genam.h:88
BlockNumber pages_newly_deleted
Definition genam.h:87
BlockNumber pages_free
Definition genam.h:89
BlockNumber num_pages
Definition genam.h:83
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
int nworkers
Definition vacuum.h:251
VacOptValue truncate
Definition vacuum.h:236
bits32 options
Definition vacuum.h:219
int log_vacuum_min_duration
Definition vacuum.h:227
bool is_wraparound
Definition vacuum.h:226
VacOptValue index_cleanup
Definition vacuum.h:235
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static TransactionId ReadNextTransactionId(void)
Definition transam.h:377
bool track_cost_delay_timing
Definition vacuum.c:82
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2362
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2405
bool VacuumFailsafeActive
Definition vacuum.c:110
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
#define VACOPT_VERBOSE
Definition vacuum.h:182
@ VACOPTVALUE_AUTO
Definition vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition vacuumlazy.c:497
static void vacuum_error_callback(void *arg)
static void lazy_truncate_heap(LVRelState *vacrel)
static bool should_attempt_truncation(LVRelState *vacrel)
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:223
static void lazy_scan_heap(LVRelState *vacrel)
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg(), errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().
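
For readers tracing the INDEX_CLEANUP handling near the top of the listing above, here is a minimal, hypothetical sketch (standalone C, not PostgreSQL source; the struct and enum names are invented) of how the reloption value maps onto the three per-vacuum booleans:

#include <stdbool.h>

/* Invented names for illustration only; the real code stores these flags in
 * LVRelState and reads the setting from VacuumParams.index_cleanup. */
typedef enum { OPT_AUTO, OPT_ENABLED, OPT_DISABLED } DemoOptValue;

typedef struct DemoVacFlags
{
    bool consider_bypass_optimization;
    bool do_index_vacuuming;
    bool do_index_cleanup;
} DemoVacFlags;

static void
apply_index_cleanup_option(DemoVacFlags *flags, DemoOptValue index_cleanup)
{
    /* Defaults: vacuum indexes and allow the bypass optimization. */
    flags->consider_bypass_optimization = true;
    flags->do_index_vacuuming = true;
    flags->do_index_cleanup = true;

    if (index_cleanup == OPT_DISABLED)
    {
        /* INDEX_CLEANUP off: skip index vacuuming and cleanup entirely. */
        flags->do_index_vacuuming = false;
        flags->do_index_cleanup = false;
    }
    else if (index_cleanup == OPT_ENABLED)
    {
        /* INDEX_CLEANUP on: never bypass (the failsafe can still skip). */
        flags->consider_bypass_optimization = false;
    }
    /* OPT_AUTO: leave the final decision to lazy_vacuum() at runtime. */
}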

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)
extern

Definition at line 9325 of file heapam.c.

9328{
9329 TransactionId xid;
9330 HTSV_Result htsvResult;
9331
9332 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9333 return;
9334
9335 /*
9336 * Check to see whether the tuple has been written to by a concurrent
9337 * transaction, either to create it not visible to us, or to delete it
9338 * while it is visible to us. The "visible" bool indicates whether the
9339 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9340 * is going on with it.
9341 *
9342 * In the event of a concurrently inserted tuple that also happens to have
9343 * been concurrently updated (by a separate transaction), the xmin of the
9344 * tuple will be used -- not the updater's xid.
9345 */
9347 switch (htsvResult)
9348 {
9349 case HEAPTUPLE_LIVE:
9350 if (visible)
9351 return;
9352 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9353 break;
9356 if (visible)
9357 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9358 else
9359 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9360
9362 {
9363 /* This is like the HEAPTUPLE_DEAD case */
9364 Assert(!visible);
9365 return;
9366 }
9367 break;
9369 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9370 break;
9371 case HEAPTUPLE_DEAD:
9372 Assert(!visible);
9373 return;
9374 default:
9375
9376 /*
9377 * The only way to get to this default clause is if a new value is
9378 * added to the enum type without adding it to this switch
9379 * statement. That's a bug, so elog.
9380 */
9381 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9382
9383 /*
9384 * In spite of having all enum values covered and calling elog on
9385 * this default, some compilers think this is a code path which
9386 * allows xid to be used below without initialization. Silence
9387 * that warning.
9388 */
9390 }
9391
9394
9395 /*
9396 * Find top level xid. Bail out if xid is too early to be a conflict, or
9397 * if it's our own xid.
9398 */
9400 return;
9403 return;
9404
9405 CheckForSerializableConflictOut(relation, xid, snapshot);
9406}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition predicate.c:4021
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition subtrans.c:162
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:442

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, fb(), GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
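
As a hedged usage sketch (not PostgreSQL source), a fetch path might call the conflict check immediately after deciding visibility, roughly as the documented callers such as heap_fetch() do; the helper name is invented, and the buffer is assumed to be pinned and at least share-locked by the caller:

#include "postgres.h"
#include "access/heapam.h"

static void
check_ssi_after_visibility(Relation rel, Snapshot snapshot,
                           HeapTuple tuple, Buffer buffer)
{
    bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);

    /* For SERIALIZABLE transactions this may record a rw-conflict; for other
     * isolation levels it returns without doing anything. */
    HeapCheckForSerializableConflictOut(visible, rel, tuple, buffer, snapshot);
}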

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)
extern

Definition at line 1365 of file heapam_visibility.c.

1366{
1367 TransactionId xmax;
1368
1369 /* if there's no valid Xmax, then there's obviously no update either */
1370 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1371 return true;
1372
1373 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1374 return true;
1375
1376 /* invalid xmax means no update */
1378 return true;
1379
1380 /*
1381 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1382 * necessarily have been updated
1383 */
1384 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1385 return false;
1386
1387 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1388 xmax = HeapTupleGetUpdateXid(tuple);
1389
1390 /* not LOCKED_ONLY, so it has to have an xmax */
1392
1394 return false;
1395 if (TransactionIdIsInProgress(xmax))
1396 return false;
1397 if (TransactionIdDidCommit(xmax))
1398 return false;
1399
1400 /*
1401 * not current, not in progress, not committed -- must have aborted or
1402 * crashed
1403 */
1404 return true;
1405}
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1399

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
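
A small hedged sketch (invented helper, not from the sources) of the typical question this routine answers when following an update chain: did the xmax actually update or delete the tuple, or did it merely lock it?

#include "postgres.h"
#include "access/heapam.h"

static bool
xmax_performed_real_update(HeapTuple tup)
{
    /* Returns true only if the tuple's xmax represents an update/delete by a
     * current, in-progress, or committed transaction; an invalid, lock-only,
     * or aborted-updater xmax counts as "only locked" and yields false. */
    return !HeapTupleHeaderIsOnlyLocked(tup->t_data);
}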

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
GlobalVisState vistest 
)
extern

Definition at line 1310 of file heapam_visibility.c.

1311{
1312 HeapTupleHeader tuple = htup->t_data;
1313
1315 Assert(htup->t_tableOid != InvalidOid);
1316
1317 /*
1318 * If the inserting transaction is marked invalid, then it aborted, and
1319 * the tuple is definitely dead. If it's marked neither committed nor
1320 * invalid, then we assume it's still alive (since the presumption is that
1321 * all relevant hint bits were just set moments ago).
1322 */
1323 if (!HeapTupleHeaderXminCommitted(tuple))
1324 return HeapTupleHeaderXminInvalid(tuple);
1325
1326 /*
1327 * If the inserting transaction committed, but any deleting transaction
1328 * aborted, the tuple is still alive.
1329 */
1330 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1331 return false;
1332
1333 /*
1334 * If the XMAX is just a lock, the tuple is still alive.
1335 */
1337 return false;
1338
1339 /*
1340 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1341 * know without checking pg_multixact.
1342 */
1343 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1344 return false;
1345
1346 /* If deleter isn't known to have committed, assume it's still running. */
1347 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1348 return false;
1349
1350 /* Deleter committed, so tuple is dead if the XID is old enough. */
1351 return GlobalVisTestIsRemovableXid(vistest,
1353}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define InvalidOid

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().
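
A hedged sketch of the cheap pre-check this enables; the helper is invented, and GlobalVisTestFor() is assumed to be available from utils/snapmgr.h:

#include "postgres.h"
#include "access/heapam.h"
#include "utils/snapmgr.h"      /* assumed location of GlobalVisTestFor() */

static bool
can_skip_surely_dead(Relation rel, HeapTuple tup)
{
    GlobalVisState *vistest = GlobalVisTestFor(rel);

    /* True only when already-set hint bits prove the deleter committed and
     * its XID is older than the removal horizon; no clog lookups happen. */
    return HeapTupleIsSurelyDead(tup, vistest);
}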

◆ HeapTupleSatisfiesMVCCBatch()

int HeapTupleSatisfiesMVCCBatch ( Snapshot  snapshot,
Buffer  buffer,
int  ntups,
BatchMVCCState batchmvcc,
OffsetNumber vistuples_dense 
)
extern

Definition at line 1617 of file heapam_visibility.c.

1621{
1622 int nvis = 0;
1623
1624 Assert(IsMVCCSnapshot(snapshot));
1625
1626 for (int i = 0; i < ntups; i++)
1627 {
1628 bool valid;
1629 HeapTuple tup = &batchmvcc->tuples[i];
1630
1631 valid = HeapTupleSatisfiesMVCC(tup, snapshot, buffer);
1632 batchmvcc->visible[i] = valid;
1633
1634 if (likely(valid))
1635 {
1636 vistuples_dense[nvis] = tup->t_self.ip_posid;
1637 nvis++;
1638 }
1639 }
1640
1641 return nvis;
1642}
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)

References Assert, fb(), HeapTupleSatisfiesMVCC(), i, IsMVCCSnapshot, and likely.

Referenced by page_collect_tuples().
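
A hedged consumer sketch (invented helper, simplified relative to whatever page_collect_tuples() actually does): run the batch check, then walk only the densely packed offsets of the visible tuples:

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"    /* MaxHeapTuplesPerPage */

static void
consume_visible_tuples(Snapshot snapshot, Buffer buffer,
                       BatchMVCCState *batchmvcc, int ntups)
{
    OffsetNumber vistuples[MaxHeapTuplesPerPage];
    int         nvis;

    /* batchmvcc->tuples[0..ntups-1] are assumed to be filled in already */
    nvis = HeapTupleSatisfiesMVCCBatch(snapshot, buffer, ntups,
                                       batchmvcc, vistuples);

    for (int i = 0; i < nvis; i++)
    {
        OffsetNumber offnum = vistuples[i]; /* line pointer of a visible tuple */

        (void) offnum;          /* placeholder: process the visible tuple here */
    }
}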

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)
extern

Definition at line 440 of file heapam_visibility.c.

442{
443 HeapTupleHeader tuple = htup->t_data;
444
446 Assert(htup->t_tableOid != InvalidOid);
447
449 {
451 return TM_Invisible;
452
453 else if (!HeapTupleCleanMoved(tuple, buffer))
454 return TM_Invisible;
456 {
457 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
458 return TM_Invisible; /* inserted after scan started */
459
460 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
461 return TM_Ok;
462
464 {
465 TransactionId xmax;
466
467 xmax = HeapTupleHeaderGetRawXmax(tuple);
468
469 /*
470 * Careful here: even though this tuple was created by our own
471 * transaction, it might be locked by other transactions, if
472 * the original version was key-share locked when we updated
473 * it.
474 */
475
476 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
477 {
478 if (MultiXactIdIsRunning(xmax, true))
479 return TM_BeingModified;
480 else
481 return TM_Ok;
482 }
483
484 /*
485 * If the locker is gone, then there is nothing of interest
486 * left in this Xmax; otherwise, report the tuple as
487 * locked/updated.
488 */
489 if (!TransactionIdIsInProgress(xmax))
490 return TM_Ok;
491 return TM_BeingModified;
492 }
493
494 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
495 {
496 TransactionId xmax;
497
498 xmax = HeapTupleGetUpdateXid(tuple);
499
500 /* not LOCKED_ONLY, so it has to have an xmax */
502
503 /* deleting subtransaction must have aborted */
505 {
507 false))
508 return TM_BeingModified;
509 return TM_Ok;
510 }
511 else
512 {
513 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
514 return TM_SelfModified; /* updated after scan started */
515 else
516 return TM_Invisible; /* updated before scan started */
517 }
518 }
519
521 {
522 /* deleting subtransaction must have aborted */
523 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
525 return TM_Ok;
526 }
527
528 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
529 return TM_SelfModified; /* updated after scan started */
530 else
531 return TM_Invisible; /* updated before scan started */
532 }
534 return TM_Invisible;
536 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
538 else
539 {
540 /* it must have aborted or crashed */
541 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
543 return TM_Invisible;
544 }
545 }
546
547 /* by here, the inserting transaction has committed */
548
549 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
550 return TM_Ok;
551
552 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
553 {
555 return TM_Ok;
556 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
557 return TM_Updated; /* updated by other */
558 else
559 return TM_Deleted; /* deleted by other */
560 }
561
562 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
563 {
564 TransactionId xmax;
565
567 return TM_Ok;
568
570 {
572 return TM_BeingModified;
573
575 return TM_Ok;
576 }
577
578 xmax = HeapTupleGetUpdateXid(tuple);
579 if (!TransactionIdIsValid(xmax))
580 {
582 return TM_BeingModified;
583 }
584
585 /* not LOCKED_ONLY, so it has to have an xmax */
587
589 {
590 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
591 return TM_SelfModified; /* updated after scan started */
592 else
593 return TM_Invisible; /* updated before scan started */
594 }
595
597 return TM_BeingModified;
598
599 if (TransactionIdDidCommit(xmax))
600 {
601 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
602 return TM_Updated;
603 else
604 return TM_Deleted;
605 }
606
607 /*
608 * By here, the update in the Xmax is either aborted or crashed, but
609 * what about the other members?
610 */
611
613 {
614 /*
615 * There's no member, even just a locker, alive anymore, so we can
616 * mark the Xmax as invalid.
617 */
618 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
620 return TM_Ok;
621 }
622 else
623 {
624 /* There are lockers running */
625 return TM_BeingModified;
626 }
627 }
628
630 {
632 return TM_BeingModified;
633 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
634 return TM_SelfModified; /* updated after scan started */
635 else
636 return TM_Invisible; /* updated before scan started */
637 }
638
640 return TM_BeingModified;
641
643 {
644 /* it must have aborted or crashed */
645 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
647 return TM_Ok;
648 }
649
650 /* xmax transaction committed */
651
653 {
654 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
656 return TM_Ok;
657 }
658
659 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
661 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
662 return TM_Updated; /* updated by other */
663 else
664 return TM_Deleted; /* deleted by other */
665}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
static bool HeapTupleCleanMoved(HeapTupleHeader tuple, Buffer buffer)
#define HEAP_XMIN_COMMITTED
#define HEAP_XMIN_INVALID
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:463

References Assert, HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().
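
As a hedged sketch of how the result is typically consumed (a simplified version of the switch statements in heap_delete()/heap_update(); the error texts are illustrative only, not the backend's messages):

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

static void
react_to_update_check(HeapTuple tup, CommandId curcid, Buffer buffer)
{
    switch (HeapTupleSatisfiesUpdate(tup, curcid, buffer))
    {
        case TM_Ok:
            break;              /* this version may be updated or deleted */
        case TM_Invisible:
            elog(ERROR, "tuple is not visible to this command");
            break;
        case TM_SelfModified:
            elog(ERROR, "tuple already modified by the current command");
            break;
        case TM_BeingModified:
            /* a real caller would sleep on the locker/updater here */
            break;
        case TM_Updated:
        case TM_Deleted:
            /* concurrently modified: follow t_ctid or report the conflict */
            break;
        default:
            break;
    }
}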

◆ HeapTupleSatisfiesVacuum()

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)
extern

Definition at line 1076 of file heapam_visibility.c.

1077{
1078 HeapTupleHeader tuple = htup->t_data;
1079
1081 Assert(htup->t_tableOid != InvalidOid);
1083
1085
1086 /*
1087 * Has inserting transaction committed?
1088 *
1089 * If the inserting transaction aborted, then the tuple was never visible
1090 * to any other transaction, so we can delete it immediately.
1091 */
1092 if (!HeapTupleHeaderXminCommitted(tuple))
1093 {
1094 if (HeapTupleHeaderXminInvalid(tuple))
1095 return HEAPTUPLE_DEAD;
1096 else if (!HeapTupleCleanMoved(tuple, buffer))
1097 return HEAPTUPLE_DEAD;
1099 {
1100 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1102 /* only locked? run infomask-only check first, for performance */
1106 /* inserted and then deleted by same xact */
1109 /* deleting subtransaction must have aborted */
1111 }
1113 {
1114 /*
1115 * It'd be possible to discern between INSERT/DELETE in progress
1116 * here by looking at xmax - but that doesn't seem beneficial for
1117 * the majority of callers and even detrimental for some. We'd
1118 * rather have callers look at/wait for xmin than xmax. It's
1119 * always correct to return INSERT_IN_PROGRESS because that's
1120 * what's happening from the view of other backends.
1121 */
1123 }
1125 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1127 else
1128 {
1129 /*
1130 * Not in Progress, Not Committed, so either Aborted or crashed
1131 */
1132 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1134 return HEAPTUPLE_DEAD;
1135 }
1136
1137 /*
1138 * At this point the xmin is known committed, but we might not have
1139 * been able to set the hint bit yet; so we can no longer Assert that
1140 * it's set.
1141 */
1142 }
1143
1144 /*
1145 * Okay, the inserter committed, so it was good at some point. Now what
1146 * about the deleting transaction?
1147 */
1148 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1149 return HEAPTUPLE_LIVE;
1150
1152 {
1153 /*
1154 * "Deleting" xact really only locked it, so the tuple is live in any
1155 * case. However, we should make sure that either XMAX_COMMITTED or
1156 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1157 * examining the tuple for future xacts.
1158 */
1159 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1160 {
1161 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1162 {
1163 /*
1164 * If it's a pre-pg_upgrade tuple, the multixact cannot
1165 * possibly be running; otherwise have to check.
1166 */
1167 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1169 true))
1170 return HEAPTUPLE_LIVE;
1172 }
1173 else
1174 {
1176 return HEAPTUPLE_LIVE;
1177 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1179 }
1180 }
1181
1182 /*
1183 * We don't really care whether xmax did commit, abort or crash. We
1184 * know that xmax did lock the tuple, but it did not and will never
1185 * actually update it.
1186 */
1187
1188 return HEAPTUPLE_LIVE;
1189 }
1190
1191 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1192 {
1194
1195 /* already checked above */
1197
1198 /* not LOCKED_ONLY, so it has to have an xmax */
1200
1201 if (TransactionIdIsInProgress(xmax))
1203 else if (TransactionIdDidCommit(xmax))
1204 {
1205 /*
1206 * The multixact might still be running due to lockers. Need to
1207 * allow for pruning if below the xid horizon regardless --
1208 * otherwise we could end up with a tuple where the updater has to
1209 * be removed due to the horizon, but is not pruned away. It's
1210 * not a problem to prune that tuple, because any remaining
1211 * lockers will also be present in newer tuple versions.
1212 */
1213 *dead_after = xmax;
1215 }
1216 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1217 {
1218 /*
1219 * Not in Progress, Not Committed, so either Aborted or crashed.
1220 * Mark the Xmax as invalid.
1221 */
1223 }
1224
1225 return HEAPTUPLE_LIVE;
1226 }
1227
1228 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1229 {
1233 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1235 else
1236 {
1237 /*
1238 * Not in Progress, Not Committed, so either Aborted or crashed
1239 */
1240 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1242 return HEAPTUPLE_LIVE;
1243 }
1244
1245 /*
1246 * At this point the xmax is known committed, but we might not have
1247 * been able to set the hint bit yet; so we can no longer Assert that
1248 * it's set.
1249 */
1250 }
1251
1252 /*
1253 * Deleter committed, allow caller to check if it was recent enough that
1254 * some open transactions could still see the tuple.
1255 */
1258}

References Assert, fb(), HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
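
A hedged sketch of how the returned dead_after horizon is turned into a final verdict for a concrete OldestXmin cutoff; this mirrors what HeapTupleSatisfiesVacuum() is described as doing on top of this routine, but the helper itself is invented:

#include "postgres.h"
#include "access/heapam.h"
#include "access/transam.h"

static HTSV_Result
classify_against_cutoff(HeapTuple tup, Buffer buffer, TransactionId oldest_xmin)
{
    TransactionId dead_after = InvalidTransactionId;
    HTSV_Result res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);

    /* RECENTLY_DEAD means "deleter committed"; it becomes DEAD only once the
     * deleting XID is older than every snapshot that could still see it. */
    if (res == HEAPTUPLE_RECENTLY_DEAD &&
        TransactionIdPrecedes(dead_after, oldest_xmin))
        res = HEAPTUPLE_DEAD;

    return res;
}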

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)
extern

Definition at line 1655 of file heapam_visibility.c.

1656{
1657 switch (snapshot->snapshot_type)
1658 {
1659 case SNAPSHOT_MVCC:
1660 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1661 case SNAPSHOT_SELF:
1662 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1663 case SNAPSHOT_ANY:
1664 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1665 case SNAPSHOT_TOAST:
1666 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1667 case SNAPSHOT_DIRTY:
1668 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1670 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1672 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1673 }
1674
1675 return false; /* keep compiler quiet */
1676}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition snapshot.h:70
@ SNAPSHOT_SELF
Definition snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition snapshot.h:114
@ SNAPSHOT_MVCC
Definition snapshot.h:46
@ SNAPSHOT_ANY
Definition snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition snapshot.h:105
@ SNAPSHOT_DIRTY
Definition snapshot.h:98
SnapshotType snapshot_type
Definition snapshot.h:140

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
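
A minimal hedged usage sketch (invented helper): test a fetched tuple against the transaction's active snapshot, assuming the caller holds a pin and at least a share lock on the containing buffer so hint bits may be set:

#include "postgres.h"
#include "access/heapam.h"
#include "utils/snapmgr.h"

static bool
visible_under_active_snapshot(HeapTuple tup, Buffer buffer)
{
    Snapshot snapshot = GetActiveSnapshot();

    /* Dispatches to the MVCC/self/dirty/... variant based on snapshot type. */
    return HeapTupleSatisfiesVisibility(tup, snapshot, buffer);
}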

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)
extern

Definition at line 141 of file heapam_visibility.c.

143{
144 /*
145 * The uses from heapam.c rely on being able to perform the hint bit
146 * updates, which can only be guaranteed if we are holding an exclusive
147 * lock on the buffer - which all callers are doing.
148 */
150
151 SetHintBits(tuple, buffer, infomask, xid);
152}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2997

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferIsLockedByMeInMode(), fb(), and SetHintBits().

Referenced by UpdateXmaxHintBits().
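
A hedged sketch modeled on the documented caller UpdateXmaxHintBits(): once the fate of the deleting transaction is known, record it as a hint bit. The helper name is invented; the caller is assumed to hold an exclusive buffer lock and to know that xmax is a plain XID that is no longer in progress:

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/transam.h"

static void
remember_xmax_outcome(HeapTupleHeader tuple, Buffer buffer, TransactionId xmax)
{
    if (TransactionIdDidCommit(xmax))
        HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, xmax);
    else
        /* aborted or crashed: mark the xmax as invalid instead */
        HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                             InvalidTransactionId);
}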

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
Buffer  vmbuffer,
uint8  vmflags,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)
extern

Definition at line 2167 of file pruneheap.c.

2176{
2179 uint8 info;
2181
2182 /* The following local variables hold data registered in the WAL record: */
2186 xlhp_prune_items dead_items;
2189 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2191
2193
2194 xlrec.flags = 0;
2196
2197 /*
2198 * We can avoid an FPI of the heap page if the only modification we are
2199 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2200 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2201 * heap page with this record's LSN. Recovery skips records <= the stamped
2202 * LSN, so this could lead to skipping an earlier FPI needed to repair a
2203 * torn page.
2204 */
2205 if (!do_prune &&
2206 nfrozen == 0 &&
2209
2210 /*
2211 * Prepare data for the buffer. The arrays are not actually in the
2212 * buffer, but we pretend that they are. When XLogInsert stores a full
2213 * page image, the arrays can be omitted.
2214 */
2217
2218 if (do_set_vm)
2219 XLogRegisterBuffer(1, vmbuffer, 0);
2220
2221 if (nfrozen > 0)
2222 {
2223 int nplans;
2224
2226
2227 /*
2228 * Prepare deduplicated representation for use in the WAL record. This
2229 * destructively sorts frozen tuples array in-place.
2230 */
2231 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2232
2233 freeze_plans.nplans = nplans;
2235 offsetof(xlhp_freeze_plans, plans));
2236 XLogRegisterBufData(0, plans,
2237 sizeof(xlhp_freeze_plan) * nplans);
2238 }
2239 if (nredirected > 0)
2240 {
2242
2243 redirect_items.ntargets = nredirected;
2246 XLogRegisterBufData(0, redirected,
2247 sizeof(OffsetNumber[2]) * nredirected);
2248 }
2249 if (ndead > 0)
2250 {
2251 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2252
2253 dead_items.ntargets = ndead;
2254 XLogRegisterBufData(0, &dead_items,
2256 XLogRegisterBufData(0, dead,
2257 sizeof(OffsetNumber) * ndead);
2258 }
2259 if (nunused > 0)
2260 {
2262
2263 unused_items.ntargets = nunused;
2266 XLogRegisterBufData(0, unused,
2267 sizeof(OffsetNumber) * nunused);
2268 }
2269 if (nfrozen > 0)
2271 sizeof(OffsetNumber) * nfrozen);
2272
2273 /*
2274 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2275 * flag above.
2276 */
2278 {
2279 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2281 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2282 }
2284 xlrec.flags |= XLHP_IS_CATALOG_REL;
2287 if (cleanup_lock)
2288 xlrec.flags |= XLHP_CLEANUP_LOCK;
2289 else
2290 {
2291 Assert(nredirected == 0 && ndead == 0);
2292 /* also, any items in 'unused' must've been LP_DEAD previously */
2293 }
2297
2298 switch (reason)
2299 {
2300 case PRUNE_ON_ACCESS:
2302 break;
2303 case PRUNE_VACUUM_SCAN:
2305 break;
2308 break;
2309 default:
2310 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2311 break;
2312 }
2313 recptr = XLogInsert(RM_HEAP2_ID, info);
2314
2315 if (do_set_vm)
2316 {
2317 Assert(BufferIsDirty(vmbuffer));
2318 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2319 }
2320
2321 /*
2322 * See comment at the top of the function about regbuf_flags_heap for
2323 * details on when we can advance the page LSN.
2324 */
2325 if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2326 {
2327 Assert(BufferIsDirty(buffer));
2329 }
2330}
#define XLHP_HAS_CONFLICT_HORIZON
#define XLHP_HAS_FREEZE_PLANS
#define XLHP_VM_ALL_VISIBLE
#define SizeOfHeapPrune
#define XLHP_HAS_NOW_UNUSED_ITEMS
#define XLHP_VM_ALL_FROZEN
#define XLHP_HAS_REDIRECTIONS
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
#define XLHP_HAS_DEAD_ITEMS
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition pruneheap.c:2088
#define XLogHintBitIsNeeded()
Definition xlog.h:122
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33

References Assert, BufferGetPage(), BufferIsDirty(), data, elog, ERROR, fb(), heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, VISIBILITYMAP_VALID_BITS, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLHP_VM_ALL_FROZEN, XLHP_VM_ALL_VISIBLE, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogHintBitIsNeeded, XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)
extern

Definition at line 2103 of file heapam.c.

2104{
2105 if (bistate->current_buf != InvalidBuffer)
2106 ReleaseBuffer(bistate->current_buf);
2107 bistate->current_buf = InvalidBuffer;
2108
2109 /*
2110 * Despite the name, we also reset bulk relation extension state.
2111 * Otherwise we can end up erroring out due to looking for free space in
2112 * ->next_free of one partition, even though ->next_free was set when
2113 * extending another partition. It could obviously also be bad for
2114 * efficiency to look at existing blocks at offsets from another
2115 * partition, even if we don't error out.
2116 */
2117 bistate->next_free = InvalidBlockNumber;
2118 bistate->last_free = InvalidBlockNumber;
2119}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
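
A hedged sketch of the bulk-insert life cycle around a change of target relation, loosely modeled on the documented caller CopyFrom(); the tuple-producing loop and error handling are omitted:

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
bulk_insert_into_two_partitions(Relation part_a, Relation part_b,
                                HeapTuple tup_a, HeapTuple tup_b)
{
    BulkInsertState bistate = GetBulkInsertState();

    heap_insert(part_a, tup_a, GetCurrentCommandId(true), 0, bistate);

    /* Switching partitions: drop the pinned buffer and the cached free-space
     * hints so they are not reused against the wrong relation. */
    ReleaseBulkInsertStatePin(bistate);

    heap_insert(part_b, tup_b, GetCurrentCommandId(true), 0, bistate);

    FreeBulkInsertState(bistate);
}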

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)
extern

Definition at line 5557 of file reorderbuffer.c.

5561{
5564 ForkNumber forkno;
5565 BlockNumber blockno;
5566 bool updated_mapping = false;
5567
5568 /*
5569 * Return unresolved if tuplecid_data is not valid. That's because when
5570 * streaming in-progress transactions we may run into tuples with the CID
5571 * before actually decoding them. Think e.g. about INSERT followed by
5572 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5573 * INSERT. So in such cases, we assume the CID is from the future
5574 * command.
5575 */
5576 if (tuplecid_data == NULL)
5577 return false;
5578
5579 /* be careful about padding */
5580 memset(&key, 0, sizeof(key));
5581
5582 Assert(!BufferIsLocal(buffer));
5583
5584 /*
5585 * get relfilelocator from the buffer, no convenient way to access it
5586 * other than that.
5587 */
5588 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5589
5590 /* tuples can only be in the main fork */
5591 Assert(forkno == MAIN_FORKNUM);
5592 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5593
5594 ItemPointerCopy(&htup->t_self,
5595 &key.tid);
5596
5597restart:
5600
5601 /*
5602 * failed to find a mapping, check whether the table was rewritten and
5603 * apply mapping if so, but only do that once - there can be no new
5604 * mappings while we are in here since we have to hold a lock on the
5605 * relation.
5606 */
5607 if (ent == NULL && !updated_mapping)
5608 {
5610 /* now check but don't update for a mapping again */
5611 updated_mapping = true;
5612 goto restart;
5613 }
5614 else if (ent == NULL)
5615 return false;
5616
5617 if (cmin)
5618 *cmin = ent->cmin;
5619 if (cmax)
5620 *cmax = ent->cmax;
5621 return true;
5622}
#define BufferIsLocal(buffer)
Definition buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_FIND
Definition hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition snapmgr.c:163

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 3265 of file heapam.c.

3266{
3267 TM_Result result;
3268 TM_FailureData tmfd;
3269
3270 result = heap_delete(relation, tid,
3272 true /* wait for commit */ ,
3273 &tmfd, false /* changingPart */ );
3274 switch (result)
3275 {
3276 case TM_SelfModified:
3277 /* Tuple was already updated in current command? */
3278 elog(ERROR, "tuple already updated by self");
3279 break;
3280
3281 case TM_Ok:
3282 /* done successfully */
3283 break;
3284
3285 case TM_Updated:
3286 elog(ERROR, "tuple concurrently updated");
3287 break;
3288
3289 case TM_Deleted:
3290 elog(ERROR, "tuple concurrently deleted");
3291 break;
3292
3293 default:
3294 elog(ERROR, "unrecognized heap_delete status: %u", result);
3295 break;
3296 }
3297}
TM_Result heap_delete(Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition heapam.c:2842

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
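
A hedged usage sketch in the style of the documented caller CatalogTupleDelete(): delete a tuple located by an ordinary scan. No result handling is needed because any concurrent modification raises an error inside simple_heap_delete():

#include "postgres.h"
#include "access/heapam.h"

static void
delete_found_tuple(Relation rel, HeapTuple tup)
{
    /* tup->t_self identifies the tuple version obtained from the scan */
    simple_heap_delete(rel, &tup->t_self);
}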

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)
extern

Definition at line 2784 of file heapam.c.

2785{
2786 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2787}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition heapam.c:2141

References fb(), GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)
extern

Definition at line 4555 of file heapam.c.

4557{
4558 TM_Result result;
4559 TM_FailureData tmfd;
4560 LockTupleMode lockmode;
4561
4562 result = heap_update(relation, otid, tup,
4564 true /* wait for commit */ ,
4565 &tmfd, &lockmode, update_indexes);
4566 switch (result)
4567 {
4568 case TM_SelfModified:
4569 /* Tuple was already updated in current command? */
4570 elog(ERROR, "tuple already updated by self");
4571 break;
4572
4573 case TM_Ok:
4574 /* done successfully */
4575 break;
4576
4577 case TM_Updated:
4578 elog(ERROR, "tuple concurrently updated");
4579 break;
4580
4581 case TM_Deleted:
4582 elog(ERROR, "tuple concurrently deleted");
4583 break;
4584
4585 default:
4586 elog(ERROR, "unrecognized heap_update status: %u", result);
4587 break;
4588 }
4589}
TM_Result heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition heapam.c:3311

References elog, ERROR, fb(), GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
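
A hedged sketch of the in-place replacement pattern used by the documented caller CatalogTupleUpdate(), minus the catalog index maintenance; the helper is invented, and the caller remains responsible for inserting whatever index entries update_indexes reports as needed:

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

static void
update_found_tuple(Relation rel, HeapTuple oldtup, HeapTuple newtup)
{
    TU_UpdateIndexes update_indexes;

    simple_heap_update(rel, &oldtup->t_self, newtup, &update_indexes);

    if (update_indexes != TU_None)
    {
        /* a real caller would form and insert new index entries here */
    }
}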